source: trunk/firmware_v4/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h

Last change on this file was 42, checked in by f.jahn, 5 days ago
File size: 48.8 KB
Line 
1/*
2 * Copyright (C) 2010-2022 Arm Limited or its affiliates.
3 *
4 * SPDX-License-Identifier: Apache-2.0
5 *
6 * Licensed under the Apache License, Version 2.0 (the License); you may
7 * not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an AS IS BASIS, WITHOUT
14 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19/* ----------------------------------------------------------------------
20 * Project: CMSIS NN Library
21 * Title: arm_nnsupportfunctions.h
22 * Description: Public header file of support functions for CMSIS NN Library
23 *
24 * $Date: 19. April 2022
25 * $Revision: V.7.0.1
26 *
27 * Target Processor: Cortex-M CPUs
28 * -------------------------------------------------------------------- */
29
30#ifndef _ARM_NNSUPPORTFUNCTIONS_H_
31#define _ARM_NNSUPPORTFUNCTIONS_H_
32
33#include "arm_nn_math_types.h"
34#include "arm_nn_types.h"
35
36#include <stdbool.h>
37
38#ifdef __cplusplus
39extern "C" {
40#endif
41
/* Shift helpers: a requantization shift is encoded as one signed value;
 * positive means left-shift, non-positive means right-shift by its negation.
 * Arguments and full expansions are parenthesized to avoid operator-precedence
 * surprises when callers pass compound expressions (e.g. `a ? b : c`). */
#define LEFT_SHIFT(_shift) ((_shift) > 0 ? (_shift) : 0)
#define RIGHT_SHIFT(_shift) ((_shift) > 0 ? 0 : -(_shift))
/* All-ones mask when the predicate holds, 0 otherwise (for branch-free selects). */
#define MASK_IF_ZERO(x) (((x) == 0) ? ~0 : 0)
#define MASK_IF_NON_ZERO(x) (((x) != 0) ? ~0 : 0)
/* Branch-free select: returns `a` where mask bits are set, `b` elsewhere.
 * `mask` is expected to be all-ones or all-zeros (as produced by MASK_IF_*). */
#define SELECT_USING_MASK(mask, a, b) ((((mask) & (a)) ^ (~(mask) & (b))))

#define MAX(A, B) ((A) > (B) ? (A) : (B))
#define MIN(A, B) ((A) < (B) ? (A) : (B))
/* NOTE: argument order is (value, high, low). */
#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l))
/* Round a q31 multiplier to q15 with rounding; saturate at 0x7FFF. */
#define REDUCE_MULTIPLIER(_mult) (((_mult) < 0x7FFF0000) ? (((_mult) + (1 << 15)) >> 16) : 0x7FFF)

/**
 * @brief definition to pack four 8 bit values.
 */
#define PACK_Q7x4_32x1(v0, v1, v2, v3)                                                                                 \
    ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) |                     \
     (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000))
59
/**
 * @brief Union for SIMD access of q31/q15/q7 types
 *
 * Lets one 32-bit register-sized value be viewed as a single q31 word,
 * two q15 half-words, or four q7 bytes (lane order is endian-dependent).
 */
union arm_nnword
{
    q31_t word;          /**< q31 type: the full 32-bit value */
    q15_t half_words[2]; /**< q15 type: two 16-bit lanes */
    q7_t bytes[4];       /**< q7 type: four 8-bit lanes */
};
72
/**
 * @brief Union for data type long long
 *
 * Allows an int64_t to be accessed as two 32-bit halves.
 * NOTE(review): the (low, high) member order matches little-endian layout;
 * presumably big-endian targets are handled at the use sites — confirm
 * before relying on this on ARM_MATH_BIG_ENDIAN builds.
 */
struct arm_nn_double
{
    uint32_t low;  /**< least-significant 32 bits (unsigned) */
    int32_t high;  /**< most-significant 32 bits (carries the sign) */
};

union arm_nn_long_long
{
    int64_t long_long;         /**< full 64-bit value */
    struct arm_nn_double word; /**< the same value split into 32-bit halves */
};
87
88/**
89 * @defgroup nndata_convert Neural Network Data Conversion Functions
90 *
91 * Perform data type conversion in-between neural network operations
92 *
93 */
94
95/**
96 * @brief Converts the elements of the q7 vector to q15 vector without left-shift
97 * @param[in] *pSrc points to the q7 input vector
98 * @param[out] *pDst points to the q15 output vector
99 * @param[in] blockSize length of the input vector
100 *
101 */
102void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
103
104/**
105 * @brief Non-saturating addition of elements of a q7 vector
106 * @param[in] *input Pointer to the q7 input vector
107 * @param[out] *output Pointer to the q31 output variable.
108 * @param[in] block_size length of the input vector
109 * \par Description:
110 *
111 * 2^24 samples can be added without saturating the result.
112 *
113 * The equation used for the conversion process is:
114 *
115 * <pre>
116 * sum = input[0] + input[1] + .. + input[block_size -1]
117 * </pre>
118 *
119 * */
120void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size);
121
122/**
123 * @brief Converts the elements of the q7 vector to reordered q15 vector without left-shift
124 * @param[in] *pSrc points to the q7 input vector
125 * @param[out] *pDst points to the q15 output vector
126 * @param[in] blockSize length of the input vector
127 * @return none.
128 *
129 */
130void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize);
131
132/**
133 * @brief Converts the elements from a q7 vector to a q15 vector with an added offset
134 * @param[in] src pointer to the q7 input vector
135 * @param[out] dst pointer to the q15 output vector
136 * @param[in] block_size length of the input vector
137 * @param[in] offset q7 offset to be added to each input vector element.
138 *
139 * \par Description:
140 *
141 * The equation used for the conversion process is:
142 *
143 * <pre>
144 * dst[n] = (q15_t) src[n] + offset; 0 <= n < block_size.
145 * </pre>
146 *
147 */
148void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
149
150/**
151 * @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset
152 * @param[in] src pointer to the q7 input vector
153 * @param[out] dst pointer to the q15 output vector
154 * @param[in] block_size length of the input vector
155 * @param[in] offset offset to be added to each input vector element.
156 * @return none.
157 *
158 * @details This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of
159 * the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its
160 * original order.
161 *
162 */
163void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset);
164
/**
 * @brief Converts the elements from a q7 vector and accumulates them into a q15 vector
 * @param[in,out] dst        points to the q15 accumulator/output vector (read-modify-write;
 *                           note it is the FIRST argument of the function)
 * @param[in]     src        points to the q7 input vector
 * @param[in]     block_size length of the input vector
 *
 * \par Description:
 *
 * The equation used for the conversion process is:
 *
 * <pre>
 *  dst[n] += (q15_t) src[n] ;   0 <= n < block_size.
 * </pre>
 *
 */
void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size);
181
182/**
183 * @brief Depthwise conv on an im2col buffer where the input channel equals output channel.
184 * @param[in] row pointer to row
185 * @param[in] col pointer to im2col buffer, always consists of 2 columns.
186 * @param[in] num_ch number of channels
187 * @param[in] out_shift pointer to per output channel requantization shift parameter.
188 * @param[in] out_mult pointer to per output channel requantization multiplier parameter.
189 * @param[in] out_offset output tensor offset.
190 * @param[in] activation_min minimum value to clamp the output to. Range : int8
191 * @param[in] activation_max maximum value to clamp the output to. Range : int8
192 * @param[in] kernel_size number of elements in one column.
193 * @param[in] output_bias per output channel bias. Range : int32
194 * @param[out] out pointer to output
195 * @return The function returns one of the two
196 * 1. The incremented output pointer for a successful operation or
197 * 2. NULL if implementation is not available.
198 *
199 * @details Supported framework: TensorFlow Lite micro.
200 */
201q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row,
202 const q15_t *col,
203 const uint16_t num_ch,
204 const int32_t *out_shift,
205 const int32_t *out_mult,
206 const int32_t out_offset,
207 const int32_t activation_min,
208 const int32_t activation_max,
209 const uint16_t kernel_size,
210 const int32_t *const output_bias,
211 q7_t *out);
212
213/**
214 * @brief General Matrix-multiplication function with per-channel requantization.
215 * @param[in] input_row pointer to row operand
216 * @param[in] input_col pointer to col operand
217 * @param[in] output_ch number of rows of input_row
218 * @param[in] col_batches number of column batches. Range: 1 to 4
219 * @param[in] output_shift pointer to per output channel requantization shift parameter.
220 * @param[in] output_mult pointer to per output channel requantization multiplier parameter.
221 * @param[in] out_offset output tensor offset.
222 * @param[in] col_offset input tensor(col) offset.
223 * @param[in] row_offset kernel offset(row). Not used.
224 * @param[in] out_activation_min minimum value to clamp the output to. Range : int8
225 * @param[in] out_activation_max maximum value to clamp the output to. Range : int8
226 * @param[in] row_len number of elements in each row
227 * @param[in] bias per output channel bias. Range : int32
228 * @param[in,out] out pointer to output
229 * @return The function returns one of the two
230 * 1. The incremented output pointer for a successful operation or
231 * 2. NULL if implementation is not available.
232 *
233 * @details Supported framework: TensorFlow Lite
234 */
235q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
236 const q7_t *input_col,
237 const uint16_t output_ch,
238 const uint16_t col_batches,
239 const int32_t *output_shift,
240 const int32_t *output_mult,
241 const int32_t out_offset,
242 const int32_t col_offset,
243 const int32_t row_offset,
244 const int16_t out_activation_min,
245 const int16_t out_activation_max,
246 const uint16_t row_len,
247 const int32_t *const bias,
248 q7_t *out);
249/**
250 * @brief Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution.
251 * @param[in] input_a pointer to operand A
252 * @param[in] input_b pointer to operand B, always consists of 2 vectors.
253 * @param[in] output_ch number of rows of A
254 * @param[in] out_shift pointer to per output channel requantization shift parameter.
255 * @param[in] out_mult pointer to per output channel requantization multiplier parameter.
256 * @param[in] activation_min minimum value to clamp the output to. Range : int16
257 * @param[in] activation_max maximum value to clamp the output to. Range : int16
258 * @param[in] num_col_a number of columns of A
259 * @param[in] output_bias per output channel bias. Range : int64
260 * @param[in,out] out_0 pointer to output
261 * @return The function returns one of the two
262 * 1. The incremented output pointer for a successful operation or
263 * 2. NULL if implementation is not available.
264 *
265 * @details This function does the matrix multiplication of weight matrix for all output channels
266 * with 2 columns from im2col and produces two elements/output_channel. The outputs are
267 * clamped in the range provided by activation min and max.
268 * Supported framework: TensorFlow Lite micro.
269 */
270q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a,
271 const q15_t *input_b,
272 const int32_t output_ch,
273 const int32_t *out_shift,
274 const int32_t *out_mult,
275 const int16_t activation_min,
276 const int16_t activation_max,
277 const int32_t num_col_a,
278 const int64_t *const output_bias,
279 q15_t *out_0);
280/**
281 * @brief General Matrix-multiplication without requantization for one row & one column
282 * @param[in] row_elements number of row elements
283 * @param[in] row_base pointer to row operand
284 * @param[in] col_base pointer to col operand
285 * @param[out] sum_col pointer to store sum of column elements
286 * @param[out] output pointer to store result of multiply-accumulate
287 * @return The function returns the multiply-accumulated result of the row by column.
288 *
289 * @details Pseudo-code
290 * *output = 0
291 * sum_col = 0
292 * for (i = 0; i < row_elements; i++)
293 * *output += row_base[i] * col_base[i]
294 * sum_col += col_base[i]
295 *
296 */
297arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements,
298 const int8_t *row_base,
299 const int8_t *col_base,
300 int32_t *const sum_col,
301 int32_t *const output);
302
303/**
304 * @brief Matrix-multiplication with requantization & activation function for four rows and one column
305 * @param[in] row_elements number of row elements
306 * @param[in] offset offset between rows. Can be the same as row_elements.
307 * For e.g, in a 1x1 conv scenario with stride as 1.
308 * @param[in] row_base pointer to row operand
309 * @param[in] col_base pointer to col operand
310 * @param[in] out_ch Number of output channels
311 * @param[in] conv_params Pointer to convolution parameters like offsets and activation values
312 * @param[in] quant_params Pointer to per-channel quantization parameters
313 * @param[in] bias Pointer to per-channel bias
314 * @param[out] output Pointer to output where int8 results are stored.
315 *
316 * @return The function returns the updated output pointer or NULL if implementation is not available.
317 *
318 * @details Compliant to TFLM int8 specification. MVE implementation only
319 */
320int8_t *arm_nn_mat_mul_core_4x_s8(const int32_t row_elements,
321 const int32_t offset,
322 const int8_t *row_base,
323 const int8_t *col_base,
324 const int32_t out_ch,
325 const cmsis_nn_conv_params *conv_params,
326 const cmsis_nn_per_channel_quant_params *quant_params,
327 const int32_t *bias,
328 int8_t *output);
329
330/**
331 * @brief General Matrix-multiplication function with per-channel requantization.
332 * This function assumes:
333 * - LHS input matrix NOT transposed (nt)
334 * - RHS input matrix transposed (t)
335 *
336 * @note This operation also performs the broadcast bias addition before the requantization
337 *
338 * @param[in] lhs Pointer to the LHS input matrix
339 * @param[in] rhs Pointer to the RHS input matrix
340 * @param[in] bias Pointer to the bias vector. The length of this vector is equal to the number of
341 * output columns (or RHS input rows)
342 * @param[out] dst Pointer to the output matrix with "m" rows and "n" columns
343 * @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization.
344 * The length of this vector is equal to the number of output columns (or RHS input
345 * rows)
346 * @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length
347 * of this vector is equal to the number of output columns (or RHS input rows)
348 * @param[in] lhs_rows Number of LHS input rows
349 * @param[in] rhs_rows Number of RHS input rows
350 * @param[in] rhs_cols Number of LHS/RHS input columns
351 * @param[in] lhs_offset Offset to be applied to the LHS input value
352 * @param[in] dst_offset Offset to be applied the output result
353 * @param[in] activation_min Minimum value to clamp down the output. Range : int8
354 * @param[in] activation_max Maximum value to clamp up the output. Range : int8
355 *
356 * @return The function returns <code>ARM_MATH_SUCCESS</code>
357 *
358 */
359arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs,
360 const q7_t *rhs,
361 const q31_t *bias,
362 q7_t *dst,
363 const int32_t *dst_multipliers,
364 const int32_t *dst_shifts,
365 const int32_t lhs_rows,
366 const int32_t rhs_rows,
367 const int32_t rhs_cols,
368 const int32_t lhs_offset,
369 const int32_t dst_offset,
370 const int32_t activation_min,
371 const int32_t activation_max);
372
373/**
374 * @brief s8 Vector by Matrix (transposed) multiplication
375 *
376 * @param[in] lhs Input left-hand side vector
377 * @param[in] rhs Input right-hand side matrix (transposed)
378 * @param[in] bias Input bias
379 * @param[out] dst Output vector
380 * @param[in] lhs_offset Offset to be added to the input values of the left-hand side vector.
381 * Range: -127 to 128
382 * @param[in] rhs_offset Not used
383 * @param[in] dst_offset Offset to be added to the output values. Range: -127 to 128
384 * @param[in] dst_multiplier Output multiplier
385 * @param[in] dst_shift Output shift
386 * @param[in] rhs_cols Number of columns in the right-hand side input matrix
387 * @param[in] rhs_rows Number of rows in the right-hand side input matrix
388 * @param[in] activation_min Minimum value to clamp the output to. Range: int8
389 * @param[in] activation_max Maximum value to clamp the output to. Range: int8
390 * @param[in] address_offset Memory position offset for dst. First output is stored at 'dst', the
391 * second at 'dst + address_offset' and so on. Default value is typically 1.
392 *
393 * @return The function returns <code>ARM_MATH_SUCCESS</code>
394 *
395 */
396arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs,
397 const q7_t *rhs,
398 const q31_t *bias,
399 q7_t *dst,
400 const int32_t lhs_offset,
401 const int32_t rhs_offset,
402 const int32_t dst_offset,
403 const int32_t dst_multiplier,
404 const int32_t dst_shift,
405 const int32_t rhs_cols,
406 const int32_t rhs_rows,
407 const int32_t activation_min,
408 const int32_t activation_max,
409 const int32_t address_offset);
410
411/**
412 * @brief s16 Vector by Matrix (transposed) multiplication
413 *
414 * @param[in] lhs Input left-hand side vector
415 * @param[in] rhs Input right-hand side matrix (transposed)
416 * @param[in] bias Input bias
417 * @param[out] dst Output vector
418 * @param[in] dst_multiplier Output multiplier
419 * @param[in] dst_shift Output shift
420 * @param[in] rhs_cols Number of columns in the right-hand side input matrix
421 * @param[in] rhs_rows Number of rows in the right-hand side input matrix
422 * @param[in] activation_min Minimum value to clamp the output to. Range: int16
423 * @param[in] activation_max Maximum value to clamp the output to. Range: int16
424 *
425 * @return The function returns <code>ARM_MATH_SUCCESS</code>
426 *
427 */
428arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs,
429 const q7_t *rhs,
430 const q63_t *bias,
431 q15_t *dst,
432 const int32_t dst_multiplier,
433 const int32_t dst_shift,
434 const int32_t rhs_cols,
435 const int32_t rhs_rows,
436 const int32_t activation_min,
437 const int32_t activation_max);
438
439/**
440 * @brief s8 Vector by Matrix (transposed) multiplication with s16 output
441 *
442 * @param[in] lhs Input left-hand side vector
443 * @param[in] rhs Input right-hand side matrix (transposed)
444 * @param[out] dst Output vector
445 * @param[in] lhs_offset Offset to be added to the input values of the left-hand side
446 * vector. Range: -127 to 128
447 * @param[in] rhs_offset Not used
448 * @param[in] scatter_offset Address offset for dst. First output is stored at 'dst', the
449 * second at 'dst + scatter_offset' and so on.
450 * @param[in] dst_multiplier Output multiplier
451 * @param[in] dst_shift Output shift
452 * @param[in] rhs_cols Number of columns in the right-hand side input matrix
453 * @param[in] rhs_rows Number of rows in the right-hand side input matrix
454 * @param[in] activation_min Minimum value to clamp the output to. Range: int16
455 * @param[in] activation_max Maximum value to clamp the output to. Range: int16
456 *
457 * @return The function returns <code>ARM_MATH_SUCCESS</code>
458 *
459 */
460arm_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs,
461 const q7_t *rhs,
462 q15_t *dst,
463 const int32_t lhs_offset,
464 const int32_t rhs_offset,
465 const int32_t scatter_offset,
466 const int32_t dst_multiplier,
467 const int32_t dst_shift,
468 const int32_t rhs_cols,
469 const int32_t rhs_rows,
470 const int32_t activation_min,
471 const int32_t activation_max);
472
473/**
474 * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where
475 * the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs.
476 *
477 * @param[in] lhs Input left-hand side matrix
478 * @param[in] rhs Input right-hand side matrix (transposed)
479 * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128
480 * @param[in] num_ch Number of channels in LHS/RHS
481 * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels
482 * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels
483 * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128
484 * @param[in] activation_min Minimum value to clamp the output to. Range: int8
485 * @param[in] activation_max Maximum value to clamp the output to. Range: int8
486 * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix
487 * @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels
488 * @param[in] out Output pointer
489 *
490 * @return The function returns one of the two
491 * - Updated output pointer if an implementation is available
492 * - NULL if no implementation is available.
493 *
494 * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read
495 * out for the following.
496 * - Output shift
497 * - Output multiplier
498 * - Output bias
499 * - rhs
500 */
501q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs,
502 const q7_t *rhs,
503 const int32_t lhs_offset,
504 const uint16_t num_ch,
505 const int32_t *out_shift,
506 const int32_t *out_mult,
507 const int32_t out_offset,
508 const int32_t activation_min,
509 const int32_t activation_max,
510 const uint16_t row_x_col,
511 const int32_t *const output_bias,
512 q7_t *out);
513
514/**
515 * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases.
516 * Dimensions are the same for lhs and rhs.
517 *
518 * @param[in] lhs Input left-hand side matrix
519 * @param[in] rhs Input right-hand side matrix (transposed)
520 * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128
521 * @param[in] num_ch Number of channels in LHS/RHS
522 * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels.
523 * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels.
524 * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128
525 * @param[in] activation_min Minimum value to clamp the output to. Range: int8
526 * @param[in] activation_max Maximum value to clamp the output to. Range: int8
527 * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix
528 * @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels.
529 * @param[in] out Output pointer
530 *
531 * @return The function returns one of the two
532 * - Updated output pointer if an implementation is available
533 * - NULL if no implementation is available.
534 *
535 * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read
536 * out for the following.
537 * - Output shift
538 * - Output multiplier
539 * - Output bias
540 * - rhs
541 */
542q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs,
543 const q7_t *rhs,
544 const int32_t lhs_offset,
545 const uint16_t num_ch,
546 const int32_t *out_shift,
547 const int32_t *out_mult,
548 const int32_t out_offset,
549 const int32_t activation_min,
550 const int32_t activation_max,
551 const uint16_t row_x_col,
552 const int32_t *const output_bias,
553 q7_t *out);
554
/**
 * @brief Matrix-multiplication function for convolution with reordered columns
 * @param[in]     pA          pointer to operand A
 * @param[in]     pInBuffer   pointer to operand B, always consists of 2 vectors
 * @param[in]     ch_im_out   numRow of A
 * @param[in]     numCol_A    numCol of A
 * @param[in]     bias_shift  amount of left-shift for bias
 * @param[in]     out_shift   amount of right-shift for output
 * @param[in]     bias        the bias
 * @param[in,out] pOut        pointer to output
 * @return     The function returns the incremented output pointer
 *
 * @details  This function assumes that data in pInBuffer are reordered
 *           (e.g. as produced by arm_q7_to_q15_reordered_no_shift()).
 */
q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA,
                                              const q15_t *pInBuffer,
                                              const uint16_t ch_im_out,
                                              const uint16_t numCol_A,
                                              const uint16_t bias_shift,
                                              const uint16_t out_shift,
                                              const q7_t *bias,
                                              q7_t *pOut);
577
578/**
579 @brief Read 2 q15 elements and post increment pointer.
580 @param[in] in_q15 Pointer to pointer that holds address of input.
581 @return q31 value
582 */
583__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15)
584{
585 q31_t val;
586
587 memcpy(&val, *in_q15, 4);
588 *in_q15 += 2;
589
590 return (val);
591}
592
593/**
594 @brief Read 4 q7 from q7 pointer and post increment pointer.
595 @param[in] in_q7 Pointer to pointer that holds address of input.
596 @return q31 value
597 */
598__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7)
599{
600 q31_t val;
601 memcpy(&val, *in_q7, 4);
602 *in_q7 += 4;
603
604 return (val);
605}
606
607/**
608 @brief Read 2 q15 from q15 pointer.
609 @param[in] in_q15 pointer to address of input.
610 @return q31 value
611 */
612__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15)
613{
614 q31_t val;
615 memcpy(&val, in_q15, 4);
616
617 return (val);
618}
619
620/**
621 @brief Read 4 q7 values.
622 @param[in] in_q7 pointer to address of input.
623 @return q31 value
624 */
625__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7)
626{
627 q31_t val;
628 memcpy(&val, in_q7, 4);
629
630 return (val);
631}
632
633/**
634 @brief Write four q7 to q7 pointer and increment pointer afterwards.
635 @param[in] in Double pointer to input value
636 @param[in] value Four bytes to copy
637 */
638__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia(q7_t **in, q31_t value)
639{
640 memcpy(*in, &value, 4);
641 *in += 4;
642}
643
/**
 * @brief memset optimized for MVE
 * @param[in, out] dst         Destination pointer
 * @param[in]      val         Value to set
 * @param[in]      block_size  Number of bytes to copy.
 *
 * On MVE (Helium) targets this uses a tail-predicated low-overhead loop
 * (wlstp/letp) storing 16 bytes per iteration; tail predication handles
 * block_size values that are not a multiple of 16 without a scalar
 * clean-up loop. Other targets fall back to the C library memset.
 */
__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t block_size)
{
#if defined(ARM_MATH_MVEI)
    /* vdup.8 broadcasts val into q0; vstrb.8 with post-increment writes it out.
       lr (r14) is clobbered as the loop counter of the wlstp/letp pair. */
    __asm volatile(" vdup.8 q0, %[set_val] \n"
                   " wlstp.8 lr, %[cnt], 1f \n"
                   "2: \n"
                   " vstrb.8 q0, [%[in]], #16 \n"
                   " letp lr, 2b \n"
                   "1: \n"
                   : [ in ] "+r"(dst)
                   : [ cnt ] "r"(block_size), [ set_val ] "r"(val)
                   : "q0", "memory", "r14");
#else
    memset(dst, val, block_size);
#endif
}
667
668#if defined(ARM_MATH_DSP)
669
/**
 * @brief read and expand one q7 word into two q15 words
 *
 * Loads 4 consecutive q7 values, sign-extends each to q15, and packs them
 * into two q31 outputs. The PKH re-interleave step keeps the original
 * element order: *out1 holds elements 0,1 and *out2 holds elements 2,3
 * (roles swapped on big-endian builds).
 *
 * @param[in]  source  pointer to 4 q7 elements; read via post-incrementing load
 * @param[out] out1    first pair of sign-extended q15 values, packed in a q31
 * @param[out] out2    second pair of sign-extended q15 values, packed in a q31
 * @return the source pointer advanced by 4 elements
 */

__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);
    /* __SXTB16 sign-extends bytes 0 and 2 of the word; the ROR-by-8
       variant sign-extends bytes 1 and 3. */
    q31_t inAbuf1 = __SXTB16_RORn((uint32_t)inA, 8);
    q31_t inAbuf2 = __SXTB16(inA);

#ifndef ARM_MATH_BIG_ENDIAN
    /* PKHTB/PKHBT merge the half-words back into input order. */
    *out2 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16));
    *out1 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16));
#else
    *out1 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16));
    *out2 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16));
#endif

    return source;
}
690
/**
 * @brief read and expand one q7 word into two q15 words with reordering
 *
 * Same sign-extension as read_and_pad() but WITHOUT the PKH re-interleave,
 * so the q15 results stay in SXTB16 lane order (even bytes in one output,
 * odd bytes in the other) — i.e. reordered relative to the input stream.
 * Consumers must expect this layout (see the *_reordered conversion
 * functions above).
 *
 * @param[in]  source  pointer to 4 q7 elements; read via post-incrementing load
 * @param[out] out1    sign-extended even-indexed bytes, packed in a q31
 * @param[out] out2    sign-extended odd-indexed bytes, packed in a q31
 * @return the source pointer advanced by 4 elements
 */

__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);
#ifndef ARM_MATH_BIG_ENDIAN
    /* __SXTB16 extends bytes 0,2; the rotated variant extends bytes 1,3. */
    *out2 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out1 = __SXTB16(inA);
#else
    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out2 = __SXTB16(inA);
#endif

    return source;
}
708
/**
 * @brief read and expand one q7 word into two q15 words with reordering and add an offset
 *
 * As read_and_pad_reordered(), then adds `offset` to every q15 lane using
 * saturating packed addition (__QADD16).
 * NOTE(review): `offset` must contain the q15 offset duplicated in both
 * 16-bit halves for the per-lane add to be correct — confirm at call sites.
 *
 * @param[in]  source  pointer to 4 q7 elements; read via post-incrementing load
 * @param[out] out1    sign-extended even-indexed bytes plus offset, packed in a q31
 * @param[out] out2    sign-extended odd-indexed bytes plus offset, packed in a q31
 * @param[in]  offset  packed q15 offset (same value in both half-words)
 * @return the source pointer advanced by 4 elements
 */
__STATIC_FORCEINLINE const q7_t *
read_and_pad_reordered_with_offset(const q7_t *source, q31_t *out1, q31_t *out2, q31_t offset)
{
    q31_t inA = arm_nn_read_q7x4_ia(&source);

#ifndef ARM_MATH_BIG_ENDIAN
    /* __SXTB16 extends bytes 0,2; the rotated variant extends bytes 1,3. */
    *out2 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out1 = __SXTB16(inA);
#else
    *out1 = __SXTB16(__ROR((uint32_t)inA, 8));
    *out2 = __SXTB16(inA);
#endif
    /* Saturating per-lane q15 addition of the packed offset. */
    *out1 = __QADD16(*out1, offset);
    *out2 = __QADD16(*out2, offset);

    return source;
}
729
730#endif
731
732/**
733 * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation
734 *
735 * Basic Math Functions for Neural Network Computation
736 *
737 */
738
/**
 * @brief q15 vector multiplication with variable output shifts
 * @param[in]  *pSrcA    pointer to the first input vector
 * @param[in]  *pSrcB    pointer to the second input vector
 * @param[out] *pDst     pointer to the output vector
 * @param[in]  out_shift amount of right-shift for output
 * @param[in]  blockSize number of samples in each vector
 * @return none.
 *
 * <b>Scaling and Overflow Behavior:</b>
 * \par
 * The function uses saturating arithmetic.
 * Results outside of the allowable q15 range [0x8000 0x7FFF] will be saturated.
 */

void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize);
755
756/**
757 * @brief q7 vector multiplication with variable output shifts
758 * @param[in] *pSrcA pointer to the first input vector
759 * @param[in] *pSrcB pointer to the second input vector
760 * @param[out] *pDst pointer to the output vector
761 * @param[in] out_shift amount of right-shift for output
762 * @param[in] blockSize number of samples in each vector
763 * @return none.
764 *
765 * <b>Scaling and Overflow Behavior:</b>
766 * \par
767 * The function uses saturating arithmetic.
768 * Results outside of the allowable q7 range [0x80 0x7F] will be saturated.
769 */
770
771void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize);
772
773/**
774 * @brief Matrix-multiplication function for convolution with per-channel requantization.
775 * @param[in] input_a pointer to operand A
776 * @param[in] input_b pointer to operand B, always consists of 2 vectors.
777 * @param[in] output_ch number of rows of A
778 * @param[in] out_shift pointer to per output channel requantization shift parameter.
779 * @param[in] out_mult pointer to per output channel requantization multiplier parameter.
780 * @param[in] out_offset output tensor offset.
781 * @param[in] activation_min minimum value to clamp the output to. Range : int8
782 * @param[in] activation_max maximum value to clamp the output to. Range : int8
783 * @param[in] num_col_a number of columns of A
784 * @param[in] output_bias per output channel bias. Range : int32
785 * @param[in,out] out_0 pointer to output
786 * @return The function returns one of the two
787 * 1. The incremented output pointer for a successful operation or
788 * 2. NULL if implementation is not available.
789 *
790 * @details This function does the matrix multiplication of weight matrix for all output channels
791 * with 2 columns from im2col and produces two elements/output_channel. The outputs are
792 * clamped in the range provided by activation min and max.
793 * Supported framework: TensorFlow Lite micro.
794 */
795q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
796 const q15_t *input_b,
797 const uint16_t output_ch,
798 const int32_t *out_shift,
799 const int32_t *out_mult,
800 const int32_t out_offset,
801 const int16_t activation_min,
802 const int16_t activation_max,
803 const uint16_t num_col_a,
804 const int32_t *const output_bias,
805 q7_t *out_0);
806
807/**
808 * @brief Common softmax function for s8 input and s8 or s16 output
809 * @param[in] input Pointer to the input tensor
810 * @param[in] num_rows Number of rows in the input tensor
811 * @param[in] row_size Number of elements in each input row
812 * @param[in] mult Input quantization multiplier
813 * @param[in] shift Input quantization shift within the range [0, 31]
814 * @param[in] diff_min Minimum difference with max in row. Used to check if
815 * the quantized exponential operation can be performed
816 * @param[in] int16_output Indicating s8 output if 0 else s16 output
817 * @param[out] output Pointer to the output tensor
818 *
819 * @note Supported framework: TensorFlow Lite micro (bit-accurate)
820 *
821 */
822void arm_nn_softmax_common_s8(const int8_t *input,
823 const int32_t num_rows,
824 const int32_t row_size,
825 const int32_t mult,
826 const int32_t shift,
827 const int32_t diff_min,
828 const bool int16_output,
829 void *output);
830
/**
 * @brief macro for adding rounding offset
 *
 * Evaluates to half of the final right-shift step (i.e. 2^(out_shift-1),
 * or 0 when out_shift is 0), so that the subsequent shift rounds to
 * nearest instead of truncating. Defined as 0 when ARM_NN_TRUNCATE is set.
 * The argument is parenthesized so compound expressions (e.g. `a ? b : c`,
 * `x & 3`) bind correctly inside the shift.
 */
#ifndef ARM_NN_TRUNCATE
#define NN_ROUND(out_shift) ((0x1 << (out_shift)) >> 1)
#else
#define NN_ROUND(out_shift) 0
#endif
839
840// Macros for shortening quantization functions' names and avoid long lines
841#define MUL_SAT(a, b) arm_nn_doubling_high_mult((a), (b))
842#define MUL_SAT_MVE(a, b) arm_doubling_high_mult_mve_32x4((a), (b))
843#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b))
844
845#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b))
846#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b))
847
848#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x))
849#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x))
850
851/**
852 * @brief Saturating doubling high multiply. Result matches
853 * NEON instruction VQRDMULH.
854 * @param[in] m1 Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
855 * @param[in] m2 Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX}
856 * @return Result of multiplication.
857 *
858 */
859__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t m2)
860{
861 q31_t result = 0;
862 // Rounding offset to add for a right shift of 31
863 q63_t mult = 1 << 30;
864
865 if ((m1 < 0) ^ (m2 < 0))
866 {
867 mult = 1 - mult;
868 }
869 // Gets resolved as a SMLAL instruction
870 mult = mult + (q63_t)m1 * m2;
871
872 // Utilize all of the upper 32 bits. This is the doubling step
873 // as well.
874 result = (int32_t)(mult / (1ll << 31));
875
876 if ((m1 == m2) && (m1 == (int32_t)NN_Q31_MIN))
877 {
878 result = NN_Q31_MAX;
879 }
880 return result;
881}
882
883/**
884 * @brief Doubling high multiply without saturation. This is intended
885 * for requantization where the scale is a positive integer
886 *
887 * @param[in] m1 Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX}
888 * @param[in] m2 Multiplier Range: {NN_Q31_MIN, NN_Q31_MAX}
889 * @return Result of multiplication.
890 * @note The result of this matches that of neon instruction
891 * VQRDMULH for m1 in range {NN_Q31_MIN, NN_Q31_MAX} and m2 in
892 * range {NN_Q31_MIN + 1, NN_Q31_MAX}. Saturation occurs when
893 * m1 equals m2 equals NN_Q31_MIN and that is not handled by
894 * this function.
895 *
896 */
897__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult_no_sat(const q31_t m1, const q31_t m2)
898{
899 q31_t result = 0;
900 union arm_nn_long_long mult;
901
902 // Rounding offset to add for a right shift of 31
903 mult.word.low = 1 << 30;
904 mult.word.high = 0;
905
906 // Gets resolved as a SMLAL instruction
907 mult.long_long = mult.long_long + (q63_t)m1 * m2;
908
909 // Utilize all of the upper 32 bits. This is the doubling step
910 // as well.
911 result = (int32_t)(mult.long_long >> 31);
912
913 return result;
914}
915
916/**
917 * @brief Rounding divide by power of two.
918 * @param[in] dividend - Dividend
919 * @param[in] exponent - Divisor = power(2, exponent)
920 * Range: [0, 31]
921 * @return Rounded result of division. Midpoint is rounded away from zero.
922 *
923 */
924__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent)
925{
926 q31_t result = 0;
927 const q31_t remainder_mask = (1 << exponent) - 1;
928 int32_t remainder = remainder_mask & dividend;
929
930 // Basic division
931 result = dividend >> exponent;
932
933 // Adjust 'result' for rounding (mid point away from zero)
934 q31_t threshold = remainder_mask >> 1;
935 if (result < 0)
936 {
937 threshold++;
938 }
939 if (remainder > threshold)
940 {
941 result++;
942 }
943
944 return result;
945}
946
947/**
948 * @brief Requantize a given value.
949 * @param[in] val Value to be requantized
950 * @param[in] multiplier multiplier. Range {NN_Q31_MIN + 1, Q32_MAX}
951 * @param[in] shift left or right shift for 'val * multiplier'
952 *
953 * @return Returns (val * multiplier)/(2 ^ shift)
954 *
955 */
956__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift)
957{
958#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
959 const int64_t total_shift = 31 - shift;
960 const int64_t new_val = val * (int64_t)multiplier;
961
962 int32_t result = new_val >> (total_shift - 1);
963 result = (result + 1) >> 1;
964
965 return result;
966#else
967 return arm_nn_divide_by_power_of_two(arm_nn_doubling_high_mult_no_sat(val * (1 << LEFT_SHIFT(shift)), multiplier),
968 RIGHT_SHIFT(shift));
969#endif
970}
971
972/**
973 * @brief Requantize a given 64 bit value.
974 * @param[in] val Value to be requantized in the range {-(1<<47)} to {(1<<47) - 1}
975 * @param[in] reduced_multiplier Reduced multiplier in the range {NN_Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1,
976 * Q16_MAX}
977 * @param[in] shift Left or right shift for 'val * multiplier' in the range {-31} to {7}
978 *
979 * @return Returns (val * multiplier)/(2 ^ shift)
980 *
981 */
982__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64(const q63_t val, const q31_t reduced_multiplier, const q31_t shift)
983{
984 const q63_t new_val = val * reduced_multiplier;
985
986 q31_t result = new_val >> (14 - shift); // 64->32 bit reduction
987 result = (result + 1) >> 1; // Last shift position and insert round
988
989 return result;
990}
991
992/**
993 * @brief memcpy optimized for MVE
994 * @param[in, out] dst Destination pointer
995 * @param[in] src Source pointer.
996 * @param[in] block_size Number of bytes to copy.
997 *
998 */
999__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, const q7_t *__RESTRICT src, uint32_t block_size)
1000{
1001#if defined(ARM_MATH_MVEI)
1002 __asm volatile(" wlstp.8 lr, %[cnt], 1f \n"
1003 "2: \n"
1004 " vldrb.8 q0, [%[in]], #16 \n"
1005 " vstrb.8 q0, [%[out]], #16 \n"
1006 " letp lr, 2b \n"
1007 "1: \n"
1008 : [ in ] "+r"(src), [ out ] "+r"(dst)
1009 : [ cnt ] "r"(block_size)
1010 : "q0", "memory", "r14");
1011#else
1012 memcpy(dst, src, block_size);
1013#endif
1014}
1015
1016#if defined(ARM_MATH_MVEI)
1017/**
1018 * @brief Vector saturating doubling high multiply returning high half.
1019 * @param[in] m1 Multiplicand
1020 * @param[in] m2 Multiplier
1021 * @return Result of multiplication.
1022 *
1023 */
1024__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2)
1025{
1026 return vqrdmulhq_n_s32(m1, m2);
1027}
1028
1029/**
1030 * @brief Vector rounding divide by power of two.
1031 * @param[in] dividend - Dividend vector
1032 * @param[in] exponent - Divisor = power(2, exponent)
1033 * Range: [0, 31]
1034 * @return Rounded result of division. Midpoint is rounded away from zero.
1035 *
1036 */
1037__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent)
1038{
1039 const int32x4_t shift = vdupq_n_s32(-exponent);
1040 const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
1041 const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
1042 return vrshlq_s32(fixed_up_dividend, shift);
1043}
1044
1045/**
1046 * @brief Requantize a given vector.
1047 * @param[in] val Vector to be requantized
1048 * @param[in] multiplier multiplier
1049 * @param[in] shift shift
1050 *
1051 * @return Returns (val * multiplier)/(2 ^ shift)
1052 *
1053 */
1054__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift)
1055{
1056#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
1057 const int right_shift = MIN(-1, shift);
1058 const int left_shift = shift - right_shift;
1059
1060 const int32x4_t left_shift_dup = vdupq_n_s32(left_shift);
1061 const int32x4_t right_shift_dup = vdupq_n_s32(right_shift);
1062
1063 int32x4_t result = vqdmulhq_n_s32(vshlq_s32(val, left_shift_dup), multiplier);
1064 result = vrshlq_s32(result, right_shift_dup);
1065
1066 return result;
1067#else
1068 return arm_divide_by_power_of_two_mve(
1069 arm_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier), RIGHT_SHIFT(shift));
1070#endif
1071}
1072
/**
 * @brief Vector saturating doubling high multiply with a per-lane multiplier.
 */
__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2)
{
    return vqrdmulhq_s32(m1, m2);
}
1077
/**
 * @brief Vector rounding divide by power of two with a per-lane exponent.
 *        Midpoint is rounded away from zero, same scheme as the broadcast
 *        variant arm_divide_by_power_of_two_mve.
 */
__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent)
{
    const int32x4_t shift = -exponent;
    // Subtract 1 from negative lanes so vrshlq's round-half-up becomes
    // round-half-away-from-zero.
    const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31);
    const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup);
    return vrshlq_s32(fixed_up_dividend, shift);
}
1085
/**
 * @brief Requantize a vector with per-lane multiplier and shift.
 * @param[in] val Vector to be requantized
 * @param[in] multiplier Per-lane multiplier
 * @param[in] shift Per-lane left (positive) or right (negative) shift
 *
 * @return Returns (val * multiplier)/(2 ^ shift) per lane
 */
__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val,
                                                       const int32x4_t multiplier,
                                                       const int32x4_t shift)
{
#ifdef CMSIS_NN_USE_SINGLE_ROUNDING
    // Single-rounding mode: keep at least one bit of right shift for the
    // final rounding vrshlq.
    const int32x4_t right_shift = vminq_s32(vdupq_n_s32(-1), shift);
    const int32x4_t left_shift = vqsubq_s32(shift, right_shift);

    int32x4_t result = vqdmulhq_s32(vshlq_s32(val, left_shift), multiplier);
    result = vrshlq_s32(result, right_shift);

    return result;
#else
    const int32x4_t zz = vdupq_n_s32(0);
    // Predicate selecting lanes whose shift is positive (a left shift).
    const mve_pred16_t p = vcmpgtq_n_s32(shift, 0);

    // Per-lane split into left-shift and right-shift amounts, mirroring the
    // LEFT_SHIFT()/RIGHT_SHIFT() macros used by the scalar path.
    const int32x4_t left_shift = vpselq_s32(shift, zz, p);
    const int32x4_t right_shift = -vpselq_s32(zz, shift, p);

    return arm_divide_by_power_of_two_mve_32x4(arm_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier),
                                               right_shift);
#endif
}
1109#endif
1110
1111// @note The following functions are used only for softmax layer, scaled bits = 5 assumed
1112
/**
 * @brief Fixed-point exp() for non-positive input, used by the softmax
 *        kernels. Mirrors gemmlowp's exp_on_negative_values(): with the
 *        scaled bits = 5 noted above the input is interpreted as Q5.26 and
 *        the Q0.31 result lies in (0, 1].
 */
__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val)
{
    int32_t mask = 0;
    int32_t shift = 24; // Bit position of 1/4 in the Q5.26 input

    // Split val into an integer multiple of -1/4 (handled by the constant
    // table below) and a fractional part in (-1/4, 0].
    const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift);
    const int32_t remainder = val_mod_minus_quarter - val;
    // Rescale the fractional part to Q0.31 and re-center it around -1/8.
    const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28);
    const int32_t x2 = MUL_SAT(x, x);

    // Polynomial approximation of exp(x) on the centered interval, scaled by
    // exp(-1/8) (1895147668 ~= exp(-1/8) in Q0.31, 715827883 ~= 1/3 in Q0.31).
    int32_t result = 1895147668 +
        MUL_SAT(1895147668, x + DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1));

    // For each bit set in the quarter-multiple part, multiply in the
    // corresponding precomputed factor; the constants below are
    // exp(-1/4), exp(-1/2), exp(-1), ... exp(-16) in Q0.31.
#define SELECT_IF_NON_ZERO(x)                                                                                          \
    {                                                                                                                  \
        mask = MASK_IF_NON_ZERO(remainder & (1 << shift++));                                                           \
        result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result);                                                  \
    }

    SELECT_IF_NON_ZERO(1672461947)
    SELECT_IF_NON_ZERO(1302514674)
    SELECT_IF_NON_ZERO(790015084)
    SELECT_IF_NON_ZERO(290630308)
    SELECT_IF_NON_ZERO(39332535)
    SELECT_IF_NON_ZERO(720401)
    SELECT_IF_NON_ZERO(242)

#undef SELECT_IF_NON_ZERO

    // exp(0) saturates to the maximum representable Q0.31 value.
    mask = MASK_IF_ZERO(val);
    return SELECT_USING_MASK(mask, NN_Q31_MAX, result);
}
1145
1146__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp)
1147{
1148 const int32_t thresh = ((1 << (31 - exp)) - 1);
1149 int32_t result = val << exp;
1150 result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), NN_Q31_MAX, result);
1151 result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), NN_Q31_MIN, result);
1152 return result;
1153}
1154
1155__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val)
1156{
1157 const int64_t sum = (int64_t)val + (int64_t)NN_Q31_MAX;
1158 const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L);
1159 int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540);
1160
1161 const int32_t shift = (1 << 29);
1162 x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
1163 x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
1164 x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2);
1165
1166 return MUL_POW2(x, 1);
1167}
1168
1169/**
1170 @brief Write 2 q15 elements and post increment pointer.
1171 @param[in] dest_q15 Pointer to pointer that holds address of destination.
1172 @param[in] src_q31 Input value to be written.
1173 */
1174__STATIC_FORCEINLINE void arm_nn_write_q15x2_ia(q15_t **dest_q15, q31_t src_q31)
1175{
1176 q31_t val = src_q31;
1177
1178 memcpy(*dest_q15, &val, 4);
1179 *dest_q15 += 2;
1180}
1181
1182#ifdef __cplusplus
1183}
1184#endif
1185
1186#endif
Note: See TracBrowser for help on using the repository browser.