Initial commit
This commit is contained in:
@ -0,0 +1,20 @@
|
||||
|
||||
/*
|
||||
* Auto generated Run-Time-Environment Component Configuration File
|
||||
* *** Do not modify ! ***
|
||||
*
|
||||
* Project: 'arm_nnexamples_cifar10'
|
||||
* Target: 'ARMCM0'
|
||||
*/
|
||||
|
||||
#ifndef RTE_COMPONENTS_H
|
||||
#define RTE_COMPONENTS_H
|
||||
|
||||
|
||||
/*
|
||||
* Define the Device Header File:
|
||||
*/
|
||||
#define CMSIS_device_header "ARMCM0.h"
|
||||
|
||||
|
||||
#endif /* RTE_COMPONENTS_H */
|
@ -0,0 +1,26 @@
|
||||
|
||||
/*
|
||||
* Auto generated Run-Time-Environment Component Configuration File
|
||||
* *** Do not modify ! ***
|
||||
*
|
||||
* Project: 'arm_nnexamples_nn_test'
|
||||
* Target: 'ARMCM3'
|
||||
*/
|
||||
|
||||
#ifndef RTE_COMPONENTS_H
|
||||
#define RTE_COMPONENTS_H
|
||||
|
||||
|
||||
/*
|
||||
* Define the Device Header File:
|
||||
*/
|
||||
#define CMSIS_device_header "ARMCM3.h"
|
||||
|
||||
#define RTE_Compiler_IO_STDERR /* Compiler I/O: STDERR */
|
||||
#define RTE_Compiler_IO_STDERR_ITM /* Compiler I/O: STDERR ITM */
|
||||
#define RTE_Compiler_IO_STDOUT /* Compiler I/O: STDOUT */
|
||||
#define RTE_Compiler_IO_STDOUT_ITM /* Compiler I/O: STDOUT ITM */
|
||||
#define RTE_Compiler_IO_TTY /* Compiler I/O: TTY */
|
||||
#define RTE_Compiler_IO_TTY_ITM /* Compiler I/O: TTY ITM */
|
||||
|
||||
#endif /* RTE_COMPONENTS_H */
|
@ -0,0 +1,26 @@
|
||||
|
||||
/*
|
||||
* Auto generated Run-Time-Environment Component Configuration File
|
||||
* *** Do not modify ! ***
|
||||
*
|
||||
* Project: 'arm_nnexamples_nn_test'
|
||||
* Target: 'ARMCM4_FP'
|
||||
*/
|
||||
|
||||
#ifndef RTE_COMPONENTS_H
|
||||
#define RTE_COMPONENTS_H
|
||||
|
||||
|
||||
/*
|
||||
* Define the Device Header File:
|
||||
*/
|
||||
#define CMSIS_device_header "ARMCM4_FP.h"
|
||||
|
||||
#define RTE_Compiler_IO_STDERR /* Compiler I/O: STDERR */
|
||||
#define RTE_Compiler_IO_STDERR_ITM /* Compiler I/O: STDERR ITM */
|
||||
#define RTE_Compiler_IO_STDOUT /* Compiler I/O: STDOUT */
|
||||
#define RTE_Compiler_IO_STDOUT_ITM /* Compiler I/O: STDOUT ITM */
|
||||
#define RTE_Compiler_IO_TTY /* Compiler I/O: TTY */
|
||||
#define RTE_Compiler_IO_TTY_ITM /* Compiler I/O: TTY ITM */
|
||||
|
||||
#endif /* RTE_COMPONENTS_H */
|
@ -0,0 +1,26 @@
|
||||
|
||||
/*
|
||||
* Auto generated Run-Time-Environment Component Configuration File
|
||||
* *** Do not modify ! ***
|
||||
*
|
||||
* Project: 'arm_nnexamples_nn_test'
|
||||
* Target: 'ARMCM7_SP'
|
||||
*/
|
||||
|
||||
#ifndef RTE_COMPONENTS_H
|
||||
#define RTE_COMPONENTS_H
|
||||
|
||||
|
||||
/*
|
||||
* Define the Device Header File:
|
||||
*/
|
||||
#define CMSIS_device_header "ARMCM7_SP.h"
|
||||
|
||||
#define RTE_Compiler_IO_STDERR /* Compiler I/O: STDERR */
|
||||
#define RTE_Compiler_IO_STDERR_ITM /* Compiler I/O: STDERR ITM */
|
||||
#define RTE_Compiler_IO_STDOUT /* Compiler I/O: STDOUT */
|
||||
#define RTE_Compiler_IO_STDOUT_ITM /* Compiler I/O: STDOUT ITM */
|
||||
#define RTE_Compiler_IO_TTY /* Compiler I/O: TTY */
|
||||
#define RTE_Compiler_IO_TTY_ITM /* Compiler I/O: TTY ITM */
|
||||
|
||||
#endif /* RTE_COMPONENTS_H */
|
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_convolve_HWC_q15_ref(const q15_t * Im_in, // input image
|
||||
const uint16_t dim_im_in, // input image dimention
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q15_t * wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel, // filter kernel size
|
||||
const uint16_t padding, // padding sizes
|
||||
const uint16_t stride, // stride
|
||||
const q15_t * bias, // bias
|
||||
const uint16_t bias_shift, const uint16_t out_shift, q15_t * Im_out, // output image
|
||||
const uint16_t dim_im_out, // output image dimension
|
||||
q15_t * bufferA, //buffer space for input
|
||||
q7_t * bufferB //buffer space for output
|
||||
)
|
||||
{
|
||||
int i, j, k, l, m, n;
|
||||
int conv_out;
|
||||
int in_row, in_col;
|
||||
|
||||
for (i = 0; i < ch_im_out; i++)
|
||||
{
|
||||
for (j = 0; j < dim_im_out; j++)
|
||||
{
|
||||
for (k = 0; k < dim_im_out; k++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
conv_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
conv_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (m = 0; m < dim_kernel; m++)
|
||||
{
|
||||
for (n = 0; n < dim_kernel; n++)
|
||||
{
|
||||
in_row = stride * j + m - padding;
|
||||
in_col = stride * k + n - padding;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
||||
{
|
||||
for (l = 0; l < ch_im_in; l++)
|
||||
{
|
||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void
|
||||
arm_convolve_HWC_q15_nonsquare_ref(const q15_t * Im_in,
|
||||
const uint16_t dim_im_in_x,
|
||||
const uint16_t dim_im_in_y,
|
||||
const uint16_t ch_im_in,
|
||||
const q15_t * wt,
|
||||
const uint16_t ch_im_out,
|
||||
const uint16_t dim_kernel_x,
|
||||
const uint16_t dim_kernel_y,
|
||||
const uint16_t padding_x,
|
||||
const uint16_t padding_y,
|
||||
const uint16_t stride_x,
|
||||
const uint16_t stride_y,
|
||||
const q15_t * bias,
|
||||
const uint16_t bias_shift,
|
||||
const uint16_t out_shift,
|
||||
q15_t * Im_out,
|
||||
const uint16_t dim_im_out_x,
|
||||
const uint16_t dim_im_out_y,
|
||||
q15_t * bufferA,
|
||||
q7_t * bufferB)
|
||||
|
||||
{
|
||||
uint16_t i, j, k, l, m, n;
|
||||
int conv_out;
|
||||
signed char in_row, in_col;
|
||||
|
||||
for (i = 0; i < ch_im_out; i++)
|
||||
{
|
||||
for (j = 0; j < dim_im_out_y; j++)
|
||||
{
|
||||
for (k = 0; k < dim_im_out_x; k++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
conv_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
conv_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (m = 0; m < dim_kernel_y; m++)
|
||||
{
|
||||
for (n = 0; n < dim_kernel_x; n++)
|
||||
{
|
||||
in_row = stride_y * j + m - padding_y;
|
||||
in_col = stride_x * k + n - padding_x;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
||||
{
|
||||
for (l = 0; l < ch_im_in; l++)
|
||||
{
|
||||
conv_out +=
|
||||
Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in +
|
||||
l] * wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x +
|
||||
n) * ch_im_in + l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t) __SSAT((conv_out >> out_shift), 16);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_convolve_HWC_q7_ref(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in, // input image dimention
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t * wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel, // filter kernel size
|
||||
const uint16_t padding, // padding sizes
|
||||
const uint16_t stride, // stride
|
||||
const q7_t * bias, // bias
|
||||
const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // output image
|
||||
const uint16_t dim_im_out, // output image dimension
|
||||
q15_t * bufferA, //buffer space for input
|
||||
q7_t * bufferB //buffer space for output
|
||||
)
|
||||
{
|
||||
int i, j, k, l, m, n;
|
||||
int conv_out;
|
||||
int in_row, in_col;
|
||||
|
||||
for (i = 0; i < ch_im_out; i++)
|
||||
{
|
||||
for (j = 0; j < dim_im_out; j++)
|
||||
{
|
||||
for (k = 0; k < dim_im_out; k++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
conv_out = ((q31_t) (bias[i]) << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
conv_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (m = 0; m < dim_kernel; m++)
|
||||
{
|
||||
for (n = 0; n < dim_kernel; n++)
|
||||
{
|
||||
// if-for implementation
|
||||
in_row = stride * j + m - padding;
|
||||
in_col = stride * k + n - padding;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
||||
{
|
||||
for (l = 0; l < ch_im_in; l++)
|
||||
{
|
||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_convolve_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimention x
|
||||
const uint16_t dim_im_in_y, // input image dimention y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t * wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const q7_t * bias, // bias
|
||||
const uint16_t bias_shift, const uint16_t out_shift, q7_t * Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t * bufferA, //buffer space for input
|
||||
q7_t * bufferB //buffer space for output
|
||||
)
|
||||
{
|
||||
int i, j, k, l, m, n;
|
||||
int conv_out;
|
||||
int in_row, in_col;
|
||||
|
||||
for (i = 0; i < ch_im_out; i++)
|
||||
{
|
||||
for (j = 0; j < dim_im_out_y; j++)
|
||||
{
|
||||
for (k = 0; k < dim_im_out_x; k++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
conv_out = ((q31_t) (bias[i]) << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
conv_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (m = 0; m < dim_kernel_y; m++)
|
||||
{
|
||||
for (n = 0; n < dim_kernel_x; n++)
|
||||
{
|
||||
// if-for implementation
|
||||
in_row = stride_y * j + m - padding_y;
|
||||
in_col = stride_x * k + n - padding_x;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
||||
{
|
||||
for (l = 0; l < ch_im_in; l++)
|
||||
{
|
||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
|
||||
wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in +
|
||||
l];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t) __SSAT((conv_out >> out_shift), 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_depthwise_separable_conv_HWC_q7_ref(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in, // input image dimention
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t * wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel, // filter kernel size
|
||||
const uint16_t padding, // padding sizes
|
||||
const uint16_t stride, // stride
|
||||
const q7_t * bias, // bias
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
q7_t * Im_out, // output image
|
||||
const uint16_t dim_im_out, // output image dimension
|
||||
q15_t * bufferA, //buffer space for input
|
||||
q7_t * bufferB //buffer space for output
|
||||
)
|
||||
{
|
||||
int i_out_y, i_out_x, i_ch_out;
|
||||
int i_ker_y, i_ker_x;
|
||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
||||
{
|
||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
||||
{
|
||||
for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
|
||||
{
|
||||
// for each output
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int conv_out = (bias[i_ch_out] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int conv_out = bias[i_ch_out] << bias_shift;
|
||||
#endif
|
||||
for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++)
|
||||
{
|
||||
for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++)
|
||||
{
|
||||
int in_row = stride * i_out_y + i_ker_y - padding;
|
||||
int in_col = stride * i_out_x + i_ker_x - padding;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
||||
{
|
||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + i_ch_out] *
|
||||
wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out];
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[(i_out_y * dim_im_out + i_out_x) * ch_im_out + i_ch_out] =
|
||||
(q7_t) __SSAT((conv_out >> out_shift), 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(const q7_t * Im_in, // input image
|
||||
const uint16_t dim_im_in_x, // input image dimention x
|
||||
const uint16_t dim_im_in_y, // input image dimention y
|
||||
const uint16_t ch_im_in, // number of input image channels
|
||||
const q7_t * wt, // kernel weights
|
||||
const uint16_t ch_im_out, // number of filters, i.e., output image channels
|
||||
const uint16_t dim_kernel_x, // filter kernel size x
|
||||
const uint16_t dim_kernel_y, // filter kernel size y
|
||||
const uint16_t padding_x, // padding sizes x
|
||||
const uint16_t padding_y, // padding sizes y
|
||||
const uint16_t stride_x, // stride x
|
||||
const uint16_t stride_y, // stride y
|
||||
const q7_t * bias, // bias
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
q7_t * Im_out, // output image
|
||||
const uint16_t dim_im_out_x, // output image dimension x
|
||||
const uint16_t dim_im_out_y, // output image dimension y
|
||||
q15_t * bufferA, //buffer space for input
|
||||
q7_t * bufferB //buffer space for output
|
||||
)
|
||||
{
|
||||
int i_out_y, i_out_x, i_ch_out;
|
||||
int i_ker_y, i_ker_x;
|
||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
||||
{
|
||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
||||
{
|
||||
for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
|
||||
{
|
||||
// for each output
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int conv_out = (bias[i_ch_out] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int conv_out = bias[i_ch_out] << bias_shift;
|
||||
#endif
|
||||
for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++)
|
||||
{
|
||||
for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++)
|
||||
{
|
||||
int in_row = stride_y * i_out_y + i_ker_y - padding_y;
|
||||
int in_col = stride_x * i_out_x + i_ker_x - padding_x;
|
||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
||||
{
|
||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] *
|
||||
wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out];
|
||||
}
|
||||
}
|
||||
}
|
||||
Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] =
|
||||
(q7_t) __SSAT((conv_out >> out_shift), 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q15_t * pOut, // output operand
|
||||
q15_t * vec_buffer)
|
||||
{
|
||||
|
||||
uint16_t rowCnt = num_of_rows >> 2;
|
||||
const q7_t *pB = pM;
|
||||
const q15_t *pA;
|
||||
q15_t *pO = pOut;
|
||||
const q7_t *pBias = bias;
|
||||
|
||||
while (rowCnt)
|
||||
{
|
||||
pA = pV;
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
q31_t sum = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum2 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum3 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum4 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
q31_t sum = *pBias++ << bias_shift;
|
||||
q31_t sum2 = *pBias++ << bias_shift;
|
||||
q31_t sum3 = *pBias++ << bias_shift;
|
||||
q31_t sum4 = *pBias++ << bias_shift;
|
||||
#endif
|
||||
|
||||
uint16_t colCnt = dim_vec >> 1;
|
||||
|
||||
while (colCnt)
|
||||
{
|
||||
q15_t inA1 = *pA++;
|
||||
q15_t inA2 = *pA++;
|
||||
|
||||
q7_t inB1 = *pB++;
|
||||
q7_t inB3 = *pB++;
|
||||
q7_t inB2 = *pB++;
|
||||
q7_t inB4 = *pB++;
|
||||
|
||||
sum += inA1 * inB1 + inA2 * inB2;
|
||||
sum2 += inA1 * inB3 + inA2 * inB4;
|
||||
|
||||
inB1 = *pB++;
|
||||
inB3 = *pB++;
|
||||
inB2 = *pB++;
|
||||
inB4 = *pB++;
|
||||
|
||||
sum3 += inA1 * inB1 + inA2 * inB2;
|
||||
sum4 += inA1 * inB3 + inA2 * inB4;
|
||||
|
||||
colCnt--;
|
||||
}
|
||||
colCnt = dim_vec & 0x1;
|
||||
while (colCnt)
|
||||
{
|
||||
q15_t inA = *pA++;
|
||||
q7_t inB = *pB++;
|
||||
sum += inA * inB;
|
||||
inB = *pB++;
|
||||
sum2 += inA * inB;
|
||||
inB = *pB++;
|
||||
sum3 += inA * inB;
|
||||
inB = *pB++;
|
||||
sum4 += inA * inB;
|
||||
|
||||
colCnt--;
|
||||
}
|
||||
*pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
|
||||
*pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
|
||||
*pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
|
||||
*pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
|
||||
|
||||
rowCnt--;
|
||||
}
|
||||
|
||||
rowCnt = num_of_rows & 0x3;
|
||||
|
||||
while (rowCnt)
|
||||
{
|
||||
pA = pV;
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int ip_out = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int ip_out = *pBias++ << bias_shift;
|
||||
#endif
|
||||
for (int j = 0; j < dim_vec; j++)
|
||||
{
|
||||
q15_t inA = *pA++;
|
||||
q7_t inB = *pB++;
|
||||
ip_out += inA * inB;
|
||||
}
|
||||
*pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
|
||||
|
||||
rowCnt--;
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q15_t * pOut, // output operand
|
||||
q15_t * vec_buffer)
|
||||
{
|
||||
for (int i = 0; i < num_of_rows; i++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int ip_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int ip_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (int j = 0; j < dim_vec; j++)
|
||||
{
|
||||
ip_out += pV[j] * pM[i * dim_vec + j];
|
||||
}
|
||||
pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
|
||||
}
|
||||
}
|
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
/*
 * Plain-C q15 fully connected layer that consumes the weight matrix in a
 * reordered, interleaved layout.
 *
 * Rows are processed four at a time. For each group of four rows and each
 * pair of vector elements (a0, a1), eight weights are read in this order:
 *   row0*a0, row0*a1, row1*a0, row1*a1, row2*a0, row2*a1, row3*a0, row3*a1
 * When dim_vec is odd, the last vector element is followed by one weight for
 * each of the four rows. The remaining (num_of_rows & 3) rows are stored
 * row-major with dim_vec weights each.
 *
 * Each accumulator starts from the left-shifted bias, is right-shifted by
 * out_shift and saturated to 16 bits with __SSAT.
 *
 * vec_buffer is not referenced by this implementation.
 */
void arm_fully_connected_q15_opt_ref(const q15_t * pV,              // pointer to vector
                                     const q15_t * pM,              // pointer to (reordered) matrix
                                     const uint16_t dim_vec,        // length of the vector
                                     const uint16_t num_of_rows,    // number of matrix rows, i.e. outputs
                                     const uint16_t bias_shift,     // amount of left-shift for bias
                                     const uint16_t out_shift,      // amount of right-shift for output
                                     const q15_t * bias,            // bias, one value per row
                                     q15_t * pOut,                  // output operand
                                     q15_t * vec_buffer)            // unused here
{
    uint16_t  rowCnt = num_of_rows >> 2;
    const q15_t *pB = pM;
    const q15_t *pA;
    q15_t    *pO = pOut;
    const q15_t *pBias = bias;

#ifndef ARM_NN_TRUNCATE
    /* Half of one output LSB, added before the right shift. Written as
     * (1 << s) >> 1 so that out_shift == 0 yields 0 rather than a shift by a
     * negative count. */
    const q31_t rounding = (q31_t)((1u << out_shift) >> 1);
#else
    const q31_t rounding = 0;
#endif

    (void)vec_buffer;

    /* Groups of four rows, interleaved weight layout. */
    while (rowCnt)
    {
        pA = pV;

        q31_t     sum = (*pBias++ << bias_shift) + rounding;
        q31_t     sum2 = (*pBias++ << bias_shift) + rounding;
        q31_t     sum3 = (*pBias++ << bias_shift) + rounding;
        q31_t     sum4 = (*pBias++ << bias_shift) + rounding;

        uint16_t  colCnt = dim_vec >> 1;

        while (colCnt)
        {
            q15_t     inA1 = *pA++;
            q15_t     inA2 = *pA++;

            q15_t     inB1 = *pB++;
            q15_t     inB2 = *pB++;
            sum += inA1 * inB1 + inA2 * inB2;

            inB1 = *pB++;
            inB2 = *pB++;
            sum2 += inA1 * inB1 + inA2 * inB2;

            inB1 = *pB++;
            inB2 = *pB++;
            sum3 += inA1 * inB1 + inA2 * inB2;

            inB1 = *pB++;
            inB2 = *pB++;
            sum4 += inA1 * inB1 + inA2 * inB2;

            colCnt--;
        }

        /* Odd trailing vector element: one weight per row. */
        colCnt = dim_vec & 0x1;
        while (colCnt)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            sum += inA * inB;
            inB = *pB++;
            sum2 += inA * inB;
            inB = *pB++;
            sum3 += inA * inB;
            inB = *pB++;
            sum4 += inA * inB;

            colCnt--;
        }

        *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
        *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);

        rowCnt--;
    }

    /* Leftover rows, plain row-major layout. */
    rowCnt = num_of_rows & 0x3;

    while (rowCnt)
    {
        pA = pV;

        int       ip_out = (*pBias++ << bias_shift) + rounding;

        for (int j = 0; j < dim_vec; j++)
        {
            q15_t     inA = *pA++;
            q15_t     inB = *pB++;
            ip_out += inA * inB;
        }
        *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);

        rowCnt--;
    }
}
|
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
/*
 * Plain-C q15 fully connected layer: row-major matrix
 * (num_of_rows x dim_vec) times vector.
 *
 * Each output starts from the left-shifted bias, accumulates the dot product
 * of one matrix row with the vector, is right-shifted by out_shift and
 * saturated to 16 bits with __SSAT.
 *
 * vec_buffer is not referenced by this implementation.
 */
void arm_fully_connected_q15_ref(const q15_t * pV,              // pointer to vector
                                 const q15_t * pM,              // pointer to matrix
                                 const uint16_t dim_vec,        // length of the vector
                                 const uint16_t num_of_rows,    // number of matrix rows, i.e. outputs
                                 const uint16_t bias_shift,     // amount of left-shift for bias
                                 const uint16_t out_shift,      // amount of right-shift for output
                                 const q15_t * bias,            // bias, one value per row
                                 q15_t * pOut,                  // output operand
                                 q15_t * vec_buffer)            // unused here
{
#ifndef ARM_NN_TRUNCATE
    /* Half of one output LSB, added before the right shift. Written as
     * (1 << s) >> 1 so that out_shift == 0 yields 0 rather than a shift by a
     * negative count. */
    const int rounding = (int)((1u << out_shift) >> 1);
#else
    const int rounding = 0;
#endif

    (void)vec_buffer;

    for (int i = 0; i < num_of_rows; i++)
    {
        int       ip_out = (bias[i] << bias_shift) + rounding;

        for (int j = 0; j < dim_vec; j++)
        {
            ip_out += pV[j] * pM[i * dim_vec + j];
        }
        pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
    }
}
|
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_fully_connected_q7_opt_ref(const q7_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q7_t * pOut, // output operand
|
||||
q15_t * vec_buffer)
|
||||
{
|
||||
|
||||
uint16_t rowCnt = num_of_rows >> 2;
|
||||
const q7_t *pB = pM;
|
||||
const q7_t *pA;
|
||||
q7_t *pO = pOut;
|
||||
const q7_t *pBias = bias;
|
||||
|
||||
while (rowCnt)
|
||||
{
|
||||
pA = pV;
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
q31_t sum = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum2 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum3 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
q31_t sum4 = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
q31_t sum = *pBias++ << bias_shift;
|
||||
q31_t sum2 = *pBias++ << bias_shift;
|
||||
q31_t sum3 = *pBias++ << bias_shift;
|
||||
q31_t sum4 = *pBias++ << bias_shift;
|
||||
#endif
|
||||
|
||||
uint16_t colCnt = dim_vec >> 2;
|
||||
|
||||
while (colCnt)
|
||||
{
|
||||
q7_t inA1 = *pA++;
|
||||
q7_t inA3 = *pA++;
|
||||
q7_t inA2 = *pA++;
|
||||
q7_t inA4 = *pA++;
|
||||
|
||||
q7_t inB1 = *pB++;
|
||||
q7_t inB3 = *pB++;
|
||||
q7_t inB2 = *pB++;
|
||||
q7_t inB4 = *pB++;
|
||||
|
||||
sum += inA1 * inB1 + inA2 * inB2;
|
||||
sum2 += inA1 * inB3 + inA2 * inB4;
|
||||
|
||||
inB1 = *pB++;
|
||||
inB3 = *pB++;
|
||||
inB2 = *pB++;
|
||||
inB4 = *pB++;
|
||||
|
||||
sum3 += inA1 * inB1 + inA2 * inB2;
|
||||
sum4 += inA1 * inB3 + inA2 * inB4;
|
||||
|
||||
inB1 = *pB++;
|
||||
inB3 = *pB++;
|
||||
inB2 = *pB++;
|
||||
inB4 = *pB++;
|
||||
|
||||
sum += inA3 * inB1 + inA4 * inB2;
|
||||
sum2 += inA3 * inB3 + inA4 * inB4;
|
||||
|
||||
inB1 = *pB++;
|
||||
inB3 = *pB++;
|
||||
inB2 = *pB++;
|
||||
inB4 = *pB++;
|
||||
|
||||
sum3 += inA3 * inB1 + inA4 * inB2;
|
||||
sum4 += inA3 * inB3 + inA4 * inB4;
|
||||
|
||||
colCnt--;
|
||||
}
|
||||
colCnt = dim_vec & 0x3;
|
||||
while (colCnt)
|
||||
{
|
||||
q7_t inA = *pA++;
|
||||
q7_t inB = *pB++;
|
||||
sum += inA * inB;
|
||||
inB = *pB++;
|
||||
sum2 += inA * inB;
|
||||
inB = *pB++;
|
||||
sum3 += inA * inB;
|
||||
inB = *pB++;
|
||||
sum4 += inA * inB;
|
||||
|
||||
colCnt--;
|
||||
}
|
||||
*pO++ = (q7_t) __SSAT((sum >> out_shift), 8);
|
||||
*pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
|
||||
*pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
|
||||
*pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
|
||||
|
||||
rowCnt--;
|
||||
}
|
||||
|
||||
rowCnt = num_of_rows & 0x3;
|
||||
|
||||
while (rowCnt)
|
||||
{
|
||||
pA = pV;
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int ip_out = (*pBias++ << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int ip_out = *pBias++ << bias_shift;
|
||||
#endif
|
||||
for (int j = 0; j < dim_vec; j++)
|
||||
{
|
||||
q7_t inA = *pA++;
|
||||
q7_t inB = *pB++;
|
||||
ip_out += inA * inB;
|
||||
}
|
||||
*pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8);
|
||||
|
||||
rowCnt--;
|
||||
}
|
||||
}
|
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
void arm_fully_connected_q7_ref(const q7_t * pV, // pointer to vector
|
||||
const q7_t * pM, // pointer to matrix
|
||||
const uint16_t dim_vec, // length of the vector
|
||||
const uint16_t num_of_rows, // numCol of A
|
||||
const uint16_t bias_shift, // amount of left-shift for bias
|
||||
const uint16_t out_shift, // amount of right-shift for output
|
||||
const q7_t * bias, q7_t * pOut, // output operand
|
||||
q15_t * vec_buffer)
|
||||
{
|
||||
for (int i = 0; i < num_of_rows; i++)
|
||||
{
|
||||
#ifndef ARM_NN_TRUNCATE
|
||||
int ip_out = (bias[i] << bias_shift) + (0x1 << (out_shift - 1));
|
||||
#else
|
||||
int ip_out = bias[i] << bias_shift;
|
||||
#endif
|
||||
for (int j = 0; j < dim_vec; j++)
|
||||
{
|
||||
ip_out += pV[j] * pM[i * dim_vec + j];
|
||||
}
|
||||
pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8);
|
||||
}
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_nnfunctions.h"
|
||||
|
||||
/*
 * Reference element-wise q7 multiplication.
 *
 * For every i in [0, blockSize):
 *     pDst[i] = __SSAT((pSrcA[i] * pSrcB[i] + round) >> out_shift, 8)
 * where round is 2^(out_shift - 1), or 0 when ARM_NN_TRUNCATE is defined or
 * out_shift is 0.
 */
void arm_nn_mult_q7_ref(q7_t * pSrcA,             // first input vector
                        q7_t * pSrcB,             // second input vector
                        q7_t * pDst,              // output vector
                        const uint16_t out_shift, // amount of right-shift for output
                        uint32_t blockSize)       // number of elements to process
{
#ifndef ARM_NN_TRUNCATE
    /* "0x1 << (out_shift - 1)" would shift by -1 (undefined behaviour) for
     * out_shift == 0; no rounding is needed when nothing is shifted out. */
    const q31_t round_term = (out_shift > 0) ? (0x1 << (out_shift - 1)) : 0;
#else
    const q31_t round_term = 0;
#endif

    /* The index has the same width as blockSize: a 16-bit counter would wrap
     * and never reach a blockSize above 65535. */
    for (uint32_t i = 0; i < blockSize; i++)
    {
        const q31_t product = pSrcA[i] * pSrcB[i];

        pDst[i] = (q7_t) __SSAT((product + round_term) >> out_shift, 8);
    }
}
|
||||
|
||||
/*
 * Reference element-wise q15 multiplication.
 *
 * For every i in [0, blockSize):
 *     pDst[i] = __SSAT((pSrcA[i] * pSrcB[i] + round) >> out_shift, 16)
 * where round is 2^(out_shift - 1), or 0 when ARM_NN_TRUNCATE is defined or
 * out_shift is 0.
 */
void arm_nn_mult_q15_ref(q15_t * pSrcA,            // first input vector
                         q15_t * pSrcB,            // second input vector
                         q15_t * pDst,             // output vector
                         const uint16_t out_shift, // amount of right-shift for output
                         uint32_t blockSize)       // number of elements to process
{
#ifndef ARM_NN_TRUNCATE
    /* "0x1 << (out_shift - 1)" would shift by -1 (undefined behaviour) for
     * out_shift == 0; no rounding is needed when nothing is shifted out. */
    const q31_t round_term = (out_shift > 0) ? (0x1 << (out_shift - 1)) : 0;
#else
    const q31_t round_term = 0;
#endif

    /* The index has the same width as blockSize: a 16-bit counter would wrap
     * and never reach a blockSize above 65535. */
    for (uint32_t i = 0; i < blockSize; i++)
    {
        const q31_t product = pSrcA[i] * pSrcB[i];

        pDst[i] = (q15_t) __SSAT((product + round_term) >> out_shift, 16);
    }
}
|
@ -0,0 +1,96 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "ref_functions.h"
|
||||
|
||||
/*
 * Reference q7 average pooling on a square image.
 *
 * Pixels are addressed as Im[channel + ch_im_in * (x + y * dim)], i.e. the
 * channel index varies fastest. For every output position the window starts
 * at (out * stride - padding) and spans dim_kernel pixels in each direction;
 * only pixels that fall inside the input image contribute to the sum and to
 * the divisor. The quotient uses C integer division (truncation toward
 * zero). A window with no pixel inside the image yields 0.
 */
void arm_avepool_q7_HWC_ref(const q7_t * Im_in,         // input image
                            const uint16_t dim_im_in,   // input image dimension
                            const uint16_t ch_im_in,    // number of input image channels
                            const uint16_t dim_kernel,  // window kernel size
                            const uint16_t padding,     // padding sizes
                            const uint16_t stride,      // stride
                            const uint16_t dim_im_out,  // output image dimension
                            q7_t * bufferA,             // not used by the reference code
                            q7_t * Im_out)              // output image
{
    int16_t i_ch_in, i_x, i_y;
    int16_t k_x, k_y;

    (void) bufferA;

    for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++)
    {
        for (i_y = 0; i_y < dim_im_out; i_y++)
        {
            for (i_x = 0; i_x < dim_im_out; i_x++)
            {
                /* Window bounds in input coordinates; may lie outside the image. */
                const int y_begin = i_y * stride - padding;
                const int y_end = y_begin + dim_kernel;
                const int x_begin = i_x * stride - padding;
                const int x_end = x_begin + dim_kernel;

                int sum = 0;
                int count = 0;

                for (k_y = y_begin; k_y < y_end; k_y++)
                {
                    for (k_x = x_begin; k_x < x_end; k_x++)
                    {
                        if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in)
                        {
                            sum += Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)];
                            count++;
                        }
                    }
                }

                /* count is 0 when the whole window lies in the padding
                 * (or dim_kernel is 0); dividing would be undefined. */
                Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] =
                    (count > 0) ? (q7_t) (sum / count) : 0;
            }
        }
    }
}
|
||||
|
||||
/*
 * Reference q7 max pooling on a square image.
 *
 * Pixels are addressed as Im[channel + ch_im_in * (x + y * dim)], i.e. the
 * channel index varies fastest. For every output position the window starts
 * at (out * stride - padding) and spans dim_kernel pixels in each direction;
 * pixels outside the input image are ignored. The running maximum starts at
 * -129, one below the smallest q7 value, so any in-image pixel replaces it.
 * NOTE(review): if no pixel of a window lies inside the image, -129 itself is
 * stored into the q7 output - confirm callers never configure that.
 */
void arm_maxpool_q7_HWC_ref(const q7_t * Im_in,         // input image
                            const uint16_t dim_im_in,   // input image dimension
                            const uint16_t ch_im_in,    // number of input image channels
                            const uint16_t dim_kernel,  // window kernel size
                            const uint16_t padding,     // padding sizes
                            const uint16_t stride,      // stride
                            const uint16_t dim_im_out,  // output image dimension
                            q7_t * bufferA,             // a buffer for local storage
                            q7_t * Im_out)              // output image
{
    int16_t ch, out_x, out_y;
    int16_t in_x, in_y;

    for (ch = 0; ch < ch_im_in; ch++)
    {
        for (out_y = 0; out_y < dim_im_out; out_y++)
        {
            for (out_x = 0; out_x < dim_im_out; out_x++)
            {
                /* Window bounds in input coordinates; may lie outside the image. */
                const int row_first = out_y * stride - padding;
                const int row_limit = row_first + dim_kernel;
                const int col_first = out_x * stride - padding;
                const int col_limit = col_first + dim_kernel;

                int best = -129;

                for (in_y = row_first; in_y < row_limit; in_y++)
                {
                    /* A row outside the image contributes nothing. */
                    if (in_y < 0 || in_y >= dim_im_in)
                    {
                        continue;
                    }

                    for (in_x = col_first; in_x < col_limit; in_x++)
                    {
                        if (in_x < 0 || in_x >= dim_im_in)
                        {
                            continue;
                        }

                        const q7_t pixel = Im_in[ch + ch_im_in * (in_x + in_y * dim_im_in)];

                        if (best < pixel)
                        {
                            best = pixel;
                        }
                    }
                }

                Im_out[ch + ch_im_in * (out_x + out_y * dim_im_out)] = best;
            }
        }
    }
}
|
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_math.h"
|
||||
#include "arm_nnfunctions.h"
|
||||
|
||||
/*
 * Reference in-place ReLU for q7 data: every negative element of
 * data[0 .. size-1] is replaced by 0, all other elements are left untouched.
 */
void arm_relu_q7_ref(q7_t * data, uint16_t size)
{
    q7_t    *cursor = data;
    uint16_t remaining = size;

    while (remaining > 0)
    {
        if (*cursor < 0)
        {
            *cursor = 0;
        }
        cursor++;
        remaining--;
    }
}
|
||||
|
||||
/*
 * Reference in-place ReLU for q15 data: every negative element of
 * data[0 .. size-1] is replaced by 0, all other elements are left untouched.
 */
void arm_relu_q15_ref(q15_t * data, uint16_t size)
{
    q15_t   *cursor = data;
    uint16_t remaining = size;

    while (remaining > 0)
    {
        if (*cursor < 0)
        {
            *cursor = 0;
        }
        cursor++;
        remaining--;
    }
}
|
File diff suppressed because one or more lines are too long
@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef _REF_FUNCTIONS_H_
#define _REF_FUNCTIONS_H_

#include "arm_math.h"
#include "arm_nnfunctions.h"
//#include "arm_nnsupportfunctions.h"
#include "fully_connected_testing_weights.h"

#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Prototypes of the plain-C reference kernels used by the NN test program.
 */

/*
 *
 * Convolution reference implementations
 *
 */

void arm_convolve_HWC_q7_ref(
    const q7_t * Im_in,        // input image
    const uint16_t dim_im_in,  // input image dimension
    const uint16_t ch_im_in,   // number of input image channels
    const q7_t * wt,           // kernel weights
    const uint16_t ch_im_out,  // number of filters, i.e., output image channels
    const uint16_t dim_kernel, // filter kernel size
    const uint16_t padding,    // padding sizes
    const uint16_t stride,     // stride
    const q7_t * bias,         // bias
    const uint16_t bias_shift, // amount of left-shift for bias
    const uint16_t out_shift,  // amount of right-shift for output
    q7_t * Im_out,             // output image
    const uint16_t dim_im_out, // output image dimension
    q15_t * bufferA,           // buffer space for input
    q7_t * bufferB             // buffer space for output
    );

void arm_convolve_HWC_q7_ref_nonsquare(
    const q7_t * Im_in,          // input image
    const uint16_t dim_im_in_x,  // input image dimension x
    const uint16_t dim_im_in_y,  // input image dimension y
    const uint16_t ch_im_in,     // number of input image channels
    const q7_t * wt,             // kernel weights
    const uint16_t ch_im_out,    // number of filters, i.e., output image channels
    const uint16_t dim_kernel_x, // filter kernel size x
    const uint16_t dim_kernel_y, // filter kernel size y
    const uint16_t padding_x,    // padding sizes x
    const uint16_t padding_y,    // padding sizes y
    const uint16_t stride_x,     // stride x
    const uint16_t stride_y,     // stride y
    const q7_t * bias,           // bias
    const uint16_t bias_shift,   // amount of left-shift for bias
    const uint16_t out_shift,    // amount of right-shift for output
    q7_t * Im_out,               // output image
    const uint16_t dim_im_out_x, // output image dimension x
    const uint16_t dim_im_out_y, // output image dimension y
    q15_t * bufferA,             // buffer space for input
    q7_t * bufferB               // buffer space for output
    );

void arm_convolve_HWC_q15_ref(
    const q15_t * Im_in,       // input image
    const uint16_t dim_im_in,  // input image dimension
    const uint16_t ch_im_in,   // number of input image channels
    const q15_t * wt,          // kernel weights
    const uint16_t ch_im_out,  // number of filters, i.e., output image channels
    const uint16_t dim_kernel, // filter kernel size
    const uint16_t padding,    // padding sizes
    const uint16_t stride,     // stride
    const q15_t * bias,        // bias
    const uint16_t bias_shift, // amount of left-shift for bias
    const uint16_t out_shift,  // amount of right-shift for output
    q15_t * Im_out,            // output image
    const uint16_t dim_im_out, // output image dimension
    q15_t * bufferA,           // buffer space for input
    q7_t * bufferB             // buffer space for output
    );

void arm_convolve_HWC_q15_nonsquare_ref(
    const q15_t * Im_in,         // input image
    const uint16_t dim_im_in_x,  // input image dimension x
    const uint16_t dim_im_in_y,  // input image dimension y
    const uint16_t ch_im_in,     // number of input image channels
    const q15_t * wt,            // kernel weights
    const uint16_t ch_im_out,    // number of filters, i.e., output image channels
    const uint16_t dim_kernel_x, // filter kernel size x
    const uint16_t dim_kernel_y, // filter kernel size y
    const uint16_t padding_x,    // padding sizes x
    const uint16_t padding_y,    // padding sizes y
    const uint16_t stride_x,     // stride x
    const uint16_t stride_y,     // stride y
    const q15_t * bias,          // bias
    const uint16_t bias_shift,   // amount of left-shift for bias
    const uint16_t out_shift,    // amount of right-shift for output
    q15_t * Im_out,              // output image
    const uint16_t dim_im_out_x, // output image dimension x
    const uint16_t dim_im_out_y, // output image dimension y
    q15_t * bufferA,             // buffer space for input
    q7_t * bufferB               // buffer space for output
    );

void arm_depthwise_separable_conv_HWC_q7_ref(
    const q7_t * Im_in,        // input image
    const uint16_t dim_im_in,  // input image dimension
    const uint16_t ch_im_in,   // number of input image channels
    const q7_t * wt,           // kernel weights
    const uint16_t ch_im_out,  // number of filters, i.e., output image channels
    const uint16_t dim_kernel, // filter kernel size
    const uint16_t padding,    // padding sizes
    const uint16_t stride,     // stride
    const q7_t * bias,         // bias
    const uint16_t bias_shift, // amount of left-shift for bias
    const uint16_t out_shift,  // amount of right-shift for output
    q7_t * Im_out,             // output image
    const uint16_t dim_im_out, // output image dimension
    q15_t * bufferA,           // buffer space for input
    q7_t * bufferB             // buffer space for output
    );

void arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(
    const q7_t * Im_in,          // input image
    const uint16_t dim_im_in_x,  // input image dimension x
    const uint16_t dim_im_in_y,  // input image dimension y
    const uint16_t ch_im_in,     // number of input image channels
    const q7_t * wt,             // kernel weights
    const uint16_t ch_im_out,    // number of filters, i.e., output image channels
    const uint16_t dim_kernel_x, // filter kernel size x
    const uint16_t dim_kernel_y, // filter kernel size y
    const uint16_t padding_x,    // padding sizes x
    const uint16_t padding_y,    // padding sizes y
    const uint16_t stride_x,     // stride x
    const uint16_t stride_y,     // stride y
    const q7_t * bias,           // bias
    const uint16_t bias_shift,   // amount of left-shift for bias
    const uint16_t out_shift,    // amount of right-shift for output
    q7_t * Im_out,               // output image
    const uint16_t dim_im_out_x, // output image dimension x
    const uint16_t dim_im_out_y, // output image dimension y
    q15_t * bufferA,             // buffer space for input
    q7_t * bufferB               // buffer space for output
    );

/*
 *
 * Fully-connected reference implementations
 *
 */

void arm_fully_connected_q7_ref(
    const q7_t * pV,            // pointer to vector
    const q7_t * pM,            // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q7_t * bias,          // bias
    q7_t * pOut,                // output operand
    q15_t * vec_buffer);

void arm_fully_connected_q15_ref(
    const q15_t * pV,           // pointer to vector
    const q15_t * pM,           // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q15_t * bias,         // bias
    q15_t * pOut,               // output operand
    q15_t * vec_buffer);

void arm_fully_connected_mat_q7_vec_q15_ref(
    const q15_t * pV,           // pointer to vector
    const q7_t * pM,            // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q7_t * bias,          // bias
    q15_t * pOut,               // output operand
    q15_t * vec_buffer);

void arm_fully_connected_q7_opt_ref(
    const q7_t * pV,            // pointer to vector
    const q7_t * pM,            // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q7_t * bias,          // bias
    q7_t * pOut,                // output operand
    q15_t * vec_buffer);

void arm_fully_connected_q15_opt_ref(
    const q15_t * pV,           // pointer to vector
    const q15_t * pM,           // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q15_t * bias,         // bias
    q15_t * pOut,               // output operand
    q15_t * vec_buffer);

void arm_fully_connected_mat_q7_vec_q15_opt_ref(
    const q15_t * pV,           // pointer to vector
    const q7_t * pM,            // pointer to matrix
    const uint16_t dim_vec,     // length of the vector
    const uint16_t num_of_rows, // number of matrix rows
    const uint16_t bias_shift,  // amount of left-shift for bias
    const uint16_t out_shift,   // amount of right-shift for output
    const q7_t * bias,          // bias
    q15_t * pOut,               // output operand
    q15_t * vec_buffer);

/*
 *
 * Pooling reference implementations
 *
 */

void arm_avepool_q7_HWC_ref(
    const q7_t * Im_in,        // input image
    const uint16_t dim_im_in,  // input image dimension
    const uint16_t ch_im_in,   // number of input image channels
    const uint16_t dim_kernel, // window kernel size
    const uint16_t padding,    // padding sizes
    const uint16_t stride,     // stride
    const uint16_t dim_im_out, // output image dimension
    q7_t * bufferA,            // a buffer for local storage
    q7_t * Im_out);

void arm_maxpool_q7_HWC_ref(
    const q7_t * Im_in,        // input image
    const uint16_t dim_im_in,  // input image dimension
    const uint16_t ch_im_in,   // number of input image channels
    const uint16_t dim_kernel, // window kernel size
    const uint16_t padding,    // padding sizes
    const uint16_t stride,     // stride
    const uint16_t dim_im_out, // output image dimension
    q7_t * bufferA,            // a buffer for local storage
    q7_t * Im_out);

/*
 *
 * Other reference implementations
 *
 */

void arm_relu_q7_ref(q7_t * data, uint16_t size);

void arm_relu_q15_ref(q15_t * data, uint16_t size);

void arm_nn_mult_q7_ref(q7_t * pSrcA, q7_t * pSrcB, q7_t * pDst, const uint16_t out_shift, uint32_t blockSize);

void arm_nn_mult_q15_ref(q15_t * pSrcA, q15_t * pSrcB, q15_t * pDst, const uint16_t out_shift, uint32_t blockSize);

#ifdef __cplusplus
}
#endif

#endif /* _REF_FUNCTIONS_H_ */
|
801
Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
Normal file
801
Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/arm_nnexamples_nn_test.cpp
Normal file
@ -0,0 +1,801 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
* Copyright (C) 2010-2018 Arm Limited. All rights reserved.
|
||||
*
|
||||
*
|
||||
* Project: CMSIS NN Library
|
||||
* Title: arm_nnexamples_nn_test.cpp
|
||||
*
|
||||
* Description: Example code for NN kernel testing.
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this
|
||||
* software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* -------------------------------------------------------------------- */
|
||||
|
||||
#include "arm_nnexamples_nn_test.h"
|
||||
|
||||
// Compile-time switches for the test program: main() wraps each test section
// in "#ifdef TEST_...", so a section is built only when its macro is defined.
// The sigmoid and tanh sections are switched off here.
//#define TEST_SIGMOID
|
||||
//#define TEST_TANH
|
||||
#define TEST_POOL
|
||||
#define TEST_RELU
|
||||
#define TEST_IP
|
||||
#define TEST_CONV
|
||||
#define TEST_NONSQUARE
|
||||
#define TEST_NNMULT
|
||||
|
||||
// Loop counter main() uses to initialise test_flags; main() resets it to 0 afterwards.
int test_index = 0;
|
||||
// 50 result slots; main() sets every entry to -1 before the tests start.
q7_t test_flags[50];
|
||||
// NOTE(review): not written in the visible part of main(); presumably updated
// by the verify_results helpers - confirm in arm_nnexamples_nn_test.h.
bool test_pass;
|
||||
|
||||
int main()
|
||||
{
|
||||
printf("start tests\n");
|
||||
|
||||
srand(1);
|
||||
|
||||
// common pointers for testing data
|
||||
q7_t *test1;
|
||||
q15_t *test2;
|
||||
q7_t *test3;
|
||||
q15_t *test4;
|
||||
|
||||
for (test_index = 0; test_index<50; test_index++) {
|
||||
test_flags[test_index] = -1;
|
||||
}
|
||||
test_index = 0;
|
||||
|
||||
#ifdef TEST_NNMULT
|
||||
#define NNMULT_DIM 128
|
||||
test1 = new q7_t[NNMULT_DIM*2];
|
||||
test2 = new q15_t[NNMULT_DIM*2];
|
||||
test3 = new q7_t[NNMULT_DIM*2];
|
||||
test4 = new q15_t[NNMULT_DIM*2];
|
||||
|
||||
q7_t * mult_out_q7 = test3;
|
||||
q7_t * mult_ref_q7 = test3 + NNMULT_DIM;
|
||||
q15_t * mult_out_q15 = test4;
|
||||
q15_t * mult_ref_q15 = test4 + NNMULT_DIM;
|
||||
|
||||
for (int i=0;i<NNMULT_DIM*2;i++) {
|
||||
test1[i] = (rand() % 256 - 128);
|
||||
test2[i] = (rand() % 65536 - 32768);
|
||||
}
|
||||
|
||||
// Test q7
|
||||
arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 5, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 5, NNMULT_DIM);
|
||||
|
||||
verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q7(test1, test1+NNMULT_DIM, mult_out_q7, 9, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q7_ref(test1, test1+NNMULT_DIM, mult_ref_q7, 9, NNMULT_DIM);
|
||||
|
||||
verify_results_q7(mult_out_q7, mult_ref_q7, NNMULT_DIM);
|
||||
|
||||
// Test q15
|
||||
arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 13, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 13, NNMULT_DIM);
|
||||
|
||||
verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q15(test2, test2+NNMULT_DIM, mult_out_q15, 18, NNMULT_DIM);
|
||||
|
||||
arm_nn_mult_q15_ref(test2, test2+NNMULT_DIM, mult_ref_q15, 18, NNMULT_DIM);
|
||||
|
||||
verify_results_q15(mult_out_q15, mult_ref_q15, NNMULT_DIM);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_SIGMOID
|
||||
|
||||
#define SIGMOID_DIM 128
|
||||
|
||||
/* This part tests the running of sigmoid functions */
|
||||
|
||||
test1 = new q7_t[SIGMOID_DIM];
|
||||
test2 = new q15_t[SIGMOID_DIM];
|
||||
test3 = new q7_t[SIGMOID_DIM];
|
||||
test4 = new q15_t[SIGMOID_DIM];
|
||||
|
||||
srand(1);
|
||||
|
||||
for (int i = 0; i < SIGMOID_DIM; i++)
|
||||
{
|
||||
test1[i] = (rand() % 256 - 128);
|
||||
test2[i] = (rand() % 65536 - 32768);
|
||||
test3[i] = test1[i];
|
||||
test4[i] = test2[i];
|
||||
}
|
||||
|
||||
arm_nn_activations_direct_q7(test3, SIGMOID_DIM, 3, ARM_SIGMOID);
|
||||
|
||||
for (int i = 0; i < SIGMOID_DIM; i++)
|
||||
{
|
||||
printf("in: %d out: %d\n", test1[i], test3[i]);
|
||||
}
|
||||
|
||||
printf("start testing q15_t sigmoid\n\n");
|
||||
|
||||
arm_nn_activations_direct_q15(test4, SIGMOID_DIM, 3, ARM_SIGMOID);
|
||||
|
||||
for (int i = 0; i < SIGMOID_DIM; i++)
|
||||
{
|
||||
printf("in: %d out: %d\n", test2[i], test4[i]);
|
||||
}
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
delete[]test4;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_TANH
|
||||
|
||||
#define TANH_DIM 128
|
||||
|
||||
/* This part tests the running of tanh functions */
|
||||
|
||||
test1 = new q7_t[TANH_DIM];
|
||||
test2 = new q15_t[TANH_DIM];
|
||||
test3 = new q7_t[TANH_DIM];
|
||||
test4 = new q15_t[TANH_DIM];
|
||||
|
||||
srand(1);
|
||||
|
||||
for (int i = 0; i < TANH_DIM; i++)
|
||||
{
|
||||
test1[i] = (rand() % 256 - 128);
|
||||
test2[i] = (rand() % 65536 - 32768);
|
||||
test3[i] = test1[i];
|
||||
test4[i] = test2[i];
|
||||
}
|
||||
|
||||
arm_nn_activations_direct_q7(test3, TANH_DIM, 3, ARM_TANH);
|
||||
|
||||
printf("start testing q7_t tanh\n\n");
|
||||
|
||||
for (int i = 0; i < TANH_DIM; i++)
|
||||
{
|
||||
printf("in: %d out: %d\n", test1[i], test3[i]);
|
||||
}
|
||||
|
||||
printf("start testing q15_t tanh\n\n");
|
||||
|
||||
arm_nn_activations_direct_q15(test4, TANH_DIM, 3, ARM_TANH);
|
||||
|
||||
for (int i = 0; i < TANH_DIM; i++)
|
||||
{
|
||||
printf("in: %d out: %d\n", test2[i], test4[i]);
|
||||
}
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
delete[]test4;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_POOL
|
||||
|
||||
#define POOL_IM_DIM 32
|
||||
#define POOL_IM_CH 8
|
||||
|
||||
test1 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH * 2];
|
||||
test2 = new q15_t[POOL_IM_DIM * POOL_IM_CH];
|
||||
test3 = new q7_t[POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH];
|
||||
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
test1[i] = (rand() % 256 - 128);
|
||||
}
|
||||
|
||||
q7_t *img_in = test1 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH;
|
||||
q7_t *pool_out_ref = test3;
|
||||
q7_t *pool_out_opt = test3 + POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH / 2;
|
||||
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
test3[i] = 0;
|
||||
}
|
||||
|
||||
// copy over the img input
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
img_in[i] = test1[i];
|
||||
}
|
||||
|
||||
initialize_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
|
||||
|
||||
printf("Start maxpool reference implementation\n");
|
||||
|
||||
arm_maxpool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
|
||||
|
||||
// copy over the img input
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
img_in[i] = test1[i];
|
||||
}
|
||||
|
||||
printf("Start maxpool opt implementation\n");
|
||||
|
||||
arm_maxpool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
|
||||
|
||||
verify_results_q7(pool_out_ref, pool_out_opt, POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH);
|
||||
|
||||
// copy over the img input
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
img_in[i] = test1[i];
|
||||
}
|
||||
|
||||
// copy over the img input
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
img_in[i] = test1[i];
|
||||
}
|
||||
|
||||
printf("Start avepool ref implementation\n");
|
||||
|
||||
arm_avepool_q7_HWC_ref(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_ref);
|
||||
|
||||
// copy over the img input
|
||||
for (int i = 0; i < POOL_IM_DIM * POOL_IM_DIM * POOL_IM_CH; i++)
|
||||
{
|
||||
img_in[i] = test1[i];
|
||||
}
|
||||
|
||||
printf("Start avepool opt implementation\n");
|
||||
|
||||
arm_avepool_q7_HWC(img_in, POOL_IM_DIM, POOL_IM_CH, 3, 0, 2, POOL_IM_DIM / 2, (q7_t *) test2, pool_out_opt);
|
||||
|
||||
// special check here
|
||||
bool if_ave_pool_match = true;
|
||||
for (int i = 0; i < POOL_IM_DIM / 2 * POOL_IM_DIM / 2 * POOL_IM_CH; i++)
|
||||
{
|
||||
// we tolerate at most difference of 1 here because of rounding errors
|
||||
if (pool_out_ref[i] - pool_out_opt[i] >= 2 || pool_out_opt[i] - pool_out_ref[i] >= 2)
|
||||
{
|
||||
printf("Output mismatch at %d, expected %d, actual %d\n", i, pool_out_ref[i], pool_out_opt[i]);
|
||||
if_ave_pool_match = false;
|
||||
}
|
||||
}
|
||||
if (if_ave_pool_match == true)
|
||||
{
|
||||
printf("Outputs match.\n");
|
||||
}
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_RELU
|
||||
|
||||
#define RELU_DIM 127
|
||||
|
||||
test1 = new q7_t[RELU_DIM];
|
||||
test2 = new q15_t[RELU_DIM];
|
||||
test3 = new q7_t[RELU_DIM];
|
||||
test4 = new q15_t[RELU_DIM];
|
||||
|
||||
for (int i = 0; i < RELU_DIM; i++)
|
||||
{
|
||||
test1[i] = (rand() % 256 - 128);
|
||||
test2[i] = (rand() % 65536 - 32768);
|
||||
test3[i] = test1[i];
|
||||
test4[i] = test2[i];
|
||||
}
|
||||
|
||||
q7_t *relu_ref_data_q7 = test1;
|
||||
q7_t *relu_opt_data_q7 = test3;
|
||||
q15_t *relu_ref_data_q15 = test2;
|
||||
q15_t *relu_opt_data_q15 = test4;
|
||||
|
||||
printf("Start ref relu q7 implementation\n");
|
||||
|
||||
arm_relu_q7_ref(relu_ref_data_q7, RELU_DIM);
|
||||
|
||||
printf("Start opt relu q7 implementation\n");
|
||||
|
||||
arm_relu_q7(relu_opt_data_q7, RELU_DIM);
|
||||
|
||||
verify_results_q7(relu_ref_data_q7, relu_opt_data_q7, RELU_DIM);
|
||||
|
||||
printf("Start ref relu q15 implementation\n");
|
||||
|
||||
arm_relu_q15_ref(relu_ref_data_q15, RELU_DIM);
|
||||
|
||||
printf("Start opt relu q15 implementation\n");
|
||||
|
||||
arm_relu_q15(relu_opt_data_q15, RELU_DIM);
|
||||
|
||||
verify_results_q15(relu_ref_data_q15, relu_opt_data_q15, RELU_DIM);
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
delete[]test4;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_IP
|
||||
|
||||
#define IP_ROW_DIM 127
|
||||
#define IP_COL_DIM 127
|
||||
|
||||
q7_t ip_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
|
||||
q7_t ip_q7_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT;
|
||||
q7_t ip_q7_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_q7_q15_WEIGHT;
|
||||
q15_t ip_q15_weights[IP_ROW_DIM * IP_COL_DIM] = IP2_WEIGHT;
|
||||
q15_t ip_q15_opt_weights[IP_ROW_DIM * IP_COL_DIM] = IP4_WEIGHT_Q15;
|
||||
|
||||
test1 = new q7_t[IP_COL_DIM + IP_ROW_DIM];
|
||||
test2 = new q15_t[IP_COL_DIM];
|
||||
test3 = new q7_t[IP_ROW_DIM * 3];
|
||||
test4 = new q15_t[IP_COL_DIM + IP_ROW_DIM * 2];
|
||||
|
||||
for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
|
||||
{
|
||||
test1[i] = rand() % 256 - 100;
|
||||
}
|
||||
for (int i = 0; i < IP_ROW_DIM * 3; i++)
|
||||
{
|
||||
test3[i] = 0;
|
||||
}
|
||||
|
||||
q7_t *ip_bias_q7 = test1 + IP_COL_DIM;
|
||||
|
||||
q7_t *ip_out_q7_ref = test3;
|
||||
q7_t *ip_out_q7_opt = test3 + IP_ROW_DIM;
|
||||
q7_t *ip_out_q7_opt_fast = test3 + 2 * IP_ROW_DIM;
|
||||
q15_t *ip_out_q15_ref = test4 + IP_COL_DIM;
|
||||
q15_t *ip_out_q15_opt = test4 + IP_COL_DIM + IP_ROW_DIM;
|
||||
|
||||
initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
|
||||
initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
|
||||
initialize_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
|
||||
|
||||
printf("Start ref q7 implementation\n");
|
||||
|
||||
arm_fully_connected_q7_ref(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_ref, test2);
|
||||
|
||||
printf("Start q7 implementation\n");
|
||||
|
||||
arm_fully_connected_q7(test1, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt, test2);
|
||||
|
||||
verify_results_q7(ip_out_q7_ref, ip_out_q7_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start q7 ref opt implementation\n");
|
||||
|
||||
arm_fully_connected_q7_opt_ref(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
|
||||
ip_out_q7_opt_fast, test2);
|
||||
|
||||
verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
|
||||
|
||||
printf("Start q7 opt implementation\n");
|
||||
|
||||
arm_fully_connected_q7_opt(test1, ip_q7_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q7_opt_fast,
|
||||
test2);
|
||||
|
||||
verify_results_q7(ip_out_q7_ref, ip_out_q7_opt_fast, IP_ROW_DIM);
|
||||
|
||||
for (int i = 0; i < IP_ROW_DIM + IP_COL_DIM; i++)
|
||||
{
|
||||
test4[i] = (rand() % 65536 - 32768);
|
||||
}
|
||||
|
||||
initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start ref q15 implementation\n");
|
||||
|
||||
arm_fully_connected_q15_ref(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_ref, NULL);
|
||||
|
||||
printf("Start q15 implementation\n");
|
||||
|
||||
arm_fully_connected_q15(test4, ip_q15_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start ref opt q15 implementation\n");
|
||||
|
||||
arm_fully_connected_q15_opt_ref(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt,
|
||||
NULL);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start opt q15 implementation\n");
|
||||
|
||||
arm_fully_connected_q15_opt(test4, ip_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, test2, ip_out_q15_opt, NULL);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
initialize_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start ref q7_q15 implementation\n");
|
||||
|
||||
arm_fully_connected_mat_q7_vec_q15_ref(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_ref,
|
||||
test2);
|
||||
|
||||
printf("Start q7_q15 implementation\n");
|
||||
|
||||
arm_fully_connected_mat_q7_vec_q15(test4, ip_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7, ip_out_q15_opt,
|
||||
test2);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start ref opt q7_q15 implementation\n");
|
||||
|
||||
arm_fully_connected_mat_q7_vec_q15_opt_ref(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
|
||||
ip_out_q15_opt, test2);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
printf("Start opt q7_q15 implementation\n");
|
||||
|
||||
arm_fully_connected_mat_q7_vec_q15_opt(test4, ip_q7_q15_opt_weights, IP_COL_DIM, IP_ROW_DIM, 1, 7, ip_bias_q7,
|
||||
ip_out_q15_opt, test2);
|
||||
|
||||
verify_results_q15(ip_out_q15_ref, ip_out_q15_opt, IP_ROW_DIM);
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
delete[]test4;
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef TEST_NONSQUARE
|
||||
|
||||
/* Use the RCONV prefix to differentiate these from the square CONV tests */
|
||||
|
||||
#define RCONV_IM_DIM_X 10
|
||||
#define RCONV_IM_DIM_Y 8
|
||||
#define RCONV_IM_CH 4
|
||||
#define RCONV_KER_DIM_X 5
|
||||
#define RCONV_KER_DIM_Y 3
|
||||
#define RCONV_STRIDE_X 1
|
||||
#define RCONV_STRIDE_Y 1
|
||||
#define RCONV_PADDING_X 2
|
||||
#define RCONV_PADDING_Y 1
|
||||
#define RCONV_OUT_CH 4
|
||||
#define RCONV_OUT_DIM_X 10
|
||||
#define RCONV_OUT_DIM_Y 8
|
||||
|
||||
test1 = new q7_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH];
|
||||
test2 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH];
|
||||
test3 =
|
||||
new q7_t[RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH];
|
||||
|
||||
for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
|
||||
{
|
||||
test1[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
for (int i = 0;
|
||||
i < RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH; i++)
|
||||
{
|
||||
test3[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
q7_t *rconv_weight_q7 = test1;
|
||||
q7_t *rconv_bias_q7 = test1 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
|
||||
|
||||
q15_t *rconv_buf = test2;
|
||||
|
||||
q7_t *rconv_im_in_q7 = test3;
|
||||
q7_t *rconv_im_out_ref_q7 = test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
|
||||
q7_t *rconv_im_out_opt_q7 =
|
||||
test3 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
|
||||
|
||||
initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
printf("start conv q7 nonsquare ref implementation\n");
|
||||
arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
printf("start conv q7 nonsquare opt implementation\n");
|
||||
arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
printf("start conv q7 nonsquare ref implementation\n");
|
||||
arm_convolve_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
printf("start conv q7 nonsquare basic implementation\n");
|
||||
arm_convolve_HWC_q7_basic_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
initialize_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
printf("start 1x1 conv q7 nonsquare fast implementation\n");
|
||||
arm_convolve_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
|
||||
RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
|
||||
RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
printf("start 1x1 conv q7 nonsquare dedicated function implementation\n");
|
||||
arm_convolve_1x1_HWC_q7_fast_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q7,
|
||||
RCONV_OUT_CH, 1, 1, 0, 0, RCONV_STRIDE_X,
|
||||
RCONV_STRIDE_Y, rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
|
||||
RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
printf("start depthwise separable conv q7 nonsquare ref implementation\n");
|
||||
arm_depthwise_separable_conv_HWC_q7_ref_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
|
||||
rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
|
||||
RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
|
||||
rconv_bias_q7, 1, 7, rconv_im_out_ref_q7, RCONV_OUT_DIM_X,
|
||||
RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
printf("start depthwise separable conv q7 nonsquare opt implementation\n");
|
||||
arm_depthwise_separable_conv_HWC_q7_nonsquare(rconv_im_in_q7, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH,
|
||||
rconv_weight_q7, RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y,
|
||||
RCONV_PADDING_X, RCONV_PADDING_Y, RCONV_STRIDE_X, RCONV_STRIDE_Y,
|
||||
rconv_bias_q7, 1, 7, rconv_im_out_opt_q7, RCONV_OUT_DIM_X,
|
||||
RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
verify_results_q7(rconv_im_out_ref_q7, rconv_im_out_opt_q7, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
|
||||
test2 = new q15_t[RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH]; // weights + bias
|
||||
test4 = new q15_t[2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH //buffer
|
||||
+ RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH]; // i/o
|
||||
|
||||
for (int i = 0; i < RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH + RCONV_OUT_CH; i++)
|
||||
{
|
||||
test2[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
for (int i = 0;
|
||||
i < 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH
|
||||
+ RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH + 2 * RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
|
||||
i++)
|
||||
{
|
||||
test4[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
q15_t *rconv_weight_q15 = test2;
|
||||
q15_t *rconv_bias_q15 = test2 + RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH * RCONV_OUT_CH;
|
||||
|
||||
rconv_buf = test4;
|
||||
|
||||
q15_t *rconv_im_in_q15 = test4 + 2 * RCONV_KER_DIM_Y * RCONV_KER_DIM_X * RCONV_IM_CH;
|
||||
q15_t *rconv_im_out_ref_q15 = rconv_im_in_q15 + RCONV_IM_DIM_Y * RCONV_IM_DIM_X * RCONV_IM_CH;
|
||||
q15_t *rconv_im_out_opt_q15 = rconv_im_out_ref_q15 + RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH;
|
||||
|
||||
initialize_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
printf("start conv q15 nonsquare ref implementation\n");
|
||||
arm_convolve_HWC_q15_nonsquare_ref(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_ref_q15,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
printf("start conv q5 nonsquare opt implementation\n");
|
||||
arm_convolve_HWC_q15_fast_nonsquare(rconv_im_in_q15, RCONV_IM_DIM_X, RCONV_IM_DIM_Y, RCONV_IM_CH, rconv_weight_q15,
|
||||
RCONV_OUT_CH, RCONV_KER_DIM_X, RCONV_KER_DIM_Y, RCONV_PADDING_X, RCONV_PADDING_Y,
|
||||
RCONV_STRIDE_X, RCONV_STRIDE_Y, rconv_bias_q15, 1, 7, rconv_im_out_opt_q15,
|
||||
RCONV_OUT_DIM_X, RCONV_OUT_DIM_Y, rconv_buf, NULL);
|
||||
|
||||
verify_results_q15(rconv_im_out_ref_q15, rconv_im_out_opt_q15, RCONV_OUT_DIM_Y * RCONV_OUT_DIM_X * RCONV_OUT_CH);
|
||||
|
||||
delete [] test2;
|
||||
delete [] test4;
|
||||
#endif
|
||||
|
||||
#ifdef TEST_CONV
|
||||
|
||||
#define CONV_IM_DIM 16
|
||||
#define CONV_IM_CH 16
|
||||
#define CONV_KER_DIM 5
|
||||
#define CONV_OUT_CH 16
|
||||
#define CONV_OUT_DIM 16
|
||||
|
||||
test1 = new q7_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
|
||||
test2 =
|
||||
new q15_t[CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
|
||||
2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH];
|
||||
test3 = new q7_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
|
||||
test4 = new q15_t[CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH];
|
||||
|
||||
for (int i = 0; i < CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
|
||||
{
|
||||
test1[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
for (int i = 0;
|
||||
i <
|
||||
CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
|
||||
2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH + CONV_OUT_CH; i++)
|
||||
{
|
||||
test2[i] = (rand() % 65536 - 32768);
|
||||
}
|
||||
|
||||
for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
|
||||
{
|
||||
test3[i] = rand() % 256 - 100;
|
||||
}
|
||||
|
||||
for (int i = 0; i < CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + 2 * CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH; i++)
|
||||
{
|
||||
test4[i] = (rand() % 65536 - 32768);
|
||||
}
|
||||
|
||||
q7_t *conv_weight_q7 = test1;
|
||||
q7_t *conv_bias_q7 = test1 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
|
||||
|
||||
q15_t *conv_weight_q15 = test2;
|
||||
q15_t *conv_buf = test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
|
||||
q15_t *conv_bias_q15 =
|
||||
test2 + CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH +
|
||||
2 * CONV_KER_DIM * CONV_KER_DIM * CONV_IM_CH * CONV_OUT_CH;
|
||||
|
||||
q7_t *conv_im_in_q7 = test3;
|
||||
q7_t *conv_im_out_ref_q7 = test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
|
||||
q7_t *conv_im_out_opt_q7 =
|
||||
test3 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
|
||||
|
||||
q15_t *conv_im_in_q15 = test4;
|
||||
q15_t *conv_im_out_ref_q15 = test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH;
|
||||
q15_t *conv_im_out_opt_q15 =
|
||||
test4 + CONV_IM_DIM * CONV_IM_DIM * CONV_IM_CH + CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH;
|
||||
|
||||
initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q7 ref implementation\n");
|
||||
|
||||
arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
printf("start q7 basic implementation\n");
|
||||
|
||||
arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q7 fast implementation\n");
|
||||
|
||||
arm_convolve_HWC_q7_fast(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
// testing with RGB
|
||||
printf("start q7 ref implementation for RGB\n");
|
||||
|
||||
arm_convolve_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
printf("start q7 basic implementation for RGB\n");
|
||||
|
||||
arm_convolve_HWC_q7_basic(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q7 RGB implementation for RGB\n");
|
||||
|
||||
arm_convolve_HWC_q7_RGB(conv_im_in_q7, CONV_IM_DIM, 3, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
// testing q15
|
||||
initialize_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q15 ref implementation\n");
|
||||
|
||||
arm_convolve_HWC_q15_ref(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_ref_q15,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
printf("start q15 basic implementation\n");
|
||||
|
||||
arm_convolve_HWC_q15_basic(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q15 fast implementation\n");
|
||||
|
||||
arm_convolve_HWC_q15_fast(conv_im_in_q15, CONV_IM_DIM, CONV_IM_CH, conv_weight_q15,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q15, 0, 15, conv_im_out_opt_q15,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q15(conv_im_out_ref_q15, conv_im_out_opt_q15, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
// depthwise separable conv
|
||||
initialize_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
printf("start q7 depthwise_separable_conv ref implementation\n");
|
||||
|
||||
arm_depthwise_separable_conv_HWC_q7_ref(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_ref_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
printf("start q7 depthwise_separable_conv implementation\n");
|
||||
|
||||
arm_depthwise_separable_conv_HWC_q7(conv_im_in_q7, CONV_IM_DIM, CONV_IM_CH, conv_weight_q7,
|
||||
CONV_OUT_CH, CONV_KER_DIM, 2, 1, conv_bias_q7, 1, 7, conv_im_out_opt_q7,
|
||||
CONV_OUT_DIM, conv_buf, NULL);
|
||||
|
||||
verify_results_q7(conv_im_out_ref_q7, conv_im_out_opt_q7, CONV_OUT_DIM * CONV_OUT_DIM * CONV_OUT_CH);
|
||||
|
||||
delete[]test1;
|
||||
delete[]test2;
|
||||
delete[]test3;
|
||||
delete[]test4;
|
||||
|
||||
#endif
|
||||
|
||||
test_pass = true;
|
||||
test_index = 0;
|
||||
while (test_flags[test_index] != -1) {
|
||||
if (test_flags[test_index]) {
|
||||
test_pass = false;
|
||||
}
|
||||
test_index ++;
|
||||
}
|
||||
if (test_pass) {
|
||||
printf("All tests passed\n");
|
||||
} else {
|
||||
printf("Test failed passed\n");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,78 @@
|
||||
#ifndef _MAIN_H_
|
||||
#define _MAIN_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "arm_math.h"
|
||||
|
||||
#include "arm_nnfunctions.h"
|
||||
#include "ref_functions.h"
|
||||
|
||||
extern int test_index;
|
||||
extern q7_t test_flags[50];
|
||||
|
||||
/**
 * Pre-fill a pair of q7 result buffers before a reference/optimized test run.
 *
 * @param ref     buffer receiving the reference output; filled with 0
 * @param opt     buffer receiving the optimized output; filled with 37
 * @param length  number of elements written to each buffer
 *
 * The two buffers get different fill values, presumably so that a kernel
 * which writes nothing cannot accidentally compare equal -- confirm intent.
 */
void initialize_results_q7(q7_t * ref, q7_t * opt, int length)
{
    const q7_t ref_fill = 0;
    const q7_t opt_fill = 37;

    arm_fill_q7(ref_fill, ref, length);
    arm_fill_q7(opt_fill, opt, length);
}
|
||||
|
||||
/**
 * Pre-fill a pair of q15 result buffers before a reference/optimized test run.
 *
 * @param ref     buffer receiving the reference output; filled with 0
 * @param opt     buffer receiving the optimized output; filled with 0x5F5
 * @param length  number of elements written to each buffer
 *
 * The two buffers get different fill values, presumably so that a kernel
 * which writes nothing cannot accidentally compare equal -- confirm intent.
 */
void initialize_results_q15(q15_t * ref, q15_t * opt, int length)
{
    const q15_t ref_fill = 0;
    const q15_t opt_fill = 0x5F5;

    arm_fill_q15(ref_fill, ref, length);
    arm_fill_q15(opt_fill, opt, length);
}
|
||||
|
||||
/**
 * Compare a reference q7 output buffer against an optimized one and record
 * the outcome in the global test_flags table.
 *
 * Every differing element is reported on stdout. When no element differs,
 * "Outputs match." is printed. In both cases one entry is written to
 * test_flags[test_index] (0 = match, 1 = mismatch) and test_index advances.
 *
 * @param ref     expected values
 * @param opt     values produced by the implementation under test
 * @param length  number of elements to compare
 *
 * NOTE(review): test_index is not range-checked against the 50-entry
 * test_flags array declared in this header.
 */
void verify_results_q7(q7_t * ref, q7_t * opt, int length)
{
    int mismatch_count = 0;

    for (int idx = 0; idx < length; ++idx)
    {
        if (ref[idx] == opt[idx])
        {
            continue;
        }

        printf("Output mismatch at %d, expected %d, actual %d\r\n", idx, ref[idx], opt[idx]);
        ++mismatch_count;
    }

    if (mismatch_count == 0)
    {
        printf("Outputs match.\r\n\r\n");
    }

    /* One flag per verification: 0 on success, 1 on any mismatch. */
    test_flags[test_index++] = (mismatch_count == 0) ? 0 : 1;
}
|
||||
|
||||
/**
 * Compare a reference q15 output buffer against an optimized one and record
 * the outcome in the global test_flags table.
 *
 * Every differing element is reported on stdout. When no element differs,
 * "Outputs match." is printed. In both cases one entry is written to
 * test_flags[test_index] (0 = match, 1 = mismatch) and test_index advances.
 *
 * @param ref     expected values
 * @param opt     values produced by the implementation under test
 * @param length  number of elements to compare
 *
 * NOTE(review): test_index is not range-checked against the 50-entry
 * test_flags array declared in this header.
 */
void verify_results_q15(q15_t * ref, q15_t * opt, int length)
{
    int mismatch_count = 0;

    for (int idx = 0; idx < length; ++idx)
    {
        if (ref[idx] == opt[idx])
        {
            continue;
        }

        printf("Output mismatch at %d, expected %d, actual %d\r\n", idx, ref[idx], opt[idx]);
        ++mismatch_count;
    }

    if (mismatch_count == 0)
    {
        printf("Outputs match.\r\n\r\n");
    }

    /* One flag per verification: 0 on success, 1 on any mismatch. */
    test_flags[test_index++] = (mismatch_count == 0) ? 0 : 1;
}
|
||||
|
||||
#endif
|
4
Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
Normal file
4
Drivers/CMSIS/NN/NN_Lib_Tests/nn_test/readme.txt
Normal file
@ -0,0 +1,4 @@
|
||||
CMSIS NN_Lib example arm_nnexamples_nn_test for
|
||||
Cortex-M3, Cortex-M4 and Cortex-M7.
|
||||
|
||||
The example is configured for uVision Simulator.
|
Reference in New Issue
Block a user