Initial commit

This commit is contained in:
Julien Chevalley
2023-12-11 14:43:05 +01:00
commit 902141e8b6
1103 changed files with 810796 additions and 0 deletions

View File

@ -0,0 +1,230 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_bitreversal.c
* Description: Bitreversal functions
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftSize length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
* @param[in] *pBitRevTab points to the bit reversal table.
* @return none.
*/
void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
uint16_t fftLenBy2, fftLenBy2p1;
uint16_t i, j;
float32_t in;
/* Initializations */
j = 0U;
fftLenBy2 = fftSize >> 1U;
fftLenBy2p1 = (fftSize >> 1U) + 1U;
/* Bit Reversal Implementation */
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
{
if (i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2U * i];
pSrc[2U * i] = pSrc[2U * j];
pSrc[2U * j] = in;
/* pSrc[i+1U] <-> pSrc[j+1U] */
in = pSrc[(2U * i) + 1U];
pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
pSrc[(2U * j) + 1U] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2U * (i + fftLenBy2p1)];
pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
pSrc[2U * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
pSrc[(2U * (j + fftLenBy2p1)) + 1U];
pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
}
/* pSrc[i+1U] <-> pSrc[j+1U] */
in = pSrc[2U * (i + 1U)];
pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
pSrc[2U * (j + fftLenBy2)] = in;
/* pSrc[i+2U] <-> pSrc[j+2U] */
in = pSrc[(2U * (i + 1U)) + 1U];
pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q31 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTable)
{
uint32_t fftLenBy2, fftLenBy2p1, i, j;
q31_t in;
/* Initializations */
j = 0U;
fftLenBy2 = fftLen / 2U;
fftLenBy2p1 = (fftLen / 2U) + 1U;
/* Bit Reversal Implementation */
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
{
if (i < j)
{
/* pSrc[i] <-> pSrc[j]; */
in = pSrc[2U * i];
pSrc[2U * i] = pSrc[2U * j];
pSrc[2U * j] = in;
/* pSrc[i+1U] <-> pSrc[j+1U] */
in = pSrc[(2U * i) + 1U];
pSrc[(2U * i) + 1U] = pSrc[(2U * j) + 1U];
pSrc[(2U * j) + 1U] = in;
/* pSrc[i+fftLenBy2p1] <-> pSrc[j+fftLenBy2p1] */
in = pSrc[2U * (i + fftLenBy2p1)];
pSrc[2U * (i + fftLenBy2p1)] = pSrc[2U * (j + fftLenBy2p1)];
pSrc[2U * (j + fftLenBy2p1)] = in;
/* pSrc[i+fftLenBy2p1+1U] <-> pSrc[j+fftLenBy2p1+1U] */
in = pSrc[(2U * (i + fftLenBy2p1)) + 1U];
pSrc[(2U * (i + fftLenBy2p1)) + 1U] =
pSrc[(2U * (j + fftLenBy2p1)) + 1U];
pSrc[(2U * (j + fftLenBy2p1)) + 1U] = in;
}
/* pSrc[i+1U] <-> pSrc[j+1U] */
in = pSrc[2U * (i + 1U)];
pSrc[2U * (i + 1U)] = pSrc[2U * (j + fftLenBy2)];
pSrc[2U * (j + fftLenBy2)] = in;
/* pSrc[i+2U] <-> pSrc[j+2U] */
in = pSrc[(2U * (i + 1U)) + 1U];
pSrc[(2U * (i + 1U)) + 1U] = pSrc[(2U * (j + fftLenBy2)) + 1U];
pSrc[(2U * (j + fftLenBy2)) + 1U] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTable;
/* Updating the bit reversal index depending on the fft length */
pBitRevTable += bitRevFactor;
}
}
/*
* @brief In-place bit reversal function.
* @param[in, out] *pSrc points to the in-place buffer of Q15 data type.
* @param[in] fftLen length of the FFT.
* @param[in] bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
* @param[in] *pBitRevTab points to bit reversal table.
* @return none.
*/
void arm_bitreversal_q15(
q15_t * pSrc16,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab)
{
q31_t *pSrc = (q31_t *) pSrc16;
q31_t in;
uint32_t fftLenBy2, fftLenBy2p1;
uint32_t i, j;
/* Initializations */
j = 0U;
fftLenBy2 = fftLen / 2U;
fftLenBy2p1 = (fftLen / 2U) + 1U;
/* Bit Reversal Implementation */
for (i = 0U; i <= (fftLenBy2 - 2U); i += 2U)
{
if (i < j)
{
/* pSrc[i] <-> pSrc[j]; */
/* pSrc[i+1U] <-> pSrc[j+1U] */
in = pSrc[i];
pSrc[i] = pSrc[j];
pSrc[j] = in;
/* pSrc[i + fftLenBy2p1] <-> pSrc[j + fftLenBy2p1]; */
/* pSrc[i + fftLenBy2p1+1U] <-> pSrc[j + fftLenBy2p1+1U] */
in = pSrc[i + fftLenBy2p1];
pSrc[i + fftLenBy2p1] = pSrc[j + fftLenBy2p1];
pSrc[j + fftLenBy2p1] = in;
}
/* pSrc[i+1U] <-> pSrc[j+fftLenBy2]; */
/* pSrc[i+2] <-> pSrc[j+fftLenBy2+1U] */
in = pSrc[i + 1U];
pSrc[i + 1U] = pSrc[j + fftLenBy2];
pSrc[j + fftLenBy2] = in;
/* Reading the index for the bit reversal */
j = *pBitRevTab;
/* Updating the bit reversal index depending on the fft length */
pBitRevTab += bitRevFactor;
}
}

View File

@ -0,0 +1,216 @@
;/* ----------------------------------------------------------------------
; * Project: CMSIS DSP Library
; * Title: arm_bitreversal2.S
; * Description: arm_bitreversal_32 function done in assembly for maximum speed.
; * Called after doing an fft to reorder the output.
; * The function is loop unrolled by 2. arm_bitreversal_16 as well.
; *
; * $Date: 27. January 2017
; * $Revision: V.1.5.1
; *
; * Target Processor: Cortex-M cores
; * -------------------------------------------------------------------- */
;/*
; * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
; *
; * SPDX-License-Identifier: Apache-2.0
; *
; * Licensed under the Apache License, Version 2.0 (the License); you may
; * not use this file except in compliance with the License.
; * You may obtain a copy of the License at
; *
; * www.apache.org/licenses/LICENSE-2.0
; *
; * Unless required by applicable law or agreed to in writing, software
; * distributed under the License is distributed on an AS IS BASIS, WITHOUT
; * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
; * See the License for the specific language governing permissions and
; * limitations under the License.
; */
#if defined ( __CC_ARM ) /* Keil */
#define CODESECT AREA ||.text||, CODE, READONLY, ALIGN=2
#define LABEL
#elif defined ( __IASMARM__ ) /* IAR */
#define CODESECT SECTION `.text`:CODE
#define PROC
#define LABEL
#define ENDP
#define EXPORT PUBLIC
#elif defined ( __CSMC__ ) /* Cosmic */
#define CODESECT switch .text
#define THUMB
#define EXPORT xdef
#define PROC :
#define LABEL :
#define ENDP
#define arm_bitreversal_32 _arm_bitreversal_32
#elif defined ( __TI_ARM__ ) /* TI ARM */
#define THUMB .thumb
#define CODESECT .text
#define EXPORT .global
#define PROC : .asmfunc
#define LABEL :
#define ENDP .endasmfunc
#define END
#elif defined ( __GNUC__ ) /* GCC */
#define THUMB .thumb
#define CODESECT .section .text
#define EXPORT .global
#define PROC :
#define LABEL :
#define ENDP
#define END
.syntax unified
#endif
CODESECT
THUMB
;/*
;* @brief In-place bit reversal function.
;* @param[in, out] *pSrc points to the in-place buffer of unknown 32-bit data type.
;* @param[in] bitRevLen bit reversal table length
;* @param[in] *pBitRevTab points to bit reversal table.
;* @return none.
;*/
EXPORT arm_bitreversal_32
EXPORT arm_bitreversal_16
#if defined ( __CC_ARM ) /* Keil */
#elif defined ( __IASMARM__ ) /* IAR */
#elif defined ( __CSMC__ ) /* Cosmic */
#elif defined ( __TI_ARM__ ) /* TI ARM */
#elif defined ( __GNUC__ ) /* GCC */
.type arm_bitreversal_16, %function
.type arm_bitreversal_32, %function
#endif
#if defined(ARM_MATH_CM0) || defined(ARM_MATH_CM0PLUS) || defined(ARM_MATH_ARMV8MBL)
arm_bitreversal_32 PROC
ADDS r3,r1,#1
PUSH {r4-r6}
ADDS r1,r2,#0
LSRS r3,r3,#1
arm_bitreversal_32_0 LABEL
LDRH r2,[r1,#2]
LDRH r6,[r1,#0]
ADD r2,r0,r2
ADD r6,r0,r6
LDR r5,[r2,#0]
LDR r4,[r6,#0]
STR r5,[r6,#0]
STR r4,[r2,#0]
LDR r5,[r2,#4]
LDR r4,[r6,#4]
STR r5,[r6,#4]
STR r4,[r2,#4]
ADDS r1,r1,#4
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r6}
BX lr
ENDP
arm_bitreversal_16 PROC
ADDS r3,r1,#1
PUSH {r4-r6}
ADDS r1,r2,#0
LSRS r3,r3,#1
arm_bitreversal_16_0 LABEL
LDRH r2,[r1,#2]
LDRH r6,[r1,#0]
LSRS r2,r2,#1
LSRS r6,r6,#1
ADD r2,r0,r2
ADD r6,r0,r6
LDR r5,[r2,#0]
LDR r4,[r6,#0]
STR r5,[r6,#0]
STR r4,[r2,#0]
ADDS r1,r1,#4
SUBS r3,r3,#1
BNE arm_bitreversal_16_0
POP {r4-r6}
BX lr
ENDP
#else
arm_bitreversal_32 PROC
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
ADDS r1,r2,#2
LSRS r3,r3,#2
arm_bitreversal_32_0 LABEL ;/* loop unrolled by 2 */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
ADD r8,r0,r8
ADD r9,r0,r9
ADD r2,r0,r2
ADD r12,r0,r12
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
LDR r7,[r9,#4]
LDR r6,[r8,#4]
LDR r5,[r2,#4]
LDR r4,[r12,#4]
STR r6,[r9,#4]
STR r7,[r8,#4]
STR r5,[r12,#4]
STR r4,[r2,#4]
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_32_0
POP {r4-r9}
BX lr
ENDP
arm_bitreversal_16 PROC
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
ADDS r1,r2,#2
LSRS r3,r3,#2
arm_bitreversal_16_0 LABEL ;/* loop unrolled by 2 */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
ADD r8,r0,r8,LSR #1
ADD r9,r0,r9,LSR #1
ADD r2,r0,r2,LSR #1
ADD r12,r0,r12,LSR #1
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_16_0
POP {r4-r9}
BX lr
ENDP
#endif
END

View File

@ -0,0 +1,620 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_f32.c
* Description: Combined Radix Decimation in Frequency CFFT Floating point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
extern void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
/**
* @ingroup groupTransforms
*/
/**
* @defgroup ComplexFFT Complex FFT Functions
*
* \par
* The Fast Fourier Transform (FFT) is an efficient algorithm for computing the
* Discrete Fourier Transform (DFT). The FFT can be orders of magnitude faster
* than the DFT, especially for long lengths.
* The algorithms described in this section
* operate on complex data. A separate set of functions is devoted to handling
* of real sequences.
* \par
* There are separate algorithms for handling floating-point, Q15, and Q31 data
* types. The algorithms available for each data type are described next.
* \par
* The FFT functions operate in-place. That is, the array holding the input data
* will also be used to hold the corresponding result. The input data is complex
* and contains <code>2*fftLen</code> interleaved values as shown below.
* <pre> {real[0], imag[0], real[1], imag[1],..} </pre>
* The FFT result will be contained in the same array and the frequency domain
* values will have the same interleaving.
*
* \par Floating-point
* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-8
* stages are performed along with a single radix-2 or radix-4 stage, as needed.
* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
* a different twiddle factor table.
* \par
* The function uses the standard FFT definition and output values may grow by a
* factor of <code>fftLen</code> when computing the forward transform. The
* inverse transform includes a scale of <code>1/fftLen</code> as part of the
* calculation and this matches the textbook definition of the inverse FFT.
* \par
* Pre-initialized data structures containing twiddle factors and bit reversal
* tables are provided and defined in <code>arm_const_structs.h</code>. Include
* this header in your function and then pass one of the constant structures as
* an argument to arm_cfft_f32. For example:
* \par
* <code>arm_cfft_f32(arm_cfft_sR_f32_len64, pSrc, 1, 1)</code>
* \par
* computes a 64-point inverse complex FFT including bit reversal.
* The data structures are treated as constant data and not modified during the
* calculation. The same data structure can be reused for multiple transforms
* including mixing forward and inverse transforms.
* \par
* Earlier releases of the library provided separate radix-2 and radix-4
* algorithms that operated on floating-point data. These functions are still
* provided but are deprecated. The older functions are slower and less general
* than the new functions.
* \par
* An example of initialization of the constants for the arm_cfft_f32 function follows:
* \code
* const static arm_cfft_instance_f32 *S;
* ...
* switch (length) {
* case 16:
* S = &arm_cfft_sR_f32_len16;
* break;
* case 32:
* S = &arm_cfft_sR_f32_len32;
* break;
* case 64:
* S = &arm_cfft_sR_f32_len64;
* break;
* case 128:
* S = &arm_cfft_sR_f32_len128;
* break;
* case 256:
* S = &arm_cfft_sR_f32_len256;
* break;
* case 512:
* S = &arm_cfft_sR_f32_len512;
* break;
* case 1024:
* S = &arm_cfft_sR_f32_len1024;
* break;
* case 2048:
* S = &arm_cfft_sR_f32_len2048;
* break;
* case 4096:
* S = &arm_cfft_sR_f32_len4096;
* break;
* }
* \endcode
* \par Q15 and Q31
* The floating-point complex FFT uses a mixed-radix algorithm. Multiple radix-4
* stages are performed along with a single radix-2 stage, as needed.
* The algorithm supports lengths of [16, 32, 64, ..., 4096] and each length uses
* a different twiddle factor table.
* \par
* The function uses the standard FFT definition and output values may grow by a
* factor of <code>fftLen</code> when computing the forward transform. The
* inverse transform includes a scale of <code>1/fftLen</code> as part of the
* calculation and this matches the textbook definition of the inverse FFT.
* \par
* Pre-initialized data structures containing twiddle factors and bit reversal
* tables are provided and defined in <code>arm_const_structs.h</code>. Include
* this header in your function and then pass one of the constant structures as
* an argument to arm_cfft_q31. For example:
* \par
* <code>arm_cfft_q31(arm_cfft_sR_q31_len64, pSrc, 1, 1)</code>
* \par
* computes a 64-point inverse complex FFT including bit reversal.
* The data structures are treated as constant data and not modified during the
* calculation. The same data structure can be reused for multiple transforms
* including mixing forward and inverse transforms.
* \par
* Earlier releases of the library provided separate radix-2 and radix-4
* algorithms that operated on floating-point data. These functions are still
* provided but are deprecated. The older functions are slower and less general
* than the new functions.
* \par
* An example of initialization of the constants for the arm_cfft_q31 function follows:
* \code
* const static arm_cfft_instance_q31 *S;
* ...
* switch (length) {
* case 16:
* S = &arm_cfft_sR_q31_len16;
* break;
* case 32:
* S = &arm_cfft_sR_q31_len32;
* break;
* case 64:
* S = &arm_cfft_sR_q31_len64;
* break;
* case 128:
* S = &arm_cfft_sR_q31_len128;
* break;
* case 256:
* S = &arm_cfft_sR_q31_len256;
* break;
* case 512:
* S = &arm_cfft_sR_q31_len512;
* break;
* case 1024:
* S = &arm_cfft_sR_q31_len1024;
* break;
* case 2048:
* S = &arm_cfft_sR_q31_len2048;
* break;
* case 4096:
* S = &arm_cfft_sR_q31_len4096;
* break;
* }
* \endcode
*
*/
void arm_cfft_radix8by2_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen;
float32_t * pCol1, * pCol2, * pMid1, * pMid2;
float32_t * p2 = p1 + L;
const float32_t * tw = (float32_t *) S->pTwiddle;
float32_t t1[4], t2[4], t3[4], t4[4], twR, twI;
float32_t m0, m1, m2, m3;
uint32_t l;
pCol1 = p1;
pCol2 = p2;
// Define new length
L >>= 1;
// Initialize mid pointers
pMid1 = p1 + L;
pMid2 = p2 + L;
// do two dot Fourier transform
for ( l = L >> 2; l > 0; l-- )
{
t1[0] = p1[0];
t1[1] = p1[1];
t1[2] = p1[2];
t1[3] = p1[3];
t2[0] = p2[0];
t2[1] = p2[1];
t2[2] = p2[2];
t2[3] = p2[3];
t3[0] = pMid1[0];
t3[1] = pMid1[1];
t3[2] = pMid1[2];
t3[3] = pMid1[3];
t4[0] = pMid2[0];
t4[1] = pMid2[1];
t4[2] = pMid2[2];
t4[3] = pMid2[3];
*p1++ = t1[0] + t2[0];
*p1++ = t1[1] + t2[1];
*p1++ = t1[2] + t2[2];
*p1++ = t1[3] + t2[3]; // col 1
t2[0] = t1[0] - t2[0];
t2[1] = t1[1] - t2[1];
t2[2] = t1[2] - t2[2];
t2[3] = t1[3] - t2[3]; // for col 2
*pMid1++ = t3[0] + t4[0];
*pMid1++ = t3[1] + t4[1];
*pMid1++ = t3[2] + t4[2];
*pMid1++ = t3[3] + t4[3]; // col 1
t4[0] = t4[0] - t3[0];
t4[1] = t4[1] - t3[1];
t4[2] = t4[2] - t3[2];
t4[3] = t4[3] - t3[3]; // for col 2
twR = *tw++;
twI = *tw++;
// multiply by twiddle factors
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
// R = R * Tr - I * Ti
*p2++ = m0 + m1;
// I = I * Tr + R * Ti
*p2++ = m2 - m3;
// use vertical symmetry
// 0.9988 - 0.0491i <==> -0.0491 - 0.9988i
m0 = t4[0] * twI;
m1 = t4[1] * twR;
m2 = t4[1] * twI;
m3 = t4[0] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
twR = *tw++;
twI = *tw++;
m0 = t2[2] * twR;
m1 = t2[3] * twI;
m2 = t2[3] * twR;
m3 = t2[2] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
m0 = t4[2] * twI;
m1 = t4[3] * twR;
m2 = t4[3] * twI;
m3 = t4[2] * twR;
*pMid2++ = m0 - m1;
*pMid2++ = m2 + m3;
}
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 2U);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 2U);
}
void arm_cfft_radix8by4_f32( arm_cfft_instance_f32 * S, float32_t * p1)
{
uint32_t L = S->fftLen >> 1;
float32_t * pCol1, *pCol2, *pCol3, *pCol4, *pEnd1, *pEnd2, *pEnd3, *pEnd4;
const float32_t *tw2, *tw3, *tw4;
float32_t * p2 = p1 + L;
float32_t * p3 = p2 + L;
float32_t * p4 = p3 + L;
float32_t t2[4], t3[4], t4[4], twR, twI;
float32_t p1ap3_0, p1sp3_0, p1ap3_1, p1sp3_1;
float32_t m0, m1, m2, m3;
uint32_t l, twMod2, twMod3, twMod4;
pCol1 = p1; // points to real values by default
pCol2 = p2;
pCol3 = p3;
pCol4 = p4;
pEnd1 = p2 - 1; // points to imaginary values by default
pEnd2 = p3 - 1;
pEnd3 = p4 - 1;
pEnd4 = pEnd3 + L;
tw2 = tw3 = tw4 = (float32_t *) S->pTwiddle;
L >>= 1;
// do four dot Fourier transform
twMod2 = 2;
twMod3 = 4;
twMod4 = 6;
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// Twiddle factors are ones
*p2++ = t2[0];
*p2++ = t2[1];
*p3++ = t3[0];
*p3++ = t3[1];
*p4++ = t4[0];
*p4++ = t4[1];
tw2 += twMod2;
tw3 += twMod3;
tw4 += twMod4;
for (l = (L - 2) >> 1; l > 0; l-- )
{
// TOP
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// BOTTOM
p1ap3_1 = pEnd1[-1] + pEnd3[-1];
p1sp3_1 = pEnd1[-1] - pEnd3[-1];
p1ap3_0 = pEnd1[0] + pEnd3[0];
p1sp3_0 = pEnd1[0] - pEnd3[0];
// col 2
t2[2] = pEnd2[0] - pEnd4[0] + p1sp3_1;
t2[3] = pEnd1[0] - pEnd3[0] - pEnd2[-1] + pEnd4[-1];
// col 3
t3[2] = p1ap3_1 - pEnd2[-1] - pEnd4[-1];
t3[3] = p1ap3_0 - pEnd2[0] - pEnd4[0];
// col 4
t4[2] = pEnd2[0] - pEnd4[0] - p1sp3_1;
t4[3] = pEnd4[-1] - pEnd2[-1] - p1sp3_0;
// col 1 - Bottom
*pEnd1-- = p1ap3_0 + pEnd2[0] + pEnd4[0];
*pEnd1-- = p1ap3_1 + pEnd2[-1] + pEnd4[-1];
// COL 2
// read twiddle factors
twR = *tw2++;
twI = *tw2++;
// multiply by twiddle factors
// let Z1 = a + i(b), Z2 = c + i(d)
// => Z1 * Z2 = (a*c - b*d) + i(b*c + a*d)
// Top
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// use vertical symmetry col 2
// 0.9997 - 0.0245i <==> 0.0245 - 0.9997i
// Bottom
m0 = t2[3] * twI;
m1 = t2[2] * twR;
m2 = t2[2] * twI;
m3 = t2[3] * twR;
*pEnd2-- = m0 - m1;
*pEnd2-- = m2 + m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
tw3 += twMod3;
// Top
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// use vertical symmetry col 3
// 0.9988 - 0.0491i <==> -0.9988 - 0.0491i
// Bottom
m0 = -t3[3] * twR;
m1 = t3[2] * twI;
m2 = t3[2] * twR;
m3 = t3[3] * twI;
*pEnd3-- = m0 - m1;
*pEnd3-- = m3 - m2;
// COL 4
twR = tw4[0];
twI = tw4[1];
tw4 += twMod4;
// Top
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// use vertical symmetry col 4
// 0.9973 - 0.0736i <==> -0.0736 + 0.9973i
// Bottom
m0 = t4[3] * twI;
m1 = t4[2] * twR;
m2 = t4[2] * twI;
m3 = t4[3] * twR;
*pEnd4-- = m0 - m1;
*pEnd4-- = m2 + m3;
}
//MIDDLE
// Twiddle factors are
// 1.0000 0.7071-0.7071i -1.0000i -0.7071-0.7071i
p1ap3_0 = p1[0] + p3[0];
p1sp3_0 = p1[0] - p3[0];
p1ap3_1 = p1[1] + p3[1];
p1sp3_1 = p1[1] - p3[1];
// col 2
t2[0] = p1sp3_0 + p2[1] - p4[1];
t2[1] = p1sp3_1 - p2[0] + p4[0];
// col 3
t3[0] = p1ap3_0 - p2[0] - p4[0];
t3[1] = p1ap3_1 - p2[1] - p4[1];
// col 4
t4[0] = p1sp3_0 - p2[1] + p4[1];
t4[1] = p1sp3_1 + p2[0] - p4[0];
// col 1 - Top
*p1++ = p1ap3_0 + p2[0] + p4[0];
*p1++ = p1ap3_1 + p2[1] + p4[1];
// COL 2
twR = tw2[0];
twI = tw2[1];
m0 = t2[0] * twR;
m1 = t2[1] * twI;
m2 = t2[1] * twR;
m3 = t2[0] * twI;
*p2++ = m0 + m1;
*p2++ = m2 - m3;
// COL 3
twR = tw3[0];
twI = tw3[1];
m0 = t3[0] * twR;
m1 = t3[1] * twI;
m2 = t3[1] * twR;
m3 = t3[0] * twI;
*p3++ = m0 + m1;
*p3++ = m2 - m3;
// COL 4
twR = tw4[0];
twI = tw4[1];
m0 = t4[0] * twR;
m1 = t4[1] * twI;
m2 = t4[1] * twR;
m3 = t4[0] * twI;
*p4++ = m0 + m1;
*p4++ = m2 - m3;
// first col
arm_radix8_butterfly_f32( pCol1, L, (float32_t *) S->pTwiddle, 4U);
// second col
arm_radix8_butterfly_f32( pCol2, L, (float32_t *) S->pTwiddle, 4U);
// third col
arm_radix8_butterfly_f32( pCol3, L, (float32_t *) S->pTwiddle, 4U);
// fourth col
arm_radix8_butterfly_f32( pCol4, L, (float32_t *) S->pTwiddle, 4U);
}
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the floating-point complex FFT.
* @param[in] *S points to an instance of the floating-point CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_f32(
const arm_cfft_instance_f32 * S,
float32_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen, l;
float32_t invL, * pSrc;
if (ifftFlag == 1U)
{
/* Conjugate input data */
pSrc = p1 + 1;
for(l=0; l<L; l++)
{
*pSrc = -*pSrc;
pSrc += 2;
}
}
switch (L)
{
case 16:
case 128:
case 1024:
arm_cfft_radix8by2_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 32:
case 256:
case 2048:
arm_cfft_radix8by4_f32 ( (arm_cfft_instance_f32 *) S, p1);
break;
case 64:
case 512:
case 4096:
arm_radix8_butterfly_f32( p1, L, (float32_t *) S->pTwiddle, 1);
break;
}
if ( bitReverseFlag )
arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
if (ifftFlag == 1U)
{
invL = 1.0f/(float32_t)L;
/* Conjugate and scale output data */
pSrc = p1;
for(l=0; l<L; l++)
{
*pSrc++ *= invL ;
*pSrc = -(*pSrc) * invL;
pSrc++;
}
}
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,345 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_q15.c
* Description: Combined Radix Decimation in Q15 Frequency CFFT processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
extern void arm_radix4_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_bitreversal_16(
uint16_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
void arm_cfft_radix4by2_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
void arm_cfft_radix4by2_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the Q15 complex FFT.
* @param[in] *S points to an instance of the Q15 CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_q15(
const arm_cfft_instance_q15 * S,
q15_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen;
if (ifftFlag == 1U)
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle );
break;
}
}
else
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle );
break;
}
}
if ( bitReverseFlag )
arm_bitreversal_16((uint16_t*)p1,S->bitRevLength,S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_cfft_radix4by2_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef)
{
uint32_t i;
uint32_t n2;
q15_t p0, p1, p2, p3;
#if defined (ARM_MATH_DSP)
q31_t T, S, R;
q31_t coeff, out1, out2;
const q15_t *pC = pCoef;
q15_t *pSi = pSrc;
q15_t *pSl = pSrc + fftLen;
#else
uint32_t ia, l;
q15_t xt, yt, cosVal, sinVal;
#endif
n2 = fftLen >> 1;
#if defined (ARM_MATH_DSP)
for (i = n2; i > 0; i--)
{
coeff = _SIMD32_OFFSET(pC);
pC += 2;
T = _SIMD32_OFFSET(pSi);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
S = _SIMD32_OFFSET(pSl);
S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSi) = __SHADD16(T, S);
pSi += 2;
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSl) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSl += 2;
}
#else // #if defined (ARM_MATH_DSP)
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
}
#endif // #if defined (ARM_MATH_DSP)
// first col
arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2U);
// second col
arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
p2 = pSrc[4*i+2];
p3 = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
p2 <<= 1;
p3 <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = p2;
pSrc[4*i+3] = p3;
}
}
void arm_cfft_radix4by2_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
const q15_t * pCoef)
{
uint32_t i;
uint32_t n2;
q15_t p0, p1, p2, p3;
#if defined (ARM_MATH_DSP)
q31_t T, S, R;
q31_t coeff, out1, out2;
const q15_t *pC = pCoef;
q15_t *pSi = pSrc;
q15_t *pSl = pSrc + fftLen;
#else
uint32_t ia, l;
q15_t xt, yt, cosVal, sinVal;
#endif
n2 = fftLen >> 1;
#if defined (ARM_MATH_DSP)
for (i = n2; i > 0; i--)
{
coeff = _SIMD32_OFFSET(pC);
pC += 2;
T = _SIMD32_OFFSET(pSi);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
S = _SIMD32_OFFSET(pSl);
S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSi) = __SHADD16(T, S);
pSi += 2;
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSl) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSl += 2;
}
#else // #if defined (ARM_MATH_DSP)
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
}
#endif // #if defined (ARM_MATH_DSP)
// first col
arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2U);
// second col
arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
p2 = pSrc[4*i+2];
p3 = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
p2 <<= 1;
p3 <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = p2;
pSrc[4*i+3] = p3;
}
}

View File

@ -0,0 +1,252 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_q31.c
* Description: Combined Radix Decimation in Frequency CFFT fixed point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
extern void arm_radix4_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint32_t twidCoefModifier);
extern void arm_bitreversal_32(
uint32_t * pSrc,
const uint16_t bitRevLen,
const uint16_t * pBitRevTable);
void arm_cfft_radix4by2_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
void arm_cfft_radix4by2_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point complex FFT in Q31 format.
* @param[in] *S points to an instance of the fixed-point CFFT structure.
* @param[in, out] *p1 points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return none.
*/
void arm_cfft_q31(
const arm_cfft_instance_q31 * S,
q31_t * p1,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen;
if (ifftFlag == 1U)
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle );
break;
}
}
else
{
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096:
arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048:
arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle );
break;
}
}
if ( bitReverseFlag )
arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_cfft_radix4by2_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef)
{
uint32_t i, l;
uint32_t n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
n2 = fftLen >> 1;
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[2*ia];
sinVal = pCoef[2*ia + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0 << 1;
pSrc[2U * l + 1U] = p1 << 1;
}
// first col
arm_radix4_butterfly_q31( pSrc, n2, (q31_t*)pCoef, 2U);
// second col
arm_radix4_butterfly_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
xt = pSrc[4*i+2];
yt = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
xt <<= 1;
yt <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = xt;
pSrc[4*i+3] = yt;
}
}
void arm_cfft_radix4by2_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
const q31_t * pCoef)
{
uint32_t i, l;
uint32_t n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
n2 = fftLen >> 1;
ia = 0;
for (i = 0; i < n2; i++)
{
cosVal = pCoef[2*ia];
sinVal = pCoef[2*ia + 1];
ia++;
l = i + n2;
xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0 << 1;
pSrc[2U * l + 1U] = p1 << 1;
}
// first col
arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2U);
// second col
arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
xt = pSrc[4*i+2];
yt = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
xt <<= 1;
yt <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = xt;
pSrc[4*i+3] = yt;
}
}

View File

@ -0,0 +1,472 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_f32.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Radix-2 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_f32(
const arm_cfft_radix2_instance_f32 * S,
float32_t * pSrc)
{
if (S->ifftFlag == 1U)
{
/* Complex IFFT radix-2 */
arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier, S->onebyfftLen);
}
else
{
/* Complex FFT radix-2 */
arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
S->twidCoefModifier);
}
if (S->bitReverseFlag == 1U)
{
/* Bit Reversal */
arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
}
/**
* @} end of ComplexFFT group
*/
/* ----------------------------------------------------------------------
** Internal helper function used by the FFTs
** ------------------------------------------------------------------- */
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix2_butterfly_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#if defined (ARM_MATH_DSP)
/* Initializations for the first stage */
n2 = fftLen >> 1;
ia = 0;
i = 0;
// loop for groups
for (k = n2; k > 0; k--)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
/* Twiddle coefficients index modifier */
ia += twidCoefModifier;
/* index calculation for the input as, */
/* pSrc[i + 0], pSrc[i + fftLen/1] */
l = i + n2;
/* Butterfly implementation */
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i++;
} // groups loop end
twidCoefModifier <<= 1U;
// loop for stage
for (k = n2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while ( i < fftLen ); // butterfly loop end
j++;
} while ( j < n2); // groups loop end
twidCoefModifier <<= 1U;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * i + 2] = xt;
pSrc[2 * i + 3] = yt;
} // groups loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 1; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 + p1;
pSrc[2 * l + 1] = p2 - p3;
i += n1;
} while (i < fftLen);
j++;
} while (j < n2);
twidCoefModifier <<= 1U;
}
#endif // #if defined (ARM_MATH_DSP)
}
void arm_radix2_butterfly_inverse_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen)
{
uint32_t i, j, k, l;
uint32_t n1, n2, ia;
float32_t xt, yt, cosVal, sinVal;
float32_t p0, p1, p2, p3;
float32_t a0, a1;
#if defined (ARM_MATH_DSP)
n2 = fftLen >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
} // groups loop end
twidCoefModifier <<= 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia += twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while ( i < fftLen ); // butterfly loop end
j++;
} while (j < n2); // groups loop end
twidCoefModifier <<= 1U;
} // stages loop end
// loop for butterfly
for (i = 0; i < fftLen; i += 2)
{
a0 = pSrc[2 * i] + pSrc[2 * i + 2];
xt = pSrc[2 * i] - pSrc[2 * i + 2];
a1 = pSrc[2 * i + 3] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * i + 3];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2 * i + 1] = p1;
pSrc[2 * i + 2] = p2;
pSrc[2 * i + 3] = p3;
} // butterfly loop end
#else
n2 = fftLen;
// loop for stage
for (k = fftLen; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
j = 0;
do
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
do
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
p0 = xt * cosVal;
p1 = yt * sinVal;
p2 = yt * cosVal;
p3 = xt * sinVal;
pSrc[2 * i] = a0;
pSrc[2 * i + 1] = a1;
pSrc[2 * l] = p0 - p1;
pSrc[2 * l + 1] = p2 + p3;
i += n1;
} while ( i < fftLen ); // butterfly loop end
j++;
} while ( j < n2 ); // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
a0 = pSrc[2 * i] + pSrc[2 * l];
xt = pSrc[2 * i] - pSrc[2 * l];
a1 = pSrc[2 * l + 1] + pSrc[2 * i + 1];
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
p0 = a0 * onebyfftLen;
p2 = xt * onebyfftLen;
p1 = a1 * onebyfftLen;
p3 = yt * onebyfftLen;
pSrc[2 * i] = p0;
pSrc[2U * l] = p2;
pSrc[2 * i + 1] = p1;
pSrc[2U * l + 1U] = p3;
} // butterfly loop end
#endif // #if defined (ARM_MATH_DSP)
}

View File

@ -0,0 +1,192 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_f32.c
* Description: Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_f32(
arm_cfft_radix2_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096U:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 2048U:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.00048828125;
break;
case 1024U:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 512U:
/* Initializations of structure parameters for 512 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.001953125;
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 128U:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32U;
S->bitRevFactor = 32U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
S->onebyfftLen = 0.0078125;
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 32U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 128U;
S->bitRevFactor = 128U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
S->onebyfftLen = 0.03125;
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,177 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q15.c
* Description: Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q15(
arm_cfft_radix2_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096U:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 2048U:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
case 1024U:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4U;
S->bitRevFactor = 4U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 512U:
/* Initializations of structure parameters for 512 point FFT */
S->twidCoefModifier = 8U;
S->bitRevFactor = 8U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128U:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32U;
S->bitRevFactor = 32U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32U:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128U;
S->bitRevFactor = 128U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,174 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_init_q31.c
* Description: Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix2_init_q31(
arm_cfft_radix2_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 2048 point FFT */
case 2048U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 2U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 2U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[1];
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
/* Initializations of structure parameters for 512 point FFT */
case 512U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 8U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 8U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[7];
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 128U:
/* Initializations of structure parameters for 128 point FFT */
S->twidCoefModifier = 32U;
S->bitRevFactor = 32U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[31];
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 32U:
/* Initializations of structure parameters for 32 point FFT */
S->twidCoefModifier = 128U;
S->bitRevFactor = 128U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[127];
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,729 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q15.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q15(
q15_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q15(
const arm_cfft_radix2_instance_q15 * S,
q15_t * pSrc)
{
if (S->ifftFlag == 1U)
{
arm_radix2_butterfly_inverse_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q15(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#if defined (ARM_MATH_DSP)
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
#else
out1 = __SMUSDX(R, coeff) >> 16U;
out2 = __SMUAD(coeff, R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2U * l)) = R;
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2U * l)) = R;
} // groups loop end
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
#endif // #if defined (ARM_MATH_DSP)
}
void arm_radix2_butterfly_inverse_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pCoef,
uint16_t twidCoefModifier)
{
#if defined (ARM_MATH_DSP)
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t in;
q31_t T, S, R;
q31_t coeff, out1, out2;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
i++;
l++;
T = _SIMD32_OFFSET(pSrc + (2 * i));
in = ((int16_t) (T & 0xFFFF)) >> 1;
T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);
S = _SIMD32_OFFSET(pSrc + (2 * l));
in = ((int16_t) (S & 0xFFFF)) >> 1;
S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
i += n1;
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
#ifndef ARM_MATH_BIG_ENDIAN
out1 = __SMUSD(coeff, R) >> 16;
out2 = __SMUADX(coeff, R);
#else
out1 = __SMUADX(R, coeff) >> 16U;
out2 = __SMUSD(__QSUB(0, coeff), R);
#endif // #ifndef ARM_MATH_BIG_ENDIAN
_SIMD32_OFFSET(pSrc + (2U * l)) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
T = _SIMD32_OFFSET(pSrc + (2 * i));
S = _SIMD32_OFFSET(pSrc + (2 * l));
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
_SIMD32_OFFSET(pSrc + (2U * l)) = R;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
#else
unsigned i, j, k, l;
unsigned n1, n2, ia;
q15_t xt, yt, cosVal, sinVal;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
((int16_t) (((q31_t) yt * sinVal) >> 16)));
pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
((int16_t) (((q31_t) xt * sinVal) >> 16)));
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
} // groups loop end
#endif // #if defined (ARM_MATH_DSP)
}

View File

@ -0,0 +1,338 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix2_q31.c
* Description: Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier);
void arm_bitreversal_q31(
q31_t * pSrc,
uint32_t fftLen,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @details
* @brief Processing function for the fixed-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in] *S points to an instance of the fixed-point CFFT/CIFFT structure.
* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
* @return none.
*/
void arm_cfft_radix2_q31(
const arm_cfft_radix2_instance_q31 * S,
q31_t * pSrc)
{
if (S->ifftFlag == 1U)
{
arm_radix2_butterfly_inverse_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
else
{
arm_radix2_butterfly_q31(pSrc, S->fftLen,
S->pTwiddle, S->twidCoefModifier);
}
arm_bitreversal_q31(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
}
/**
* @} end of ComplexFFT group
*/
void arm_radix2_butterfly_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l, m;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0;
pSrc[2U * l + 1U] = p1;
} // groups loop end
twidCoefModifier <<= 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
i = j;
m = fftLen / n1;
do
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multAcc_32x32_keep32_R(p0, yt, sinVal);
multSub_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0;
pSrc[2U * l + 1U] = p1;
i += n1;
m--;
} while ( m > 0); // butterfly loop end
} // groups loop end
twidCoefModifier <<= 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
} // butterfly loop end
}
void arm_radix2_butterfly_inverse_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pCoef,
uint16_t twidCoefModifier)
{
unsigned i, j, k, l;
unsigned n1, n2, ia;
q31_t xt, yt, cosVal, sinVal;
q31_t p0, p1;
//N = fftLen;
n2 = fftLen;
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (i = 0; i < n2; i++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
l = i + n2;
xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
pSrc[2 * i + 1] =
((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0;
pSrc[2U * l + 1U] = p1;
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
// loop for stage
for (k = fftLen / 2; k > 2; k = k >> 1)
{
n1 = n2;
n2 = n2 >> 1;
ia = 0;
// loop for groups
for (j = 0; j < n2; j++)
{
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = j; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]) >> 1U;
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;
mult_32x32_keep32_R(p0, xt, cosVal);
mult_32x32_keep32_R(p1, yt, cosVal);
multSub_32x32_keep32_R(p0, yt, sinVal);
multAcc_32x32_keep32_R(p1, xt, sinVal);
pSrc[2U * l] = p0;
pSrc[2U * l + 1U] = p1;
} // butterfly loop end
} // groups loop end
twidCoefModifier = twidCoefModifier << 1U;
} // stages loop end
n1 = n2;
n2 = n2 >> 1;
ia = 0;
cosVal = pCoef[ia * 2];
sinVal = pCoef[(ia * 2) + 1];
ia = ia + twidCoefModifier;
// loop for butterfly
for (i = 0; i < fftLen; i += n1)
{
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
i += n1;
l = i + n2;
xt = pSrc[2 * i] - pSrc[2 * l];
pSrc[2 * i] = (pSrc[2 * i] + pSrc[2 * l]);
yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);
pSrc[2U * l] = xt;
pSrc[2U * l + 1U] = yt;
} // butterfly loop end
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,152 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_f32.c
* Description: Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the floating-point CFFT/CIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_cfft_f32 and will be removed
* in the future.
* @param[in,out] *S points to an instance of the floating-point CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_f32(
arm_cfft_radix4_instance_f32 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (float32_t *) twiddleCoef;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096U:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.000244140625;
break;
case 1024U:
/* Initializations of structure parameters for 1024 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
/* Initialise the 1/fftLen Value */
S->onebyfftLen = 0.0009765625f;
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
S->onebyfftLen = 0.00390625f;
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
S->onebyfftLen = 0.015625f;
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
S->onebyfftLen = 0.0625f;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,140 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q15.c
* Description: Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
* @brief Initialization function for the Q15 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed
* @param[in,out] *S points to an instance of the Q15 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q15(
arm_cfft_radix4_instance_q15 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q15_t *) twiddleCoef_4096_q15;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of structure parameters depending on the FFT length */
switch (S->fftLen)
{
case 4096U:
/* Initializations of structure parameters for 4096 point FFT */
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
case 1024U:
/* Initializations of structure parameters for 1024 point FFT */
S->twidCoefModifier = 4U;
S->bitRevFactor = 4U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

View File

@ -0,0 +1,136 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix4_init_q31.c
* Description: Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup ComplexFFT
* @{
*/
/**
*
* @brief Initialization function for the Q31 CFFT/CIFFT.
* @deprecated Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed
* @param[in,out] *S points to an instance of the Q31 CFFT/CIFFT structure.
* @param[in] fftLen length of the FFT.
* @param[in] ifftFlag flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
* @param[in] bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
* Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
* \par
* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
* \par
* The parameter <code>fftLen</code> Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_cfft_radix4_init_q31(
arm_cfft_radix4_instance_q31 * S,
uint16_t fftLen,
uint8_t ifftFlag,
uint8_t bitReverseFlag)
{
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
S->fftLen = fftLen;
/* Initialise the Twiddle coefficient pointer */
S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
/* Initialise the Flag for selection of CFFT or CIFFT */
S->ifftFlag = ifftFlag;
/* Initialise the Flag for calculation Bit reversal or not */
S->bitReverseFlag = bitReverseFlag;
/* Initializations of Instance structure depending on the FFT length */
switch (S->fftLen)
{
/* Initializations of structure parameters for 4096 point FFT */
case 4096U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 1U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 1U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) armBitRevTable;
break;
/* Initializations of structure parameters for 1024 point FFT */
case 1024U:
/* Initialise the twiddle coef modifier value */
S->twidCoefModifier = 4U;
/* Initialise the bit reversal table modifier */
S->bitRevFactor = 4U;
/* Initialise the bit reversal table pointer */
S->pBitRevTable = (uint16_t *) & armBitRevTable[3];
break;
case 256U:
/* Initializations of structure parameters for 256 point FFT */
S->twidCoefModifier = 16U;
S->bitRevFactor = 16U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[15];
break;
case 64U:
/* Initializations of structure parameters for 64 point FFT */
S->twidCoefModifier = 64U;
S->bitRevFactor = 64U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[63];
break;
case 16U:
/* Initializations of structure parameters for 16 point FFT */
S->twidCoefModifier = 256U;
S->bitRevFactor = 256U;
S->pBitRevTable = (uint16_t *) & armBitRevTable[255];
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of ComplexFFT group
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,285 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_radix8_f32.c
* Description: Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal helper function used by the FFTs
* -------------------------------------------------------------------- */
/*
* @brief Core function for the floating-point CFFT butterfly process.
* @param[in, out] *pSrc points to the in-place buffer of floating-point data type.
* @param[in] fftLen length of the FFT.
* @param[in] *pCoef points to the twiddle coefficient buffer.
* @param[in] twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_radix8_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
const float32_t * pCoef,
uint16_t twidCoefModifier)
{
uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
uint32_t id;
uint32_t n1, n2, j;
float32_t r1, r2, r3, r4, r5, r6, r7, r8;
float32_t t1, t2;
float32_t s1, s2, s3, s4, s5, s6, s7, s8;
float32_t p1, p2, p3, p4;
float32_t co2, co3, co4, co5, co6, co7, co8;
float32_t si2, si3, si4, si5, si6, si7, si8;
const float32_t C81 = 0.70710678118f;
n2 = fftLen;
do
{
n1 = n2;
n2 = n2 >> 3;
i1 = 0;
do
{
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
pSrc[2 * i5] = r1 - r2;
r1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
r2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
r4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = r1 - s3;
r1 = r1 + s3;
s3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1 + 1] = r1 + r2;
pSrc[2 * i5 + 1] = r1 - r2;
pSrc[2 * i3] = t1 + s3;
pSrc[2 * i7] = t1 - s3;
pSrc[2 * i3 + 1] = t2 - r3;
pSrc[2 * i7 + 1] = t2 + r3;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
r2 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - r2;
s5 = s5 + r2;
s8 = s7 - s6;
s7 = s7 + s6;
pSrc[2 * i2] = r5 + s7;
pSrc[2 * i8] = r5 - s7;
pSrc[2 * i6] = t1 + s8;
pSrc[2 * i4] = t1 - s8;
pSrc[2 * i2 + 1] = s5 - r7;
pSrc[2 * i8 + 1] = s5 + r7;
pSrc[2 * i6 + 1] = t2 - r8;
pSrc[2 * i4 + 1] = t2 + r8;
i1 += n1;
} while (i1 < fftLen);
if (n2 < 8)
break;
ia1 = 0;
j = 1;
do
{
/* index calculation for the coefficients */
id = ia1 + twidCoefModifier;
ia1 = id;
ia2 = ia1 + id;
ia3 = ia2 + id;
ia4 = ia3 + id;
ia5 = ia4 + id;
ia6 = ia5 + id;
ia7 = ia6 + id;
co2 = pCoef[2 * ia1];
co3 = pCoef[2 * ia2];
co4 = pCoef[2 * ia3];
co5 = pCoef[2 * ia4];
co6 = pCoef[2 * ia5];
co7 = pCoef[2 * ia6];
co8 = pCoef[2 * ia7];
si2 = pCoef[2 * ia1 + 1];
si3 = pCoef[2 * ia2 + 1];
si4 = pCoef[2 * ia3 + 1];
si5 = pCoef[2 * ia4 + 1];
si6 = pCoef[2 * ia5 + 1];
si7 = pCoef[2 * ia6 + 1];
si8 = pCoef[2 * ia7 + 1];
i1 = j;
do
{
/* index calculation for the input */
i2 = i1 + n2;
i3 = i2 + n2;
i4 = i3 + n2;
i5 = i4 + n2;
i6 = i5 + n2;
i7 = i6 + n2;
i8 = i7 + n2;
r1 = pSrc[2 * i1] + pSrc[2 * i5];
r5 = pSrc[2 * i1] - pSrc[2 * i5];
r2 = pSrc[2 * i2] + pSrc[2 * i6];
r6 = pSrc[2 * i2] - pSrc[2 * i6];
r3 = pSrc[2 * i3] + pSrc[2 * i7];
r7 = pSrc[2 * i3] - pSrc[2 * i7];
r4 = pSrc[2 * i4] + pSrc[2 * i8];
r8 = pSrc[2 * i4] - pSrc[2 * i8];
t1 = r1 - r3;
r1 = r1 + r3;
r3 = r2 - r4;
r2 = r2 + r4;
pSrc[2 * i1] = r1 + r2;
r2 = r1 - r2;
s1 = pSrc[2 * i1 + 1] + pSrc[2 * i5 + 1];
s5 = pSrc[2 * i1 + 1] - pSrc[2 * i5 + 1];
s2 = pSrc[2 * i2 + 1] + pSrc[2 * i6 + 1];
s6 = pSrc[2 * i2 + 1] - pSrc[2 * i6 + 1];
s3 = pSrc[2 * i3 + 1] + pSrc[2 * i7 + 1];
s7 = pSrc[2 * i3 + 1] - pSrc[2 * i7 + 1];
s4 = pSrc[2 * i4 + 1] + pSrc[2 * i8 + 1];
s8 = pSrc[2 * i4 + 1] - pSrc[2 * i8 + 1];
t2 = s1 - s3;
s1 = s1 + s3;
s3 = s2 - s4;
s2 = s2 + s4;
r1 = t1 + s3;
t1 = t1 - s3;
pSrc[2 * i1 + 1] = s1 + s2;
s2 = s1 - s2;
s1 = t2 - r3;
t2 = t2 + r3;
p1 = co5 * r2;
p2 = si5 * s2;
p3 = co5 * s2;
p4 = si5 * r2;
pSrc[2 * i5] = p1 + p2;
pSrc[2 * i5 + 1] = p3 - p4;
p1 = co3 * r1;
p2 = si3 * s1;
p3 = co3 * s1;
p4 = si3 * r1;
pSrc[2 * i3] = p1 + p2;
pSrc[2 * i3 + 1] = p3 - p4;
p1 = co7 * t1;
p2 = si7 * t2;
p3 = co7 * t2;
p4 = si7 * t1;
pSrc[2 * i7] = p1 + p2;
pSrc[2 * i7 + 1] = p3 - p4;
r1 = (r6 - r8) * C81;
r6 = (r6 + r8) * C81;
s1 = (s6 - s8) * C81;
s6 = (s6 + s8) * C81;
t1 = r5 - r1;
r5 = r5 + r1;
r8 = r7 - r6;
r7 = r7 + r6;
t2 = s5 - s1;
s5 = s5 + s1;
s8 = s7 - s6;
s7 = s7 + s6;
r1 = r5 + s7;
r5 = r5 - s7;
r6 = t1 + s8;
t1 = t1 - s8;
s1 = s5 - r7;
s5 = s5 + r7;
s6 = t2 - r8;
t2 = t2 + r8;
p1 = co2 * r1;
p2 = si2 * s1;
p3 = co2 * s1;
p4 = si2 * r1;
pSrc[2 * i2] = p1 + p2;
pSrc[2 * i2 + 1] = p3 - p4;
p1 = co8 * r5;
p2 = si8 * s5;
p3 = co8 * s5;
p4 = si8 * r5;
pSrc[2 * i8] = p1 + p2;
pSrc[2 * i8 + 1] = p3 - p4;
p1 = co6 * r6;
p2 = si6 * s6;
p3 = co6 * s6;
p4 = si6 * r6;
pSrc[2 * i6] = p1 + p2;
pSrc[2 * i6 + 1] = p3 - p4;
p1 = co4 * t1;
p2 = si4 * t2;
p3 = co4 * t2;
p4 = si4 * t1;
pSrc[2 * i4] = p1 + p2;
pSrc[2 * i4 + 1] = p3 - p4;
i1 += n1;
} while (i1 < fftLen);
j++;
} while (j < n2);
twidCoefModifier <<= 3;
} while (n2 > 7);
}

View File

@ -0,0 +1,449 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_f32.c
* Description: Processing function of DCT4 & IDCT4 F32
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
* @ingroup groupTransforms
*/
/**
* @defgroup DCT4_IDCT4 DCT Type IV Functions
* Representation of signals by minimum number of values is important for storage and transmission.
* The possibility of large discontinuity between the beginning and end of a period of a signal
* in DFT can be avoided by extending the signal so that it is even-symmetric.
* Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
* spectrum and is very widely used in signal and image coding applications.
* The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
* DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
*
* DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
* Reordering of the input data makes the computation of DCT just a problem of
* computing the DFT of a real signal with a few additional operations.
* This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
*
* DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.
* DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
* DCT2 implementation can be described in the following steps:
* - Re-ordering input
* - Calculating Real FFT
* - Multiplication of weights and Real FFT output and getting real part from the product.
*
* This process is explained by the block diagram below:
* \image html DCT4.gif "Discrete Cosine Transform - type-IV"
*
* \par Algorithm:
* The N-point type-IV DCT is defined as a real, linear transformation by the formula:
* \image html DCT4Equation.gif
* where <code>k = 0,1,2,.....N-1</code>
*\par
* Its inverse is defined as follows:
* \image html IDCT4Equation.gif
* where <code>n = 0,1,2,.....N-1</code>
*\par
* The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
* The symmetry of the transform matrix indicates that the fast algorithms for the forward
* and inverse transform computation are identical.
* Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
*
* \par Lengths supported by the transform:
* As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192.
* The library provides separate functions for Q15, Q31, and floating-point data types.
* \par Instance Structure
* The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
* A separate instance structure must be defined for each transform.
* There are separate instance structure declarations for each of the 3 supported data types.
*
* \par Initialization Functions
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
* To place an instance structure into a const data section, the instance structure must be manually initialized.
* Manually initialize the instance structure as follows:
* <pre>
*arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
*arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
* </pre>
* where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
* \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
* \c pTwiddle points to the twiddle factor table;
* \c pCosFactor points to the cosFactor table;
* \c pRfft points to the real FFT instance;
* \c pCfft points to the complex FFT instance;
* The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()
* and arm_rfft_f32() respectively for details regarding static initialization.
*
* \par Fixed-Point Behavior
* Care must be taken when using the fixed-point versions of the DCT4 transform functions.
* In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
* Refer to the function specific documentation below for usage guidelines.
*/
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the floating-point DCT4/IDCT4.
* @param[in] *S points to an instance of the floating-point DCT4/IDCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*/
void arm_dct4_f32(
const arm_dct4_instance_f32 * S,
float32_t * pState,
float32_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
float32_t *weights = S->pTwiddle; /* Pointer to the Weights table */
float32_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
float32_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
float32_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as,
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#if defined (ARM_MATH_DSP)
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2U;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1U) >> 2U;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1U) % 0x4U;
while (i > 0U)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2U;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while (i > 0U);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ * (float32_t) 0.5;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1U);
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = in * S->normalize;
/* Decrement the loop counter */
i--;
} while (i > 0U);
#endif /* #if defined (ARM_MATH_DSP) */
}
/**
* @} end of DCT4_IDCT4 group
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,382 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_q15.c
* Description: Processing function of DCT4 & IDCT4 Q15
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q15 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q15 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
*
* \par Input an output formats:
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ15Table.gif
*/
void arm_dct4_q15(
const arm_dct4_instance_q15 * S,
q15_t * pState,
q15_t * pInlineBuffer)
{
uint32_t i; /* Loop counter */
q15_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q15_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q15_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q15_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#if defined (ARM_MATH_DSP)
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = (uint32_t) S->Nby2 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2U;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = ((uint32_t) S->N - 1U) >> 2U;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1U;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = ((uint32_t) S->N - 1U) % 0x4U;
while (i > 0U)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = (uint32_t) S->N >> 2U;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while (i > 0U);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = (uint32_t) S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = (uint32_t) S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.13 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
arm_shift_q15(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter */
i = ((uint32_t) S->N - 1U);
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1U;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = (uint32_t) S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));
/* Decrement the loop counter */
i--;
} while (i > 0U);
#endif /* #if defined (ARM_MATH_DSP) */
}
/**
* @} end of DCT4_IDCT4 group
*/

View File

@ -0,0 +1,383 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_dct4_q31.c
* Description: Processing function of DCT4 & IDCT4 Q31
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/**
* @addtogroup DCT4_IDCT4
* @{
*/
/**
* @brief Processing function for the Q31 DCT4/IDCT4.
* @param[in] *S points to an instance of the Q31 DCT4 structure.
* @param[in] *pState points to state buffer.
* @param[in,out] *pInlineBuffer points to the in-place input and output buffer.
* @return none.
* \par Input an output formats:
* Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
* as the conversion from DCT2 to DCT4 involves one subtraction.
* Internally inputs are downscaled in the RFFT process function to avoid overflows.
* Number of bits downscaled, depends on the size of the transform.
* The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
*
* \image html dct4FormatsQ31Table.gif
*/
void arm_dct4_q31(
const arm_dct4_instance_q31 * S,
q31_t * pState,
q31_t * pInlineBuffer)
{
uint16_t i; /* Loop counter */
q31_t *weights = S->pTwiddle; /* Pointer to the Weights table */
q31_t *cosFact = S->pCosFactor; /* Pointer to the cos factors table */
q31_t *pS1, *pS2, *pbuff; /* Temporary pointers for input buffer and pState buffer */
q31_t in; /* Temporary variable */
/* DCT4 computation involves DCT2 (which is calculated using RFFT)
* along with some pre-processing and post-processing.
* Computational procedure is explained as follows:
* (a) Pre-processing involves multiplying input with cos factor,
* r(n) = 2 * u(n) * cos(pi*(2*n+1)/(4*n))
* where,
* r(n) -- output of preprocessing
* u(n) -- input to preprocessing(actual Source buffer)
* (b) Calculation of DCT2 using FFT is divided into three steps:
* Step1: Re-ordering of even and odd elements of input.
* Step2: Calculating FFT of the re-ordered input.
* Step3: Taking the real part of the product of FFT output and weights.
* (c) Post-processing - DCT4 can be obtained from DCT2 output using the following equation:
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* where,
* Y4 -- DCT4 output, Y2 -- DCT2 output
* (d) Multiplying the output with the normalizing factor sqrt(2/N).
*/
/*-------- Pre-processing ------------*/
/* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N);
arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N);
/* ----------------------------------------------------------------
* Step1: Re-ordering of even and odd elements as
* pState[i] = pInlineBuffer[2*i] and
* pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
---------------------------------------------------------------------*/
/* pS1 initialized to pState */
pS1 = pState;
/* pS2 initialized to pState+N-1, so that it points to the end of the state buffer */
pS2 = pState + (S->N - 1U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
#if defined (ARM_MATH_DSP)
/* Run the below code for Cortex-M4 and Cortex-M3 */
/* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
i = S->Nby2 >> 2U;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
*pS1++ = *pbuff++;
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2U;
/* Processing with loop unrolling 4 times as N is always multiple of 4.
* Compute 4 outputs at a time */
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
i = (S->N - 1U) >> 2U;
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1U;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
do
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
in = *pS1++ - in;
*pbuff++ = in;
pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
i = (S->N - 1U) % 0x4U;
while (i > 0U)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter to N/4 instead of N for loop unrolling */
i = S->N >> 2U;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
/* Processing with loop unrolling 4 times as N is always multiple of 4. Compute 4 outputs at a time */
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while (i > 0U);
#else
/* Run the below code for Cortex-M0 */
/* Initializing the loop counter to N/2 */
i = S->Nby2;
do
{
/* Re-ordering of even and odd elements */
/* pState[i] = pInlineBuffer[2*i] */
*pS1++ = *pbuff++;
/* pState[N-i-1] = pInlineBuffer[2*i+1] */
*pS2-- = *pbuff++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* pbuff initialized to input buffer */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Initializing the loop counter */
i = S->N;
do
{
/* Writing the re-ordered output back to inplace input buffer */
*pbuff++ = *pS1++;
/* Decrement the loop counter */
i--;
} while (i > 0U);
/* ---------------------------------------------------------
* Step2: Calculate RFFT for N-point input
* ---------------------------------------------------------- */
/* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
/*----------------------------------------------------------------------
* Step3: Multiply the FFT output with the weights.
*----------------------------------------------------------------------*/
arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
/* The output of complex multiplication is in 3.29 format.
* Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
arm_shift_q31(pState, 2, pState, S->N * 2);
/* ----------- Post-processing ---------- */
/* DCT-IV can be obtained from DCT-II by the equation,
* Y4(k) = Y2(k) - Y4(k-1) and Y4(-1) = Y4(0)
* Hence, Y4(0) = Y2(0)/2 */
/* Getting only real part from the output and Converting to DCT-IV */
/* pbuff initialized to input buffer. */
pbuff = pInlineBuffer;
/* pS1 initialized to pState */
pS1 = pState;
/* Calculating Y4(0) from Y2(0) using Y4(0) = Y2(0)/2 */
in = *pS1++ >> 1U;
/* input buffer acts as inplace, so output values are stored in the input itself. */
*pbuff++ = in;
/* pState pointer is incremented twice as the real values are located alternatively in the array */
pS1++;
/* Initializing the loop counter */
i = (S->N - 1U);
while (i > 0U)
{
/* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
/* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
in = *pS1++ - in;
*pbuff++ = in;
/* points to the next real value */
pS1++;
/* Decrement the loop counter */
i--;
}
/*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
/* Initializing the loop counter */
i = S->N;
/* pbuff initialized to the pInlineBuffer(now contains the output values) */
pbuff = pInlineBuffer;
do
{
/* Multiplying pInlineBuffer with the normalizing factor sqrt(2/N) */
in = *pbuff;
*pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));
/* Decrement the loop counter */
i--;
} while (i > 0U);
#endif /* #if defined (ARM_MATH_DSP) */
}
/**
* @} end of DCT4_IDCT4 group
*/

View File

@ -0,0 +1,318 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
* Description: RFFT & RIFFT Floating point process function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
extern void arm_radix4_butterfly_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier);
extern void arm_radix4_butterfly_inverse_f32(
float32_t * pSrc,
uint16_t fftLen,
float32_t * pCoef,
uint16_t twidCoefModifier,
float32_t onebyfftLen);
extern void arm_bitreversal_f32(
float32_t * pSrc,
uint16_t fftSize,
uint16_t bitRevFactor,
uint16_t * pBitRevTab);
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier);
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point RFFT/RIFFT.
* @deprecated Do not use this function. It has been superceded by \ref arm_rfft_fast_f32 and will be removed
* in the future.
* @param[in] *S points to an instance of the floating-point RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*/
void arm_rfft_f32(
const arm_rfft_instance_f32 * S,
float32_t * pSrc,
float32_t * pDst)
{
const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;
/* Calculation of Real IFFT of input */
if (S->ifftFlagR == 1U)
{
/* Real IFFT core process */
arm_split_rifft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex radix-4 IFFT process */
arm_radix4_butterfly_inverse_f32(pDst, S_CFFT->fftLen,
S_CFFT->pTwiddle,
S_CFFT->twidCoefModifier,
S_CFFT->onebyfftLen);
/* Bit reversal process */
if (S->bitReverseFlagR == 1U)
{
arm_bitreversal_f32(pDst, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
}
else
{
/* Calculation of RFFT of input */
/* Complex radix-4 FFT process */
arm_radix4_butterfly_f32(pSrc, S_CFFT->fftLen,
S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
/* Bit reversal process */
if (S->bitReverseFlagR == 1U)
{
arm_bitreversal_f32(pSrc, S_CFFT->fftLen,
S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
}
/* Real FFT core process */
arm_split_rfft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U]; /* temp pointers for output buffer */
float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U]; /* temp pointers for input buffer */
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2U];
pCoefB = &pBTable[modifier * 2U];
i = fftLen - 1U;
while (i > 0U)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
/* read pATable[2 * i] */
CoefA1 = *pCoefA++;
/* pATable[2 * i + 1] */
CoefA2 = *pCoefA;
/* pSrc[2 * i] * pATable[2 * i] */
outR = *pSrc1 * CoefA1;
/* pSrc[2 * i] * CoefA2 */
outI = *pSrc1++ * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR -= (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += *pSrc1++ * CoefA1;
CoefB1 = *pCoefB;
/* pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI -= *pSrc2 * CoefA2;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2-- * CoefB1;
/* write output */
*pDst1++ = outR;
*pDst1++ = outI;
/* write complex conjugate output */
*pDst2-- = -outI;
*pDst2-- = outR;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2U);
pCoefA = pCoefA + ((modifier * 2U) - 1U);
i--;
}
pDst[2U * fftLen] = pSrc[0] - pSrc[1];
pDst[(2U * fftLen) + 1U] = 0.0f;
pDst[0] = pSrc[0] + pSrc[1];
pDst[1] = 0.0f;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_f32(
float32_t * pSrc,
uint32_t fftLen,
float32_t * pATable,
float32_t * pBTable,
float32_t * pDst,
uint32_t modifier)
{
float32_t outR, outI; /* Temporary variables for output */
float32_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
float32_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while (fftLen > 0U)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * CoefA1 */
outR = *pSrc1 * CoefA1;
/* - pSrc[2 * i] * CoefA2 */
outI = -(*pSrc1++) * CoefA2;
/* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
outR += (*pSrc1 + *pSrc2) * CoefA2;
/* pSrc[2 * i + 1] * CoefA1 */
outI += (*pSrc1++) * CoefA1;
CoefB1 = *pCoefB;
/* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
outI -= *pSrc2-- * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefB1 */
outR += *pSrc2 * CoefB1;
/* pSrc[2 * fftLen - 2 * i] * CoefA2 */
outI += *pSrc2-- * CoefA2;
/* write output */
*pDst++ = outR;
*pDst++ = outI;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2U);
pCoefA = pCoefA + ((modifier * 2U) - 1U);
/* Decrement loop count */
fftLen--;
}
}

View File

@ -0,0 +1,317 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_f32.c
* Description: RFFT & RIFFT Floating point process function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
void stage_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t * pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b; /* temporary variables */
float32_t p0, p1, p2, p3; /* temporary variables */
k = (S->Sint).fftLen - 1;
/* Pack first and last sample of the frequency domain together */
xBR = pB[0];
xBI = pB[1];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++ ;
twI = *pCoeff++ ;
// U1 = XA(1) + XB(1); % It is real
t1a = xBR + xAR ;
// U2 = XB(1) - XA(1); % It is imaginary
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
*pOut++ = 0.5f * ( t1a + t1b );
*pOut++ = 0.5f * ( t1a - t1b );
// XA(1) = 1/2*( U1 - imag(U2) + i*( U1 +imag(U2) ));
pB = p + 2*k;
pA += 2;
do
{
/*
function X = my_split_rfft(X, ifftFlag)
% X is a series of real numbers
L = length(X);
XC = X(1:2:end) +i*X(2:2:end);
XA = fft(XC);
XB = conj(XA([1 end:-1:2]));
TW = i*exp(-2*pi*i*[0:L/2-1]/L).';
for l = 2:L/2
XA(l) = 1/2 * (XA(l) + XB(l) + TW(l) * (XB(l) - XA(l)));
end
XA(1) = 1/2* (XA(1) + XB(1) + TW(1) * (XB(1) - XA(1))) + i*( 1/2*( XA(1) + XB(1) + i*( XA(1) - XB(1))));
X = XA;
*/
xBI = pB[1];
xBR = pB[0];
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xBR - xAR ;
t1b = xBI + xAI ;
// real(tw * (xB - xA)) = twR * (xBR - xAR) - twI * (xBI - xAI);
// imag(tw * (xB - xA)) = twI * (xBR - xAR) + twR * (xBI - xAI);
p0 = twR * t1a;
p1 = twI * t1a;
p2 = twR * t1b;
p3 = twI * t1b;
*pOut++ = 0.5f * (xAR + xBR + p0 + p3 ); //xAR
*pOut++ = 0.5f * (xAI - xBI + p1 - p2 ); //xAI
pA += 2;
pB -= 2;
k--;
} while (k > 0U);
}
/* Prepares data for inverse cfft */
void merge_rfft_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut)
{
uint32_t k; /* Loop Counter */
float32_t twR, twI; /* RFFT Twiddle coefficients */
float32_t *pCoeff = S->pTwiddleRFFT; /* Points to RFFT Twiddle factors */
float32_t *pA = p; /* increasing pointer */
float32_t *pB = p; /* decreasing pointer */
float32_t xAR, xAI, xBR, xBI; /* temporary variables */
float32_t t1a, t1b, r, s, t, u; /* temporary variables */
k = (S->Sint).fftLen - 1;
xAR = pA[0];
xAI = pA[1];
pCoeff += 2 ;
*pOut++ = 0.5f * ( xAR + xAI );
*pOut++ = 0.5f * ( xAR - xAI );
pB = p + 2*k ;
pA += 2 ;
while (k > 0U)
{
/* G is half of the frequency complex spectrum */
//for k = 2:N
// Xk(k) = 1/2 * (G(k) + conj(G(N-k+2)) + Tw(k)*( G(k) - conj(G(N-k+2))));
xBI = pB[1] ;
xBR = pB[0] ;
xAR = pA[0];
xAI = pA[1];
twR = *pCoeff++;
twI = *pCoeff++;
t1a = xAR - xBR ;
t1b = xAI + xBI ;
r = twR * t1a;
s = twI * t1b;
t = twI * t1a;
u = twR * t1b;
// real(tw * (xA - xB)) = twR * (xAR - xBR) - twI * (xAI - xBI);
// imag(tw * (xA - xB)) = twI * (xAR - xBR) + twR * (xAI - xBI);
*pOut++ = 0.5f * (xAR + xBR - r - s ); //xAR
*pOut++ = 0.5f * (xAI - xBI + t - u ); //xAI
pA += 2;
pB -= 2;
k--;
}
}
/**
* @ingroup groupTransforms
*/
/**
* @defgroup RealFFT Real FFT Functions
*
* \par
* The CMSIS DSP library includes specialized algorithms for computing the
* FFT of real data sequences. The FFT is defined over complex data but
* in many applications the input is real. Real FFT algorithms take advantage
* of the symmetry properties of the FFT and have a speed advantage over complex
* algorithms of the same length.
* \par
* The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
* \par
* The real length N forward FFT of a sequence is computed using the steps shown below.
* \par
* \image html RFFT.gif "Real Fast Fourier Transform"
* \par
* The real sequence is initially treated as if it were complex to perform a CFFT.
* Later, a processing stage reshapes the data to obtain half of the frequency spectrum
* in complex format. Except the first complex number that contains the two real numbers
* X[0] and X[N/2] all the data is complex. In other words, the first complex sample
* contains two real values packed.
* \par
* The input for the inverse RFFT should keep the same format as the output of the
* forward RFFT. A first processing stage pre-process the data to later perform an
* inverse CFFT.
* \par
* \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
* \par
* The algorithms for floating-point, Q15, and Q31 data are slightly different
* and we describe each algorithm in turn.
* \par Floating-point
* The main functions are arm_rfft_fast_f32() and arm_rfft_fast_init_f32().
* The older functions arm_rfft_f32() and arm_rfft_init_f32() have been
* deprecated but are still documented.
* \par
* The FFT of a real N-point sequence has even symmetry in the frequency
* domain. The second half of the data equals the conjugate of the first
* half flipped in frequency. Looking at the data, we see that we can
* uniquely represent the FFT using only N/2 complex numbers. These are
* packed into the output array in alternating real and imaginary
* components:
* \par
* X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
* real[(N/2)-1], imag[(N/2)-1 }
* \par
* It happens that the first complex number (real[0], imag[0]) is actually
* all real. real[0] represents the DC offset, and imag[0] should be 0.
* (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
* the first harmonic and so on.
* \par
* The real FFT functions pack the frequency domain data in this fashion.
* The forward transform outputs the data in this form and the inverse
* transform expects input data in this form. The function always performs
* the needed bitreversal so that the input and output data is always in
* normal order. The functions support lengths of [32, 64, 128, ..., 4096]
* samples.
* \par Q15 and Q31
* The real algorithms are defined in a similar manner and utilize N/2 complex
* transforms behind the scenes.
* \par
* The complex transforms used internally include scaling to prevent fixed-point
* overflows. The overall scaling equals 1/(fftLen/2).
* \par
* A separate instance structure must be defined for each transform used but
* twiddle factor and bit reversal tables can be reused.
* \par
* There is also an associated initialization function for each data type.
* The initialization function performs the following operations:
* - Sets the values of the internal structure fields.
* - Initializes twiddle factor table and bit reversal table pointers.
* - Initializes the internal complex FFT data structure.
* \par
* Use of the initialization function is optional.
* However, if the initialization function is used, then the instance structure
* cannot be placed into a const data section. To place an instance structure
* into a const data section, the instance structure should be manually
* initialized as follows:
* <pre>
*arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
*arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
* </pre>
* where <code>fftLenReal</code> is the length of the real transform;
* <code>fftLenBy2</code> length of the internal complex transform.
* <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
* <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
* output (=1).
* <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
* The value is based on the FFT length;
* <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
* <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
* <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
* must also be initialized. Refer to arm_cfft_radix4_f32() for details regarding
* static initialization of the complex FFT instance structure.
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the floating-point real FFT.
* @param[in] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] *p points to the input buffer.
* @param[in] *pOut points to the output buffer.
* @param[in] ifftFlag RFFT if flag is 0, RIFFT if flag is 1
* @return none.
*/
void arm_rfft_fast_f32(
arm_rfft_fast_instance_f32 * S,
float32_t * p, float32_t * pOut,
uint8_t ifftFlag)
{
arm_cfft_instance_f32 * Sint = &(S->Sint);
Sint->fftLen = S->fftLenRFFT / 2;
/* Calculation of Real FFT */
if (ifftFlag)
{
/* Real FFT compression */
merge_rfft_f32(S, p, pOut);
/* Complex radix-4 IFFT process */
arm_cfft_f32( Sint, pOut, ifftFlag, 1);
}
else
{
/* Calculation of RFFT of input */
arm_cfft_f32( Sint, p, ifftFlag, 1);
/* Real FFT extraction */
stage_rfft_f32(S, p, pOut);
}
}
/**
* @} end of RealFFT group
*/

View File

@ -0,0 +1,131 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cfft_init_f32.c
* Description: Split Radix Decimation in Frequency CFFT Floating point processing function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
#include "arm_common_tables.h"
/**
* @ingroup groupTransforms
*/
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Initialization function for the floating-point real FFT.
* @param[in,out] *S points to an arm_rfft_fast_instance_f32 structure.
* @param[in] fftLen length of the Real Sequence.
* @return The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
*
* \par Description:
* \par
* The parameter <code>fftLen</code> Specifies length of RFFT/CIFFT process. Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
* \par
* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
*/
arm_status arm_rfft_fast_init_f32(
arm_rfft_fast_instance_f32 * S,
uint16_t fftLen)
{
arm_cfft_instance_f32 * Sint;
/* Initialise the default arm status */
arm_status status = ARM_MATH_SUCCESS;
/* Initialise the FFT length */
Sint = &(S->Sint);
Sint->fftLen = fftLen/2;
S->fftLenRFFT = fftLen;
/* Initializations of structure parameters depending on the FFT length */
switch (Sint->fftLen)
{
case 2048U:
/* Initializations of structure parameters for 2048 point FFT */
/* Initialise the bit reversal table length */
Sint->bitRevLength = ARMBITREVINDEXTABLE_2048_TABLE_LENGTH;
/* Initialise the bit reversal table pointer */
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
/* Initialise the Twiddle coefficient pointers */
Sint->pTwiddle = (float32_t *) twiddleCoef_2048;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
break;
case 1024U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_1024_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
Sint->pTwiddle = (float32_t *) twiddleCoef_1024;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
break;
case 512U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
Sint->pTwiddle = (float32_t *) twiddleCoef_512;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
break;
case 256U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
Sint->pTwiddle = (float32_t *) twiddleCoef_256;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
break;
case 128U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
Sint->pTwiddle = (float32_t *) twiddleCoef_128;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
break;
case 64U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_64_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
Sint->pTwiddle = (float32_t *) twiddleCoef_64;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
break;
case 32U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_32_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
Sint->pTwiddle = (float32_t *) twiddleCoef_32;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
break;
case 16U:
Sint->bitRevLength = ARMBITREVINDEXTABLE_16_TABLE_LENGTH;
Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
Sint->pTwiddle = (float32_t *) twiddleCoef_16;
S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
break;
default:
/* Reporting argument error if fftSize is not valid value */
status = ARM_MATH_ARGUMENT_ERROR;
break;
}
return (status);
}
/**
* @} end of RealFFT group
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,426 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_q15.c
* Description: RFFT & RIFFT Q15 process function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q15 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q15 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
* \par
* \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
*/
void arm_rfft_q15(
const arm_rfft_instance_q15 * S,
q15_t * pSrc,
q15_t * pDst)
{
const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
uint32_t i;
uint32_t L2 = S->fftLenReal >> 1;
/* Calculation of RIFFT of input */
if (S->ifftFlagR == 1U)
{
/* Real IFFT core process */
arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex IFFT process */
arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
for(i=0;i<S->fftLenReal;i++)
{
pDst[i] = pDst[i] << 1;
}
}
else
{
/* Calculation of RFFT of input */
/* Complex FFT process */
arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
/* Real FFT core process */
arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param *pSrc points to the input buffer.
* @param fftLen length of FFT.
* @param *pATable points to the A twiddle Coef buffer.
* @param *pBTable points to the B twiddle Coef buffer.
* @param *pDst points to the output buffer.
* @param modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real FFT
*/
void arm_split_rfft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
#if defined (ARM_MATH_DSP)
q15_t *pD1, *pD2;
#endif
// pSrc[2U * fftLen] = pSrc[0];
// pSrc[(2U * fftLen) + 1U] = pSrc[1];
pCoefA = &pATable[modifier * 2U];
pCoefB = &pBTable[modifier * 2U];
pSrc1 = &pSrc[2];
pSrc2 = &pSrc[(2U * fftLen) - 2U];
#if defined (ARM_MATH_DSP)
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = 1U;
pD1 = pDst + 2;
pD2 = pDst + (4U * fftLen) - 2;
for(i = fftLen - 1; i > 0; i--)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
#ifndef ARM_MATH_BIG_ENDIAN
/* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
#else
/* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16U;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
#else
outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
/* write output */
*pD1++ = (q15_t) outR;
*pD1++ = outI >> 16U;
/* write complex conjugate output */
pD2[0] = (q15_t) outR;
pD2[1] = -(outI >> 16U);
pD2 -= 2;
/* update coefficient pointer */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
}
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2U * fftLen) + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
#else
/* Run the below code for Cortex-M0 */
i = 1U;
while (i < fftLen)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc1 * *pCoefA;
outR = outR - (*(pSrc1 + 1) * *(pCoefA + 1));
outR = outR + (*pSrc2 * *pCoefB);
outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *pSrc2 * *(pCoefB + 1);
outI = outI - (*(pSrc2 + 1) * *pCoefB);
outI = outI + (*(pSrc1 + 1) * *pCoefA);
outI = outI + (*pSrc1 * *(pCoefA + 1));
/* update input pointers */
pSrc1 += 2U;
pSrc2 -= 2U;
/* write output */
pDst[2U * i] = (q15_t) outR;
pDst[(2U * i) + 1U] = outI >> 16U;
/* write complex conjugate output */
pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR;
pDst[((4U * fftLen) - (2U * i)) + 1U] = -(outI >> 16U);
/* update coefficient pointer */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
i++;
}
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2U * fftLen) + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
#endif /* #if defined (ARM_MATH_DSP) */
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
* The function implements a Real IFFT
*/
void arm_split_rifft_q15(
q15_t * pSrc,
uint32_t fftLen,
q15_t * pATable,
q15_t * pBTable,
q15_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q15_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q15_t *pSrc1, *pSrc2;
q15_t *pDst1 = &pDst[0];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
pSrc1 = &pSrc[0];
pSrc2 = &pSrc[2U * fftLen];
#if defined (ARM_MATH_DSP)
/* Run the below code for Cortex-M4 and Cortex-M3 */
i = fftLen;
while (i > 0U)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
#ifndef ARM_MATH_BIG_ENDIAN
/* pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
#else
/* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] */
outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16U;
/*
-pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
/* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
#ifndef ARM_MATH_BIG_ENDIAN
outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
#else
outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* write output */
#ifndef ARM_MATH_BIG_ENDIAN
*__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16U), 16);
#else
*__SIMD32(pDst1)++ = __PKHBT((outI >> 16U), outR, 16);
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
/* update coefficient pointer */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
i--;
}
#else
/* Run the below code for Cortex-M0 */
i = fftLen;
while (i > 0U)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
outR = *pSrc2 * *pCoefB;
outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
outR = outR + (*pSrc1 * *pCoefA);
outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
/*
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
outI = *(pSrc1 + 1) * *pCoefA;
outI = outI - (*pSrc1 * *(pCoefA + 1));
outI = outI - (*pSrc2 * *(pCoefB + 1));
outI = outI - (*(pSrc2 + 1) * *(pCoefB));
/* update input pointers */
pSrc1 += 2U;
pSrc2 -= 2U;
/* write output */
*pDst1++ = (q15_t) outR;
*pDst1++ = (q15_t) (outI >> 16);
/* update coefficient pointer */
pCoefB = pCoefB + (2U * modifier);
pCoefA = pCoefA + (2U * modifier);
i--;
}
#endif /* #if defined (ARM_MATH_DSP) */
}

View File

@ -0,0 +1,283 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_rfft_q31.c
* Description: FFT & RIFFT Q31 process function
*
* $Date: 27. January 2017
* $Revision: V.1.5.1
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_math.h"
/* ----------------------------------------------------------------------
* Internal functions prototypes
* -------------------------------------------------------------------- */
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier);
/**
* @addtogroup RealFFT
* @{
*/
/**
* @brief Processing function for the Q31 RFFT/RIFFT.
* @param[in] *S points to an instance of the Q31 RFFT/RIFFT structure.
* @param[in] *pSrc points to the input buffer.
* @param[out] *pDst points to the output buffer.
* @return none.
*
* \par Input an output formats:
* \par
* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
* Hence the output format is different for different RFFT sizes.
* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
* \par
* \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
*
* \par
* \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
*/
void arm_rfft_q31(
const arm_rfft_instance_q31 * S,
q31_t * pSrc,
q31_t * pDst)
{
const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
uint32_t i;
uint32_t L2 = S->fftLenReal >> 1;
/* Calculation of RIFFT of input */
if (S->ifftFlagR == 1U)
{
/* Real IFFT core process */
arm_split_rifft_q31(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
/* Complex IFFT process */
arm_cfft_q31(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
for(i=0;i<S->fftLenReal;i++)
{
pDst[i] = pDst[i] << 1;
}
}
else
{
/* Calculation of RFFT of input */
/* Complex FFT process */
arm_cfft_q31(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
/* Real FFT core process */
arm_split_rfft_q31(pSrc, L2, S->pTwiddleAReal,
S->pTwiddleBReal, pDst, S->twidCoefRModifier);
}
}
/**
* @} end of RealFFT group
*/
/**
* @brief Core Real FFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rfft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
uint32_t i; /* Loop Counter */
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4U * fftLen) - 1U];
q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2U * fftLen) - 1U];
/* Init coefficient pointers */
pCoefA = &pATable[modifier * 2U];
pCoefB = &pBTable[modifier * 2U];
i = fftLen - 1U;
while (i > 0U)
{
/*
outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
+ pSrc[2 * n - 2 * i] * pBTable[2 * i] +
pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
*/
/* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pSrc[2 * i] * pATable[2 * i] */
mult_32x32_keep32_R(outR, *pIn1, CoefA1);
/* outI = pIn[2 * i] * pATable[2 * i + 1] */
mult_32x32_keep32_R(outI, *pIn1++, CoefA2);
/* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
multSub_32x32_keep32_R(outR, *pIn1, CoefA2);
/* (pIn[2 * i + 1] * pATable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
/* pSrc[2 * n - 2 * i] * pBTable[2 * i] */
multSub_32x32_keep32_R(outR, *pIn2, CoefA2);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
/* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefA2);
/* write output */
*pOut1++ = outR;
*pOut1++ = outI;
/* write complex conjugate output */
*pOut2-- = -outI;
*pOut2-- = outR;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2U);
pCoefA = pCoefA + ((modifier * 2U) - 1U);
i--;
}
pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
pDst[(2U * fftLen) + 1U] = 0;
pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
pDst[1] = 0;
}
/**
* @brief Core Real IFFT process
* @param[in] *pSrc points to the input buffer.
* @param[in] fftLen length of FFT.
* @param[in] *pATable points to the twiddle Coef A buffer.
* @param[in] *pBTable points to the twiddle Coef B buffer.
* @param[out] *pDst points to the output buffer.
* @param[in] modifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
* @return none.
*/
void arm_split_rifft_q31(
q31_t * pSrc,
uint32_t fftLen,
q31_t * pATable,
q31_t * pBTable,
q31_t * pDst,
uint32_t modifier)
{
q31_t outR, outI; /* Temporary variables for output */
q31_t *pCoefA, *pCoefB; /* Temporary pointers for twiddle factors */
q31_t CoefA1, CoefA2, CoefB1; /* Temporary variables for twiddle coefficients */
q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2U * fftLen) + 1U];
pCoefA = &pATable[0];
pCoefB = &pBTable[0];
while (fftLen > 0U)
{
/*
outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
pIn[2 * n - 2 * i] * pBTable[2 * i] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
*/
CoefA1 = *pCoefA++;
CoefA2 = *pCoefA;
/* outR = (pIn[2 * i] * pATable[2 * i] */
mult_32x32_keep32_R(outR, *pIn1, CoefA1);
/* - pIn[2 * i] * pATable[2 * i + 1] */
mult_32x32_keep32_R(outI, *pIn1++, -CoefA2);
/* pIn[2 * i + 1] * pATable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn1, CoefA2);
/* pIn[2 * i + 1] * pATable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
/* pIn[2 * n - 2 * i] * pBTable[2 * i] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefA2);
CoefB1 = *pCoefB;
/* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
/* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
multAcc_32x32_keep32_R(outI, *pIn2--, CoefA2);
/* write output */
*pDst++ = outR;
*pDst++ = outI;
/* update coefficient pointer */
pCoefB = pCoefB + (modifier * 2U);
pCoefA = pCoefA + ((modifier * 2U) - 1U);
/* Decrement loop count */
fftLen--;
}
}