Official ARM version: v5.6.0

2020-07-28 11:24:49 +01:00 · 2020-07-28 11:24:49 +01:00 · 96d6da4e25
commit 96d6da4e25
parent 9f95ff5b6b
2939 changed files with 339304 additions and 113320 deletions
--- a/DSP/Source/TransformFunctions/CMakeLists.txt
+++ b/DSP/Source/TransformFunctions/CMakeLists.txt
@ -0,0 +1,116 @@
+cmake_minimum_required (VERSION 3.11)
+
+project(CMSISDSPTransform)
+
+# The library target is declared without sources, which CMake accepts from 3.11 on;
+# the source files are attached afterwards by the target_sources() calls in this file.
+add_library(CMSISDSPTransform STATIC)
+
+include(fft)
+fft(CMSISDSPTransform)
+
+if (CONFIGTABLE AND ALLFFT)
+target_compile_definitions(CMSISDSPTransform PUBLIC ARM_ALL_FFT_TABLES) 
+endif() 
+
+target_sources(CMSISDSPTransform PRIVATE arm_bitreversal.c)
+target_sources(CMSISDSPTransform PRIVATE arm_bitreversal2.c)
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_F32_16 OR CFFT_F32_32 OR CFFT_F32_64 OR CFFT_F32_128 OR CFFT_F32_256 OR CFFT_F32_512 
+    OR CFFT_F32_1024 OR CFFT_F32_2048 OR CFFT_F32_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q15_16 OR CFFT_Q15_32 OR CFFT_Q15_64 OR CFFT_Q15_128 OR CFFT_Q15_256 OR CFFT_Q15_512 
+    OR CFFT_Q15_1024 OR CFFT_Q15_2048 OR CFFT_Q15_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR CFFT_Q31_16 OR CFFT_Q31_32 OR CFFT_Q31_64 OR CFFT_Q31_128 OR CFFT_Q31_256 OR CFFT_Q31_512 
+    OR CFFT_Q31_1024 OR CFFT_Q31_2048 OR CFFT_Q31_4096)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix2_init_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR DCT4_F32_128 OR DCT4_F32_512 OR DCT4_F32_2048 OR DCT4_F32_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_f32.c)
+
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q31_128 OR DCT4_Q31_512 OR DCT4_Q31_2048 OR DCT4_Q31_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q31.c)
+
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR DCT4_Q15_128 OR DCT4_Q15_512 OR DCT4_Q15_2048 OR DCT4_Q15_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_dct4_q15.c)
+# Added when CONFIGTABLE is off, ALLFFT is on, or any DCT4_Q15_* option is on; the Q15 RFFT and CFFT radix-4 sources below come with the Q15 DCT4 sources.
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_FAST_F32_32 OR RFFT_FAST_F32_64 OR RFFT_FAST_F32_128
+   OR RFFT_FAST_F32_256 OR RFFT_FAST_F32_512 OR RFFT_FAST_F32_1024 OR RFFT_FAST_F32_2048
+   OR RFFT_FAST_F32_4096 )
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_fast_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix8_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_F32_128 OR RFFT_F32_512 OR RFFT_F32_2048 OR RFFT_F32_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_init_f32.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_f32.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q15_32 OR RFFT_Q15_64 OR RFFT_Q15_128 OR RFFT_Q15_256
+     OR RFFT_Q15_512 OR RFFT_Q15_1024 OR RFFT_Q15_2048 OR RFFT_Q15_4096 OR RFFT_Q15_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q15.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q15.c)
+endif()
+
+if (NOT CONFIGTABLE OR ALLFFT OR RFFT_Q31_32 OR RFFT_Q31_64 OR RFFT_Q31_128 OR RFFT_Q31_256
+     OR RFFT_Q31_512 OR RFFT_Q31_1024 OR RFFT_Q31_2048 OR RFFT_Q31_4096 OR RFFT_Q31_8192)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_init_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_rfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_q31.c)
+target_sources(CMSISDSPTransform PRIVATE arm_cfft_radix4_q31.c)
+endif()
+
+configdsp(CMSISDSPTransform ..)
+
+### Includes
+target_include_directories(CMSISDSPTransform PUBLIC "${DSP}/../../Include")
+
+
+
--- a/DSP/Source/TransformFunctions/TransformFunctions.c
+++ b/DSP/Source/TransformFunctions/TransformFunctions.c
@ -0,0 +1,60 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        TransformFunctions.c
+ * Description:  Combination of all transform function source files.
+ *
+ * $Date:        18. March 2019
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_bitreversal.c"
+#include "arm_bitreversal2.c"
+#include "arm_cfft_f32.c"
+#include "arm_cfft_q15.c"
+#include "arm_cfft_q31.c"
+#include "arm_cfft_radix2_f32.c"
+#include "arm_cfft_radix2_init_f32.c"
+#include "arm_cfft_radix2_init_q15.c"
+#include "arm_cfft_radix2_init_q31.c"
+#include "arm_cfft_radix2_q15.c"
+#include "arm_cfft_radix2_q31.c"
+#include "arm_cfft_radix4_f32.c"
+#include "arm_cfft_radix4_init_f32.c"
+#include "arm_cfft_radix4_init_q15.c"
+#include "arm_cfft_radix4_init_q31.c"
+#include "arm_cfft_radix4_q15.c"
+#include "arm_cfft_radix4_q31.c"
+#include "arm_cfft_radix8_f32.c"
+#include "arm_dct4_f32.c"
+#include "arm_dct4_init_f32.c"
+#include "arm_dct4_init_q15.c"
+#include "arm_dct4_init_q31.c"
+#include "arm_dct4_q15.c"
+#include "arm_dct4_q31.c"
+#include "arm_rfft_f32.c"
+#include "arm_rfft_fast_f32.c"
+#include "arm_rfft_fast_init_f32.c"
+#include "arm_rfft_init_f32.c"
+#include "arm_rfft_init_q15.c"
+#include "arm_rfft_init_q31.c"
+#include "arm_rfft_q15.c"
+#include "arm_rfft_q31.c"
--- a/DSP/Source/TransformFunctions/arm_bitreversal.c
+++ b/DSP/Source/TransformFunctions/arm_bitreversal.c
@ -3,13 +3,13 @@
 * Title:        arm_bitreversal.c
 * Description:  Bitreversal functions
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,20 +29,20 @@
 #include "arm_math.h"
 #include "arm_common_tables.h"

-/*
-* @brief  In-place bit reversal function.
-* @param[in, out] *pSrc        points to the in-place buffer of floating-point data type.
-* @param[in]      fftSize      length of the FFT.
-* @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table.
-* @param[in]      *pBitRevTab  points to the bit reversal table.
-* @return none.
-*/
+/**
+  @brief         In-place floating-point bit reversal function.
+  @param[in,out] pSrc         points to in-place floating-point data buffer
+  @param[in]     fftSize      length of FFT
+  @param[in]     bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+  @param[in]     pBitRevTab   points to bit reversal table
+  @return        none
+ */

 void arm_bitreversal_f32(
-float32_t * pSrc,
-uint16_t fftSize,
-uint16_t bitRevFactor,
-uint16_t * pBitRevTab)
+        float32_t * pSrc,
+        uint16_t fftSize,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab)
 {
   uint16_t fftLenBy2, fftLenBy2p1;
   uint16_t i, j;
@ -100,21 +100,20 @@ uint16_t * pBitRevTab)
 }


-
-/*
-* @brief  In-place bit reversal function.
-* @param[in, out] *pSrc        points to the in-place buffer of Q31 data type.
-* @param[in]      fftLen       length of the FFT.
-* @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
-* @param[in]      *pBitRevTab  points to bit reversal table.
-* @return none.
+/**
+  @brief         In-place Q31 bit reversal function.
+  @param[in,out] pSrc         points to in-place Q31 data buffer.
+  @param[in]     fftLen       length of FFT.
+  @param[in]     bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+  @param[in]     pBitRevTab   points to bit reversal table
+  @return        none
 */

 void arm_bitreversal_q31(
-q31_t * pSrc,
-uint32_t fftLen,
-uint16_t bitRevFactor,
-uint16_t * pBitRevTable)
+        q31_t * pSrc,
+        uint32_t fftLen,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab)
 {
   uint32_t fftLenBy2, fftLenBy2p1, i, j;
   q31_t in;
@ -163,29 +162,29 @@ uint16_t * pBitRevTable)
      pSrc[(2U * (j + fftLenBy2)) + 1U] = in;

      /*  Reading the index for the bit reversal */
-      j = *pBitRevTable;
+      j = *pBitRevTab;

      /*  Updating the bit reversal index depending on the fft length */
-      pBitRevTable += bitRevFactor;
+      pBitRevTab += bitRevFactor;
   }
 }



-/*
-   * @brief  In-place bit reversal function.
-   * @param[in, out] *pSrc        points to the in-place buffer of Q15 data type.
-   * @param[in]      fftLen       length of the FFT.
-   * @param[in]      bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
-   * @param[in]      *pBitRevTab  points to bit reversal table.
-   * @return none.
+/**
+  @brief         In-place Q15 bit reversal function.
+  @param[in,out] pSrc16       points to in-place Q15 data buffer
+  @param[in]     fftLen       length of FFT
+  @param[in]     bitRevFactor bit reversal modifier that supports different size FFTs with the same bit reversal table
+  @param[in]     pBitRevTab   points to bit reversal table
+  @return        none
 */

 void arm_bitreversal_q15(
-q15_t * pSrc16,
-uint32_t fftLen,
-uint16_t bitRevFactor,
-uint16_t * pBitRevTab)
+        q15_t * pSrc16,
+        uint32_t fftLen,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab)
 {
   q31_t *pSrc = (q31_t *) pSrc16;
   q31_t in;
--- a/DSP/Source/TransformFunctions/arm_bitreversal2.S
+++ b/DSP/Source/TransformFunctions/arm_bitreversal2.S
@ -5,13 +5,13 @@
 ; *               Called after doing an fft to reorder the output.
 ; *               The function is loop unrolled by 2. arm_bitreversal_16 as well.
 ; *
-; * $Date:        27. January 2017
-; * $Revision:    V.1.5.1
+; * $Date:        18. March 2019
+; * $Revision:    V1.5.2
 ; *
 ; * Target Processor: Cortex-M cores
 ; * -------------------------------------------------------------------- */
 ;/*
-; * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+; * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 ; *
 ; * SPDX-License-Identifier: Apache-2.0
 ; *
@ -68,13 +68,13 @@
 	CODESECT
 	THUMB

-;/*
-;* @brief  In-place bit reversal function.
-;* @param[in, out] *pSrc        points to the in-place buffer of unknown 32-bit data type.
-;* @param[in]      bitRevLen    bit reversal table length
-;* @param[in]      *pBitRevTab  points to bit reversal table.
-;* @return none.
-;*/
+;/**
+;  @brief         In-place bit reversal function.
+;  @param[in,out] pSrc        points to the in-place buffer of unknown 32-bit data type
+;  @param[in]     bitRevLen   bit reversal table length
+;  @param[in]     pBitRevTab  points to bit reversal table
+;  @return        none
+; */
 	EXPORT arm_bitreversal_32
 	EXPORT arm_bitreversal_16

@ -87,7 +87,7 @@
 	.type   arm_bitreversal_32, %function
 #endif

-#if defined(ARM_MATH_CM0) || defined(ARM_MATH_CM0PLUS) || defined(ARM_MATH_ARMV8MBL)
+#if defined (ARM_MATH_CM0_FAMILY)

 arm_bitreversal_32 PROC
 	ADDS     r3,r1,#1
--- a/DSP/Source/TransformFunctions/arm_bitreversal2.c
+++ b/DSP/Source/TransformFunctions/arm_bitreversal2.c
@ -0,0 +1,99 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_bitreversal2.c
+ * Description:  Bitreversal functions
+ *
+ * $Date:        18. March 2019
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_math.h"
+#include "arm_common_tables.h"
+
+/**
+  @brief         In-place bit reversal of a buffer of 32-bit words, driven by a table of swap pairs.
+  @param[in,out] pSrc        points to in-place buffer of unknown 32-bit data type; the word at each index and the word after it are swapped
+  @param[in]     bitRevLen   bit reversal table length; entries are consumed two at a time (i and i + 1)
+  @param[in]     pBitRevTab  points to bit reversal table; each entry is shifted right by 2 to form an index into pSrc
+  @return        none
+*/
+
+void arm_bitreversal_32(
+        uint32_t *pSrc, 
+  const uint16_t bitRevLen, 
+  const uint16_t *pBitRevTab)
+{
+  uint32_t a, b, i, tmp;
+  /* no increment in the for header: i is advanced by 2 at the bottom of the body */
+  for (i = 0; i < bitRevLen; )
+  {
+     a = pBitRevTab[i    ] >> 2;
+     b = pBitRevTab[i + 1] >> 2;
+
+     /* exchange the words at a and b (real parts) */
+     tmp = pSrc[a];
+     pSrc[a] = pSrc[b];
+     pSrc[b] = tmp;
+
+     /* exchange the words that follow them (imaginary parts) */
+     tmp = pSrc[a+1];
+     pSrc[a+1] = pSrc[b+1];
+     pSrc[b+1] = tmp;
+
+    i += 2;
+  }
+}
+
+
+/**
+  @brief         In-place bit reversal of a buffer of 16-bit words, driven by a table of swap pairs.
+  @param[in,out] pSrc        points to in-place buffer of unknown 16-bit data type; the word at each index and the word after it are swapped
+  @param[in]     bitRevLen   bit reversal table length; entries are consumed two at a time (i and i + 1)
+  @param[in]     pBitRevTab  points to bit reversal table; each entry is shifted right by 2 to form an index into pSrc
+  @return        none
+*/
+
+void arm_bitreversal_16(
+        uint16_t *pSrc, 
+  const uint16_t bitRevLen, 
+  const uint16_t *pBitRevTab)
+{
+  uint16_t a, b, i, tmp;
+  /* no increment in the for header: i is advanced by 2 at the bottom of the body */
+  for (i = 0; i < bitRevLen; )
+  {
+     a = pBitRevTab[i    ] >> 2;
+     b = pBitRevTab[i + 1] >> 2;
+
+     /* exchange the words at a and b (real parts) */
+     tmp = pSrc[a];
+     pSrc[a] = pSrc[b];
+     pSrc[b] = tmp;
+
+     /* exchange the words that follow them (imaginary parts) */
+     tmp = pSrc[a+1];
+     pSrc[a+1] = pSrc[b+1];
+     pSrc[b+1] = tmp;
+
+    i += 2;
+  }
+}
--- a/DSP/Source/TransformFunctions/arm_cfft_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_f32.c
--- a/DSP/Source/TransformFunctions/arm_cfft_q15.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_q15.c
 * Description:  Combined Radix Decimation in Q15 Frequency CFFT processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,317 +29,304 @@
 #include "arm_math.h"

 extern void arm_radix4_butterfly_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pCoef,
-    uint32_t twidCoefModifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint32_t twidCoefModifier);

 extern void arm_radix4_butterfly_inverse_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pCoef,
-    uint32_t twidCoefModifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint32_t twidCoefModifier);

 extern void arm_bitreversal_16(
-    uint16_t * pSrc,
-    const uint16_t bitRevLen,
-    const uint16_t * pBitRevTable);
+        uint16_t * pSrc,
+  const uint16_t bitRevLen,
+  const uint16_t * pBitRevTable);

 void arm_cfft_radix4by2_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    const q15_t * pCoef);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef);

 void arm_cfft_radix4by2_inverse_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    const q15_t * pCoef);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef);

 /**
-* @ingroup groupTransforms
-*/
+  @ingroup groupTransforms
+ */

 /**
-* @addtogroup ComplexFFT
-* @{
-*/
+  @addtogroup ComplexFFT
+  @{
+ */

 /**
-* @details
-* @brief       Processing function for the Q15 complex FFT.
-* @param[in]      *S    points to an instance of the Q15 CFFT structure.
-* @param[in, out] *p1   points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return none.
-*/
+  @brief         Processing function for Q15 complex FFT.
+  @param[in]     S               points to an instance of Q15 CFFT structure
+  @param[in,out] p1              points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        none
+ */

 void arm_cfft_q15(
-    const arm_cfft_instance_q15 * S,
-    q15_t * p1,
-    uint8_t ifftFlag,
-    uint8_t bitReverseFlag)
+  const arm_cfft_instance_q15 * S,
+        q15_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag)
 {
-    uint32_t L = S->fftLen;
+  uint32_t L = S->fftLen;

-    if (ifftFlag == 1U)
-    {
-        switch (L)
-        {
-        case 16:
-        case 64:
-        case 256:
-        case 1024:
-        case 4096:
-            arm_radix4_butterfly_inverse_q15  ( p1, L, (q15_t*)S->pTwiddle, 1 );
-            break;
+  if (ifftFlag == 1U)
+  {
+     switch (L)
+     {
+     case 16:
+     case 64:
+     case 256:
+     case 1024:
+     case 4096:
+       arm_radix4_butterfly_inverse_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
+       break;

-        case 32:
-        case 128:
-        case 512:
-        case 2048:
-            arm_cfft_radix4by2_inverse_q15  ( p1, L, S->pTwiddle );
-            break;
-        }
-    }
-    else
-    {
-        switch (L)
-        {
-        case 16:
-        case 64:
-        case 256:
-        case 1024:
-        case 4096:
-            arm_radix4_butterfly_q15  ( p1, L, (q15_t*)S->pTwiddle, 1 );
-            break;
+     case 32:
+     case 128:
+     case 512:
+     case 2048:
+       arm_cfft_radix4by2_inverse_q15 ( p1, L, S->pTwiddle );
+       break;
+     }
+  }
+  else
+  {
+     switch (L)
+     {
+     case 16:
+     case 64:
+     case 256:
+     case 1024:
+     case 4096:
+       arm_radix4_butterfly_q15  ( p1, L, (q15_t*)S->pTwiddle, 1 );
+       break;

-        case 32:
-        case 128:
-        case 512:
-        case 2048:
-            arm_cfft_radix4by2_q15  ( p1, L, S->pTwiddle );
-            break;
-        }
-    }
+     case 32:
+     case 128:
+     case 512:
+     case 2048:
+       arm_cfft_radix4by2_q15  ( p1, L, S->pTwiddle );
+       break;
+     }
+  }

-    if ( bitReverseFlag )
-        arm_bitreversal_16((uint16_t*)p1,S->bitRevLength,S->pBitRevTable);
+  if ( bitReverseFlag )
+    arm_bitreversal_16 ((uint16_t*) p1, S->bitRevLength, S->pBitRevTable);
 }

 /**
-* @} end of ComplexFFT group
-*/
+  @} end of ComplexFFT group
+ */

 void arm_cfft_radix4by2_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    const q15_t * pCoef)
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef)
 {
-    uint32_t i;
-    uint32_t n2;
-    q15_t p0, p1, p2, p3;
+        uint32_t i;
+        uint32_t n2;
+        q15_t p0, p1, p2, p3;
 #if defined (ARM_MATH_DSP)
-    q31_t T, S, R;
-    q31_t coeff, out1, out2;
-    const q15_t *pC = pCoef;
-    q15_t *pSi = pSrc;
-    q15_t *pSl = pSrc + fftLen;
+        q31_t T, S, R;
+        q31_t coeff, out1, out2;
+  const q15_t *pC = pCoef;
+        q15_t *pSi = pSrc;
+        q15_t *pSl = pSrc + fftLen;
 #else
-    uint32_t ia, l;
-    q15_t xt, yt, cosVal, sinVal;
+        uint32_t l;
+        q15_t xt, yt, cosVal, sinVal;
 #endif

-    n2 = fftLen >> 1;
+  n2 = fftLen >> 1U;

 #if defined (ARM_MATH_DSP)

-    for (i = n2; i > 0; i--)
-    {
-        coeff = _SIMD32_OFFSET(pC);
-        pC += 2;
+  for (i = n2; i > 0; i--)
+  {
+      coeff = read_q15x2_ia ((q15_t **) &pC);

-        T = _SIMD32_OFFSET(pSi);
-        T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
+      T = read_q15x2 (pSi);
+      T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */

-        S = _SIMD32_OFFSET(pSl);
-        S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
+      S = read_q15x2 (pSl);
+      S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */

-        R = __QSUB16(T, S);
+      R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSi) = __SHADD16(T, S);
-        pSi += 2;
+      write_q15x2_ia (&pSi, __SHADD16(T, S));

-    #ifndef ARM_MATH_BIG_ENDIAN
+#ifndef ARM_MATH_BIG_ENDIAN
+      out1 = __SMUAD(coeff, R) >> 16U;
+      out2 = __SMUSDX(coeff, R);
+#else
+      out1 = __SMUSDX(R, coeff) >> 16U;
+      out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-        out1 = __SMUAD(coeff, R) >> 16;
-        out2 = __SMUSDX(coeff, R);
+      write_q15x2_ia (&pSl, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+  }

-    #else
+#else /* #if defined (ARM_MATH_DSP) */

-        out1 = __SMUSDX(R, coeff) >> 16U;
-        out2 = __SMUAD(coeff, R);
+  for (i = 0; i < n2; i++)
+  {
+     cosVal = pCoef[2 * i];
+     sinVal = pCoef[2 * i + 1];

-    #endif //     #ifndef ARM_MATH_BIG_ENDIAN
+     l = i + n2;

-        _SIMD32_OFFSET(pSl) =
-        (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-        pSl += 2;
-    }
+     xt =           (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+     pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;

-#else //    #if defined (ARM_MATH_DSP)
+     yt =               (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+     pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;

-    ia = 0;
-    for (i = 0; i < n2; i++)
-    {
-        cosVal = pCoef[ia * 2];
-        sinVal = pCoef[(ia * 2) + 1];
-        ia++;
+     pSrc[2 * l]     = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) +
+                        ((int16_t) (((q31_t) yt * sinVal) >> 16U))  );

-        l = i + n2;
+     pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) -
+                        ((int16_t) (((q31_t) xt * sinVal) >> 16U))   );
+  }

-        xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
-        pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+#endif /* #if defined (ARM_MATH_DSP) */

-        yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
-        pSrc[2 * i + 1] =
-        ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+  /* first col */
+  arm_radix4_butterfly_q15( pSrc,          n2, (q15_t*)pCoef, 2U);

-        pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
-                  ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+  /* second col */
+  arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);

-        pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
-                       ((int16_t) (((q31_t) xt * sinVal) >> 16)));
-    }
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     p0 = pSrc[4 * i + 0];
+     p1 = pSrc[4 * i + 1];
+     p2 = pSrc[4 * i + 2];
+     p3 = pSrc[4 * i + 3];

-#endif //    #if defined (ARM_MATH_DSP)
+     p0 <<= 1U;
+     p1 <<= 1U;
+     p2 <<= 1U;
+     p3 <<= 1U;

-    // first col
-    arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2U);
-    // second col
-    arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
+     pSrc[4 * i + 0] = p0;
+     pSrc[4 * i + 1] = p1;
+     pSrc[4 * i + 2] = p2;
+     pSrc[4 * i + 3] = p3;
+  }

-    for (i = 0; i < fftLen >> 1; i++)
-    {
-        p0 = pSrc[4*i+0];
-        p1 = pSrc[4*i+1];
-        p2 = pSrc[4*i+2];
-        p3 = pSrc[4*i+3];
-
-        p0 <<= 1;
-        p1 <<= 1;
-        p2 <<= 1;
-        p3 <<= 1;
-
-        pSrc[4*i+0] = p0;
-        pSrc[4*i+1] = p1;
-        pSrc[4*i+2] = p2;
-        pSrc[4*i+3] = p3;
-    }
 }

 void arm_cfft_radix4by2_inverse_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    const q15_t * pCoef)
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef)
 {
-    uint32_t i;
-    uint32_t n2;
-    q15_t p0, p1, p2, p3;
+        uint32_t i;
+        uint32_t n2;
+        q15_t p0, p1, p2, p3;
 #if defined (ARM_MATH_DSP)
-    q31_t T, S, R;
-    q31_t coeff, out1, out2;
-    const q15_t *pC = pCoef;
-    q15_t *pSi = pSrc;
-    q15_t *pSl = pSrc + fftLen;
+        q31_t T, S, R;
+        q31_t coeff, out1, out2;
+  const q15_t *pC = pCoef;
+        q15_t *pSi = pSrc;
+        q15_t *pSl = pSrc + fftLen;
 #else
-    uint32_t ia, l;
-    q15_t xt, yt, cosVal, sinVal;
+        uint32_t l;
+        q15_t xt, yt, cosVal, sinVal;
 #endif

-    n2 = fftLen >> 1;
+  n2 = fftLen >> 1U;

 #if defined (ARM_MATH_DSP)

-    for (i = n2; i > 0; i--)
-    {
-        coeff = _SIMD32_OFFSET(pC);
-        pC += 2;
+  for (i = n2; i > 0; i--)
+  {
+     coeff = read_q15x2_ia ((q15_t **) &pC);

-        T = _SIMD32_OFFSET(pSi);
-        T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
+     T = read_q15x2 (pSi);
+     T = __SHADD16(T, 0); /* this is just a SIMD arithmetic shift right by 1 */

-        S = _SIMD32_OFFSET(pSl);
-        S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
+     S = read_q15x2 (pSl);
+     S = __SHADD16(S, 0); /* this is just a SIMD arithmetic shift right by 1 */

-        R = __QSUB16(T, S);
+     R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSi) = __SHADD16(T, S);
-        pSi += 2;
+     write_q15x2_ia (&pSi, __SHADD16(T, S));

-    #ifndef ARM_MATH_BIG_ENDIAN
+#ifndef ARM_MATH_BIG_ENDIAN
+     out1 = __SMUSD(coeff, R) >> 16U;
+     out2 = __SMUADX(coeff, R);
+#else
+     out1 = __SMUADX(R, coeff) >> 16U;
+     out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-        out1 = __SMUSD(coeff, R) >> 16;
-        out2 = __SMUADX(coeff, R);
-    #else
+     write_q15x2_ia (&pSl, (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));
+  }

-        out1 = __SMUADX(R, coeff) >> 16U;
-        out2 = __SMUSD(__QSUB(0, coeff), R);
+#else /* #if defined (ARM_MATH_DSP) */

-    #endif //     #ifndef ARM_MATH_BIG_ENDIAN
+  for (i = 0; i < n2; i++)
+  {
+     cosVal = pCoef[2 * i];
+     sinVal = pCoef[2 * i + 1];

-        _SIMD32_OFFSET(pSl) =
-        (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-        pSl += 2;
-    }
+     l = i + n2;

-#else //    #if defined (ARM_MATH_DSP)
+     xt =           (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
+     pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;

-    ia = 0;
-    for (i = 0; i < n2; i++)
-    {
-        cosVal = pCoef[ia * 2];
-        sinVal = pCoef[(ia * 2) + 1];
-        ia++;
+     yt =               (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
+     pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;

-        l = i + n2;
-        xt = (pSrc[2 * i] >> 1U) - (pSrc[2 * l] >> 1U);
-        pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;
+     pSrc[2 * l]      = (((int16_t) (((q31_t) xt * cosVal) >> 16U)) -
+                         ((int16_t) (((q31_t) yt * sinVal) >> 16U))  );

-        yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
-        pSrc[2 * i + 1] =
-          ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+     pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16U)) +
+                        ((int16_t) (((q31_t) xt * sinVal) >> 16U))  );
+  }

-        pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
-                        ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+#endif /* #if defined (ARM_MATH_DSP) */

-        pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
-                           ((int16_t) (((q31_t) xt * sinVal) >> 16)));
-    }
+  /* first col */
+  arm_radix4_butterfly_inverse_q15( pSrc,          n2, (q15_t*)pCoef, 2U);

-#endif //    #if defined (ARM_MATH_DSP)
+  /* second col */
+  arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);

-    // first col
-    arm_radix4_butterfly_inverse_q15( pSrc, n2, (q15_t*)pCoef, 2U);
-    // second col
-    arm_radix4_butterfly_inverse_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2U);
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     p0 = pSrc[4 * i + 0];
+     p1 = pSrc[4 * i + 1];
+     p2 = pSrc[4 * i + 2];
+     p3 = pSrc[4 * i + 3];

-    for (i = 0; i < fftLen >> 1; i++)
-    {
-        p0 = pSrc[4*i+0];
-        p1 = pSrc[4*i+1];
-        p2 = pSrc[4*i+2];
-        p3 = pSrc[4*i+3];
+     p0 <<= 1U;
+     p1 <<= 1U;
+     p2 <<= 1U;
+     p3 <<= 1U;

-        p0 <<= 1;
-        p1 <<= 1;
-        p2 <<= 1;
-        p3 <<= 1;
-
-        pSrc[4*i+0] = p0;
-        pSrc[4*i+1] = p1;
-        pSrc[4*i+2] = p2;
-        pSrc[4*i+3] = p3;
-    }
+     pSrc[4 * i + 0] = p0;
+     pSrc[4 * i + 1] = p1;
+     pSrc[4 * i + 2] = p2;
+     pSrc[4 * i + 3] = p3;
+  }
 }
-
--- a/DSP/Source/TransformFunctions/arm_cfft_q31.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_q31.c
 * Description:  Combined Radix Decimation in Frequency CFFT fixed point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,224 +29,226 @@
 #include "arm_math.h"

 extern void arm_radix4_butterfly_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pCoef,
-    uint32_t twidCoefModifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint32_t twidCoefModifier);

 extern void arm_radix4_butterfly_inverse_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pCoef,
-    uint32_t twidCoefModifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint32_t twidCoefModifier);

 extern void arm_bitreversal_32(
-    uint32_t * pSrc,
-    const uint16_t bitRevLen,
-    const uint16_t * pBitRevTable);
+        uint32_t * pSrc,
+  const uint16_t bitRevLen,
+  const uint16_t * pBitRevTable);

 void arm_cfft_radix4by2_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    const q31_t * pCoef);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef);

 void arm_cfft_radix4by2_inverse_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    const q31_t * pCoef);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef);

 /**
-* @ingroup groupTransforms
-*/
+  @ingroup groupTransforms
+ */

 /**
-* @addtogroup ComplexFFT
-* @{
-*/
+  @addtogroup ComplexFFT
+  @{
+ */

 /**
-* @details
-* @brief       Processing function for the fixed-point complex FFT in Q31 format.
-* @param[in]      *S    points to an instance of the fixed-point CFFT structure.
-* @param[in, out] *p1   points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return none.
-*/
+  @brief         Processing function for the Q31 complex FFT.
+  @param[in]     S               points to an instance of the fixed-point CFFT structure
+  @param[in,out] p1              points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        none
+ */

 void arm_cfft_q31(
-    const arm_cfft_instance_q31 * S,
-    q31_t * p1,
-    uint8_t ifftFlag,
-    uint8_t bitReverseFlag)
+  const arm_cfft_instance_q31 * S,
+        q31_t * p1,
+        uint8_t ifftFlag,
+        uint8_t bitReverseFlag)
 {
-    uint32_t L = S->fftLen;
+  uint32_t L = S->fftLen;

-    if (ifftFlag == 1U)
-    {
-        switch (L)
-        {
-        case 16:
-        case 64:
-        case 256:
-        case 1024:
-        case 4096:
-            arm_radix4_butterfly_inverse_q31  ( p1, L, (q31_t*)S->pTwiddle, 1 );
-            break;
+  if (ifftFlag == 1U)
+  {
+     switch (L)
+     {
+     case 16:
+     case 64:
+     case 256:
+     case 1024:
+     case 4096:
+       arm_radix4_butterfly_inverse_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
+       break;

-        case 32:
-        case 128:
-        case 512:
-        case 2048:
-            arm_cfft_radix4by2_inverse_q31  ( p1, L, S->pTwiddle );
-            break;
-        }
-    }
-    else
-    {
-        switch (L)
-        {
-        case 16:
-        case 64:
-        case 256:
-        case 1024:
-        case 4096:
-            arm_radix4_butterfly_q31  ( p1, L, (q31_t*)S->pTwiddle, 1 );
-            break;
+     case 32:
+     case 128:
+     case 512:
+     case 2048:
+       arm_cfft_radix4by2_inverse_q31 ( p1, L, S->pTwiddle );
+       break;
+     }
+  }
+  else
+  {
+     switch (L)
+     {
+     case 16:
+     case 64:
+     case 256:
+     case 1024:
+     case 4096:
+       arm_radix4_butterfly_q31 ( p1, L, (q31_t*)S->pTwiddle, 1 );
+       break;

-        case 32:
-        case 128:
-        case 512:
-        case 2048:
-            arm_cfft_radix4by2_q31  ( p1, L, S->pTwiddle );
-            break;
-        }
-    }
+     case 32:
+     case 128:
+     case 512:
+     case 2048:
+       arm_cfft_radix4by2_q31 ( p1, L, S->pTwiddle );
+       break;
+     }
+  }

-    if ( bitReverseFlag )
-        arm_bitreversal_32((uint32_t*)p1,S->bitRevLength,S->pBitRevTable);
+  if ( bitReverseFlag )
+    arm_bitreversal_32 ((uint32_t*) p1, S->bitRevLength, S->pBitRevTable);
 }

 /**
-* @} end of ComplexFFT group
-*/
+  @} end of ComplexFFT group
+ */

 void arm_cfft_radix4by2_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    const q31_t * pCoef)
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef)
 {
-    uint32_t i, l;
-    uint32_t n2, ia;
-    q31_t xt, yt, cosVal, sinVal;
-    q31_t p0, p1;
+        uint32_t i, l;
+        uint32_t n2;
+        q31_t xt, yt, cosVal, sinVal;
+        q31_t p0, p1;

-    n2 = fftLen >> 1;
-    ia = 0;
-    for (i = 0; i < n2; i++)
-    {
-        cosVal = pCoef[2*ia];
-        sinVal = pCoef[2*ia + 1];
-        ia++;
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     cosVal = pCoef[2 * i];
+     sinVal = pCoef[2 * i + 1];

-        l = i + n2;
-        xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
-        pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
+     l = i + n2;

-        yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
-        pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
+     xt =          (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
+     pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);

-        mult_32x32_keep32_R(p0, xt, cosVal);
-        mult_32x32_keep32_R(p1, yt, cosVal);
-        multAcc_32x32_keep32_R(p0, yt, sinVal);
-        multSub_32x32_keep32_R(p1, xt, sinVal);
+     yt =              (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
+     pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);

-        pSrc[2U * l] = p0 << 1;
-        pSrc[2U * l + 1U] = p1 << 1;
+     mult_32x32_keep32_R(p0, xt, cosVal);
+     mult_32x32_keep32_R(p1, yt, cosVal);
+     multAcc_32x32_keep32_R(p0, yt, sinVal);
+     multSub_32x32_keep32_R(p1, xt, sinVal);

-    }
+     pSrc[2 * l]     = p0 << 1;
+     pSrc[2 * l + 1] = p1 << 1;
+  }

-    // first col
-    arm_radix4_butterfly_q31( pSrc, n2, (q31_t*)pCoef, 2U);
-    // second col
-    arm_radix4_butterfly_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
+  /* first col */
+  arm_radix4_butterfly_q31 (pSrc,          n2, (q31_t*)pCoef, 2U);

-    for (i = 0; i < fftLen >> 1; i++)
-    {
-        p0 = pSrc[4*i+0];
-        p1 = pSrc[4*i+1];
-        xt = pSrc[4*i+2];
-        yt = pSrc[4*i+3];
+  /* second col */
+  arm_radix4_butterfly_q31 (pSrc + fftLen, n2, (q31_t*)pCoef, 2U);

-        p0 <<= 1;
-        p1 <<= 1;
-        xt <<= 1;
-        yt <<= 1;
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     p0 = pSrc[4 * i + 0];
+     p1 = pSrc[4 * i + 1];
+     xt = pSrc[4 * i + 2];
+     yt = pSrc[4 * i + 3];

-        pSrc[4*i+0] = p0;
-        pSrc[4*i+1] = p1;
-        pSrc[4*i+2] = xt;
-        pSrc[4*i+3] = yt;
-    }
+     p0 <<= 1U;
+     p1 <<= 1U;
+     xt <<= 1U;
+     yt <<= 1U;
+
+     pSrc[4 * i + 0] = p0;
+     pSrc[4 * i + 1] = p1;
+     pSrc[4 * i + 2] = xt;
+     pSrc[4 * i + 3] = yt;
+  }

 }

 void arm_cfft_radix4by2_inverse_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    const q31_t * pCoef)
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef)
 {
-    uint32_t i, l;
-    uint32_t n2, ia;
-    q31_t xt, yt, cosVal, sinVal;
-    q31_t p0, p1;
+  uint32_t i, l;
+  uint32_t n2;
+  q31_t xt, yt, cosVal, sinVal;
+  q31_t p0, p1;

-    n2 = fftLen >> 1;
-    ia = 0;
-    for (i = 0; i < n2; i++)
-    {
-        cosVal = pCoef[2*ia];
-        sinVal = pCoef[2*ia + 1];
-        ia++;
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     cosVal = pCoef[2 * i];
+     sinVal = pCoef[2 * i + 1];

-        l = i + n2;
-        xt = (pSrc[2 * i] >> 2) - (pSrc[2 * l] >> 2);
-        pSrc[2 * i] = (pSrc[2 * i] >> 2) + (pSrc[2 * l] >> 2);
+     l = i + n2;

-        yt = (pSrc[2 * i + 1] >> 2) - (pSrc[2 * l + 1] >> 2);
-        pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2) + (pSrc[2 * i + 1] >> 2);
+     xt =          (pSrc[2 * i] >> 2U) - (pSrc[2 * l] >> 2U);
+     pSrc[2 * i] = (pSrc[2 * i] >> 2U) + (pSrc[2 * l] >> 2U);

-        mult_32x32_keep32_R(p0, xt, cosVal);
-        mult_32x32_keep32_R(p1, yt, cosVal);
-        multSub_32x32_keep32_R(p0, yt, sinVal);
-        multAcc_32x32_keep32_R(p1, xt, sinVal);
+     yt =              (pSrc[2 * i + 1] >> 2U) - (pSrc[2 * l + 1] >> 2U);
+     pSrc[2 * i + 1] = (pSrc[2 * l + 1] >> 2U) + (pSrc[2 * i + 1] >> 2U);

-        pSrc[2U * l] = p0 << 1;
-        pSrc[2U * l + 1U] = p1 << 1;
+     mult_32x32_keep32_R(p0, xt, cosVal);
+     mult_32x32_keep32_R(p1, yt, cosVal);
+     multSub_32x32_keep32_R(p0, yt, sinVal);
+     multAcc_32x32_keep32_R(p1, xt, sinVal);

-    }
+     pSrc[2 * l]     = p0 << 1U;
+     pSrc[2 * l + 1] = p1 << 1U;
+  }

-    // first col
-    arm_radix4_butterfly_inverse_q31( pSrc, n2, (q31_t*)pCoef, 2U);
-    // second col
-    arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);
+  /* first col */
+  arm_radix4_butterfly_inverse_q31( pSrc,          n2, (q31_t*)pCoef, 2U);

-    for (i = 0; i < fftLen >> 1; i++)
-    {
-        p0 = pSrc[4*i+0];
-        p1 = pSrc[4*i+1];
-        xt = pSrc[4*i+2];
-        yt = pSrc[4*i+3];
+  /* second col */
+  arm_radix4_butterfly_inverse_q31( pSrc + fftLen, n2, (q31_t*)pCoef, 2U);

-        p0 <<= 1;
-        p1 <<= 1;
-        xt <<= 1;
-        yt <<= 1;
+  n2 = fftLen >> 1U;
+  for (i = 0; i < n2; i++)
+  {
+     p0 = pSrc[4 * i + 0];
+     p1 = pSrc[4 * i + 1];
+     xt = pSrc[4 * i + 2];
+     yt = pSrc[4 * i + 3];

-        pSrc[4*i+0] = p0;
-        pSrc[4*i+1] = p1;
-        pSrc[4*i+2] = xt;
-        pSrc[4*i+3] = yt;
-    }
+     p0 <<= 1U;
+     p1 <<= 1U;
+     xt <<= 1U;
+     yt <<= 1U;
+
+     pSrc[4 * i + 0] = p0;
+     pSrc[4 * i + 1] = p1;
+     pSrc[4 * i + 2] = xt;
+     pSrc[4 * i + 3] = yt;
+  }
 }
-
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_f32.c
 * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Floating point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,64 +29,62 @@
 #include "arm_math.h"

 void arm_radix2_butterfly_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pCoef,
-  uint16_t twidCoefModifier);
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_radix2_butterfly_inverse_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pCoef,
-  uint16_t twidCoefModifier,
-  float32_t onebyfftLen);
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier,
+        float32_t onebyfftLen);

 extern void arm_bitreversal_f32(
-    float32_t * pSrc,
-    uint16_t fftSize,
-    uint16_t bitRevFactor,
-    uint16_t * pBitRevTab);
+        float32_t * pSrc,
+        uint16_t fftSize,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab);

 /**
-* @ingroup groupTransforms
-*/
+  @ingroup groupTransforms
+ */

 /**
-* @addtogroup ComplexFFT
-* @{
-*/
+  @addtogroup ComplexFFT
+  @{
+ */

 /**
-* @details
-* @brief Radix-2 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_f32 and will be removed
-* in the future.
-* @param[in]      *S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure.
-* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
-* @return none.
-*/
+  @brief         Radix-2 CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future
+  @param[in]     S    points to an instance of the floating-point Radix-2 CFFT/CIFFT structure
+  @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @return        none
+ */

 void arm_cfft_radix2_f32(
 const arm_cfft_radix2_instance_f32 * S,
-float32_t * pSrc)
+      float32_t * pSrc)
 {

   if (S->ifftFlag == 1U)
   {
-      /*  Complex IFFT radix-2  */
+      /* Complex IFFT radix-2 */
      arm_radix2_butterfly_inverse_f32(pSrc, S->fftLen, S->pTwiddle,
      S->twidCoefModifier, S->onebyfftLen);
   }
   else
   {
-      /*  Complex FFT radix-2  */
+      /* Complex FFT radix-2 */
      arm_radix2_butterfly_f32(pSrc, S->fftLen, S->pTwiddle,
      S->twidCoefModifier);
   }

   if (S->bitReverseFlag == 1U)
   {
-      /*  Bit Reversal */
+      /* Bit Reversal */
      arm_bitreversal_f32(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
   }

@ -94,36 +92,36 @@ float32_t * pSrc)


 /**
-* @} end of ComplexFFT group
-*/
+  @} end of ComplexFFT group
+ */



 /* ----------------------------------------------------------------------
-** Internal helper function used by the FFTs
-** ------------------------------------------------------------------- */
+ ** Internal helper function used by the FFTs
+ ** ------------------------------------------------------------------- */

-/*
-* @brief  Core function for the floating-point CFFT butterfly process.
-* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
-* @param[in]      fftLen           length of the FFT.
-* @param[in]      *pCoef           points to the twiddle coefficient buffer.
-* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-*/
+/**
+  @brief         Core function for the floating-point CFFT butterfly process.
+  @param[in,out] pSrc             points to in-place buffer of floating-point data type
+  @param[in]     fftLen           length of the FFT
+  @param[in]     pCoef            points to twiddle coefficient buffer
+  @param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
+ */

 void arm_radix2_butterfly_f32(
-float32_t * pSrc,
-uint32_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier)
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier)
 {

-   uint32_t i, j, k, l;
-   uint32_t n1, n2, ia;
-   float32_t xt, yt, cosVal, sinVal;
-   float32_t p0, p1, p2, p3;
-   float32_t a0, a1;
+        uint32_t i, j, k, l;
+        uint32_t n1, n2, ia;
+        float32_t xt, yt, cosVal, sinVal;
+        float32_t p0, p1, p2, p3;
+        float32_t a0, a1;

 #if defined (ARM_MATH_DSP)

@ -227,7 +225,7 @@ uint16_t twidCoefModifier)
      pSrc[2 * i + 3] = yt;
   }                             // groups loop end

-#else
+#else /* #if defined (ARM_MATH_DSP) */

   n2 = fftLen;

@ -275,24 +273,24 @@ uint16_t twidCoefModifier)
      twidCoefModifier <<= 1U;
   }

-#endif //    #if defined (ARM_MATH_DSP)
+#endif /* #if defined (ARM_MATH_DSP) */

 }


 void arm_radix2_butterfly_inverse_f32(
-float32_t * pSrc,
-uint32_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier,
-float32_t onebyfftLen)
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier,
+        float32_t onebyfftLen)
 {

-   uint32_t i, j, k, l;
-   uint32_t n1, n2, ia;
-   float32_t xt, yt, cosVal, sinVal;
-   float32_t p0, p1, p2, p3;
-   float32_t a0, a1;
+        uint32_t i, j, k, l;
+        uint32_t n1, n2, ia;
+        float32_t xt, yt, cosVal, sinVal;
+        float32_t p0, p1, p2, p3;
+        float32_t a0, a1;

 #if defined (ARM_MATH_DSP)

@ -392,7 +390,7 @@ float32_t onebyfftLen)
      pSrc[2 * i + 3] = p3;
   }                             // butterfly loop end

-#else
+#else /* #if defined (ARM_MATH_DSP) */

   n2 = fftLen;

@ -461,12 +459,12 @@ float32_t onebyfftLen)
      p3 = yt * onebyfftLen;

      pSrc[2 * i] = p0;
-      pSrc[2U * l] = p2;
+      pSrc[2 * l] = p2;

      pSrc[2 * i + 1] = p1;
-      pSrc[2U * l + 1U] = p3;
+      pSrc[2 * l + 1] = p3;
   }                             // butterfly loop end

-#endif //      #if defined (ARM_MATH_DSP)
+#endif /* #if defined (ARM_MATH_DSP) */

 }
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_init_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_init_f32.c
 * Description:  Radix-2 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,36 +30,41 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

 /**
-* @brief  Initialization function for the floating-point CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_f32 and will be removed
-* in the future.
-* @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+  @brief         Initialization function for the floating-point CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
+  @param[in,out] S              points to an instance of the floating-point CFFT/CIFFT structure
+  @param[in]     fftLen         length of the FFT
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
 */
+
 arm_status arm_cfft_radix2_init_f32(
  arm_cfft_radix2_instance_f32 * S,
  uint16_t fftLen,
@ -188,5 +193,5 @@ arm_status arm_cfft_radix2_init_f32(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_init_q15.c
 * Description:  Radix-2 Decimation in Frequency Q15 FFT & IFFT initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,35 +30,40 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */


 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

 /**
-* @brief Initialization function for the Q15 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q15 and will be removed
-* @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+  @brief                        Initialization function for the Q15 CFFT/CIFFT.
+  @deprecated                   Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+  @param[in,out] S              points to an instance of the Q15 CFFT/CIFFT structure.
+  @param[in]     fftLen         length of the FFT.
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
 */

 arm_status arm_cfft_radix2_init_q15(
@ -173,5 +178,5 @@ arm_status arm_cfft_radix2_init_q15(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_init_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_init_q31.c
 * Description:  Radix-2 Decimation in Frequency Fixed-point CFFT & CIFFT Initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,36 +30,39 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

-
 /**
-*
-* @brief  Initialization function for the Q31 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q31 and will be removed
-* @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+  @brief         Initialization function for the Q31 CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+  @param[in,out] S              points to an instance of the Q31 CFFT/CIFFT structure
+  @param[in]     fftLen         length of the FFT
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set(=1) ifftFlag for calculation of CIFFT otherwise CFFT is calculated
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
 */

 arm_status arm_cfft_radix2_init_q31(
@ -76,8 +79,10 @@ arm_status arm_cfft_radix2_init_q31(

  /*  Initialise the Twiddle coefficient pointer */
  S->pTwiddle = (q31_t *) twiddleCoef_4096_q31;
+
  /*  Initialise the Flag for selection of CFFT or CIFFT */
  S->ifftFlag = ifftFlag;
+
  /*  Initialise the Flag for calculation Bit reversal or not */
  S->bitReverseFlag = bitReverseFlag;

@ -170,5 +175,5 @@ arm_status arm_cfft_radix2_init_q31(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_q15.c
 * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,74 +29,71 @@
 #include "arm_math.h"

 void arm_radix2_butterfly_q15(
-  q15_t * pSrc,
-  uint32_t fftLen,
-  q15_t * pCoef,
-  uint16_t twidCoefModifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_radix2_butterfly_inverse_q15(
-  q15_t * pSrc,
-  uint32_t fftLen,
-  q15_t * pCoef,
-  uint16_t twidCoefModifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_bitreversal_q15(
-  q15_t * pSrc,
-  uint32_t fftLen,
-  uint16_t bitRevFactor,
-  uint16_t * pBitRevTab);
+        q15_t * pSrc,
+        uint32_t fftLen,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab);

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

 /**
- * @details
- * @brief Processing function for the fixed-point CFFT/CIFFT.
- * @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q15 and will be removed
- * @param[in]      *S    points to an instance of the fixed-point CFFT/CIFFT structure.
- * @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
- * @return none.
+  @brief         Processing function for the fixed-point CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+  @param[in]     S    points to an instance of the fixed-point CFFT/CIFFT structure
+  @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @return        none
 */

 void arm_cfft_radix2_q15(
  const arm_cfft_radix2_instance_q15 * S,
-  q15_t * pSrc)
+        q15_t * pSrc)
 {

  if (S->ifftFlag == 1U)
  {
-    arm_radix2_butterfly_inverse_q15(pSrc, S->fftLen,
-                                     S->pTwiddle, S->twidCoefModifier);
+    arm_radix2_butterfly_inverse_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }
  else
  {
-    arm_radix2_butterfly_q15(pSrc, S->fftLen,
-                             S->pTwiddle, S->twidCoefModifier);
+    arm_radix2_butterfly_q15 (pSrc, S->fftLen, S->pTwiddle, S->twidCoefModifier);
  }

  arm_bitreversal_q15(pSrc, S->fftLen, S->bitRevFactor, S->pBitRevTable);
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */

 void arm_radix2_butterfly_q15(
-  q15_t * pSrc,
-  uint32_t fftLen,
-  q15_t * pCoef,
-  uint16_t twidCoefModifier)
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint16_t twidCoefModifier)
 {
 #if defined (ARM_MATH_DSP)

-  unsigned i, j, k, l;
-  unsigned n1, n2, ia;
+  uint32_t i, j, k, l;
+  uint32_t n1, n2, ia;
  q15_t in;
  q31_t T, S, R;
  q31_t coeff, out1, out2;
@ -111,215 +108,196 @@ void arm_radix2_butterfly_q15(
  // loop for groups
  for (i = 0; i < n2; i++)
  {
-    coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+    coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

    ia = ia + twidCoefModifier;

    l = i + n2;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));
    in = ((int16_t) (T & 0xFFFF)) >> 1;
    T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));
    in = ((int16_t) (S & 0xFFFF)) >> 1;
    S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
    out1 = __SMUAD(coeff, R) >> 16;
    out2 = __SMUSDX(coeff, R);
-
 #else
-
    out1 = __SMUSDX(R, coeff) >> 16U;
    out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+    write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-    _SIMD32_OFFSET(pSrc + (2U * l)) =
-      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-
-    coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+    coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    i++;
    l++;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));
    in = ((int16_t) (T & 0xFFFF)) >> 1;
    T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));
    in = ((int16_t) (S & 0xFFFF)) >> 1;
    S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
    out1 = __SMUAD(coeff, R) >> 16;
    out2 = __SMUSDX(coeff, R);
-
 #else

    out1 = __SMUSDX(R, coeff) >> 16U;
    out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+    write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-    _SIMD32_OFFSET(pSrc + (2U * l)) =
-      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-
-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-  // loop for stage
+  /* loop for stage */
  for (k = fftLen / 2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

-    // loop for groups
+    /* loop for groups */
    for (j = 0; j < n2; j++)
    {
-      coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+      coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

      ia = ia + twidCoefModifier;

-      // loop for butterfly
+      /* loop for butterfly */
      for (i = j; i < fftLen; i += n1)
      {
        l = i + n2;

-        T = _SIMD32_OFFSET(pSrc + (2 * i));
+        T = read_q15x2 (pSrc + (2 * i));

-        S = _SIMD32_OFFSET(pSrc + (2 * l));
+        S = read_q15x2 (pSrc + (2 * l));

        R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+        write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
        out1 = __SMUAD(coeff, R) >> 16;
        out2 = __SMUSDX(coeff, R);
-
 #else
-
        out1 = __SMUSDX(R, coeff) >> 16U;
        out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
-
-        _SIMD32_OFFSET(pSrc + (2U * l)) =
-          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+        write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

        i += n1;

        l = i + n2;

-        T = _SIMD32_OFFSET(pSrc + (2 * i));
+        T = read_q15x2 (pSrc + (2 * i));

-        S = _SIMD32_OFFSET(pSrc + (2 * l));
+        S = read_q15x2 (pSrc + (2 * l));

        R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+        write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
        out1 = __SMUAD(coeff, R) >> 16;
        out2 = __SMUSDX(coeff, R);
-
 #else
-
        out1 = __SMUSDX(R, coeff) >> 16U;
        out2 = __SMUAD(coeff, R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+        write_q15x2 (pSrc + (2U * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-        _SIMD32_OFFSET(pSrc + (2U * l)) =
-          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+      } /* butterfly loop end */

-      }                         // butterfly loop end
-
-    }                           // groups loop end
+    } /* groups loop end */

    twidCoefModifier = twidCoefModifier << 1U;
-  }                             // stages loop end
+  } /* stages loop end */

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+  coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

  ia = ia + twidCoefModifier;

-  // loop for butterfly
+  /* loop for butterfly */
  for (i = 0; i < fftLen; i += n1)
  {
    l = i + n2;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __QADD16(T, S));

-    _SIMD32_OFFSET(pSrc + (2U * l)) = R;
+    write_q15x2 (pSrc + (2 * l), R);

    i += n1;
    l = i + n2;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __QADD16(T, S));

-    _SIMD32_OFFSET(pSrc + (2U * l)) = R;
+    write_q15x2 (pSrc + (2 * l), R);

-  }                             // groups loop end
+  } /* groups loop end */


-#else
+#else /* #if defined (ARM_MATH_DSP) */

-  unsigned i, j, k, l;
-  unsigned n1, n2, ia;
+  uint32_t i, j, k, l;
+  uint32_t n1, n2, ia;
  q15_t xt, yt, cosVal, sinVal;


-  //N = fftLen;
+  // N = fftLen;
  n2 = fftLen;

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  // loop for groups
+  /* loop for groups */
  for (j = 0; j < n2; j++)
  {
-    cosVal = pCoef[ia * 2];
+    cosVal = pCoef[(ia * 2)];
    sinVal = pCoef[(ia * 2) + 1];
    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    for (i = j; i < fftLen; i += n1)
    {
      l = i + n2;
@ -327,36 +305,36 @@ void arm_radix2_butterfly_q15(
      pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;

      yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
-      pSrc[2 * i + 1] =
-        ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+      pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
+                         (pSrc[2 * i + 1] >> 1U)  ) >> 1U;

-      pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
-                      ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+      pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+                     ((int16_t) (((q31_t) yt * sinVal) >> 16)));

-      pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
-                           ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+      pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+                          ((int16_t) (((q31_t) xt * sinVal) >> 16)));

-    }                           // butterfly loop end
+    } /* butterfly loop end */

-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-  // loop for stage
+  /* loop for stage */
  for (k = fftLen / 2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

-    // loop for groups
+    /* loop for groups */
    for (j = 0; j < n2; j++)
    {
      cosVal = pCoef[ia * 2];
      sinVal = pCoef[(ia * 2) + 1];
      ia = ia + twidCoefModifier;

-      // loop for butterfly
+      /* loop for butterfly */
      for (i = j; i < fftLen; i += n1)
      {
        l = i + n2;
@ -366,24 +344,24 @@ void arm_radix2_butterfly_q15(
        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
        pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;

-        pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
-                        ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+        pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) +
+                       ((int16_t) (((q31_t) yt * sinVal) >> 16)));

-        pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
-                             ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+        pSrc[2U * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) -
+                            ((int16_t) (((q31_t) xt * sinVal) >> 16)));

-      }                         // butterfly loop end
+      } /* butterfly loop end */

-    }                           // groups loop end
+    } /* groups loop end */

    twidCoefModifier = twidCoefModifier << 1U;
-  }                             // stages loop end
+  } /* stages loop end */

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  // loop for groups
+  /* loop for groups */
  for (j = 0; j < n2; j++)
  {
    cosVal = pCoef[ia * 2];
@ -391,7 +369,7 @@ void arm_radix2_butterfly_q15(

    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    for (i = j; i < fftLen; i += n1)
    {
      l = i + n2;
@ -401,244 +379,226 @@ void arm_radix2_butterfly_q15(
      yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
      pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);

-      pSrc[2U * l] = xt;
+      pSrc[2 * l] = xt;

-      pSrc[2U * l + 1U] = yt;
+      pSrc[2 * l + 1] = yt;

-    }                           // butterfly loop end
+    } /* butterfly loop end */

-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-#endif //             #if defined (ARM_MATH_DSP)
+#endif /* #if defined (ARM_MATH_DSP) */

 }


 void arm_radix2_butterfly_inverse_q15(
-  q15_t * pSrc,
-  uint32_t fftLen,
-  q15_t * pCoef,
-  uint16_t twidCoefModifier)
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pCoef,
+        uint16_t twidCoefModifier)
 {
 #if defined (ARM_MATH_DSP)

-  unsigned i, j, k, l;
-  unsigned n1, n2, ia;
-  q15_t in;
-  q31_t T, S, R;
-  q31_t coeff, out1, out2;
+        uint32_t i, j, k, l;
+        uint32_t n1, n2, ia;
+        q15_t in;
+        q31_t T, S, R;
+        q31_t coeff, out1, out2;

-  //N = fftLen;
+  // N = fftLen;
  n2 = fftLen;

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  // loop for groups
+  /* loop for groups */
  for (i = 0; i < n2; i++)
  {
-    coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+    coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

    ia = ia + twidCoefModifier;

    l = i + n2;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));
    in = ((int16_t) (T & 0xFFFF)) >> 1;
    T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));
    in = ((int16_t) (S & 0xFFFF)) >> 1;
    S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
    out1 = __SMUSD(coeff, R) >> 16;
    out2 = __SMUADX(coeff, R);
 #else
-
    out1 = __SMUADX(R, coeff) >> 16U;
    out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+    write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-    _SIMD32_OFFSET(pSrc + (2U * l)) =
-      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-
-    coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+    coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    i++;
    l++;

-    T = _SIMD32_OFFSET(pSrc + (2 * i));
+    T = read_q15x2 (pSrc + (2 * i));
    in = ((int16_t) (T & 0xFFFF)) >> 1;
    T = ((T >> 1) & 0xFFFF0000) | (in & 0xFFFF);

-    S = _SIMD32_OFFSET(pSrc + (2 * l));
+    S = read_q15x2 (pSrc + (2 * l));
    in = ((int16_t) (S & 0xFFFF)) >> 1;
    S = ((S >> 1) & 0xFFFF0000) | (in & 0xFFFF);

    R = __QSUB16(T, S);

-    _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+    write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
    out1 = __SMUSD(coeff, R) >> 16;
    out2 = __SMUADX(coeff, R);
 #else
-
    out1 = __SMUADX(R, coeff) >> 16U;
    out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+    write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-    _SIMD32_OFFSET(pSrc + (2U * l)) =
-      (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
-
-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-  // loop for stage
+  /* loop for stage */
  for (k = fftLen / 2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

-    // loop for groups
+    /* loop for groups */
    for (j = 0; j < n2; j++)
    {
-      coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+      coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

      ia = ia + twidCoefModifier;

-      // loop for butterfly
+      /* loop for butterfly */
      for (i = j; i < fftLen; i += n1)
      {
        l = i + n2;

-        T = _SIMD32_OFFSET(pSrc + (2 * i));
+        T = read_q15x2 (pSrc + (2 * i));

-        S = _SIMD32_OFFSET(pSrc + (2 * l));
+        S = read_q15x2 (pSrc + (2 * l));

        R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+        write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
        out1 = __SMUSD(coeff, R) >> 16;
        out2 = __SMUADX(coeff, R);
-
 #else
-
        out1 = __SMUADX(R, coeff) >> 16U;
        out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
-
-        _SIMD32_OFFSET(pSrc + (2U * l)) =
-          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+        write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

        i += n1;

        l = i + n2;

-        T = _SIMD32_OFFSET(pSrc + (2 * i));
+        T = read_q15x2 (pSrc + (2 * i));

-        S = _SIMD32_OFFSET(pSrc + (2 * l));
+        S = read_q15x2 (pSrc + (2 * l));

        R = __QSUB16(T, S);

-        _SIMD32_OFFSET(pSrc + (2 * i)) = __SHADD16(T, S);
+        write_q15x2 (pSrc + (2 * i), __SHADD16(T, S));

 #ifndef ARM_MATH_BIG_ENDIAN
-
        out1 = __SMUSD(coeff, R) >> 16;
        out2 = __SMUADX(coeff, R);
 #else
-
        out1 = __SMUADX(R, coeff) >> 16U;
        out2 = __SMUSD(__QSUB(0, coeff), R);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif //     #ifndef ARM_MATH_BIG_ENDIAN
+        write_q15x2 (pSrc + (2 * l), (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF));

-        _SIMD32_OFFSET(pSrc + (2U * l)) =
-          (q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
+      } /* butterfly loop end */

-      }                         // butterfly loop end
-
-    }                           // groups loop end
+    } /* groups loop end */

    twidCoefModifier = twidCoefModifier << 1U;
-  }                             // stages loop end
+  } /* stages loop end */

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  // loop for groups
+  /* loop for groups */
  for (j = 0; j < n2; j++)
  {
-    coeff = _SIMD32_OFFSET(pCoef + (ia * 2U));
+    coeff = read_q15x2 ((q15_t *)pCoef + (ia * 2U));

    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    for (i = j; i < fftLen; i += n1)
    {
      l = i + n2;

-      T = _SIMD32_OFFSET(pSrc + (2 * i));
+      T = read_q15x2 (pSrc + (2 * i));

-      S = _SIMD32_OFFSET(pSrc + (2 * l));
+      S = read_q15x2 (pSrc + (2 * l));

      R = __QSUB16(T, S);

-      _SIMD32_OFFSET(pSrc + (2 * i)) = __QADD16(T, S);
+      write_q15x2 (pSrc + (2 * i), __QADD16(T, S));

-      _SIMD32_OFFSET(pSrc + (2U * l)) = R;
+      write_q15x2 (pSrc + (2 * l), R);

-    }                           // butterfly loop end
+    } /* butterfly loop end */

-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-#else
+#else /* #if defined (ARM_MATH_DSP) */

+        uint32_t i, j, k, l;
+        uint32_t n1, n2, ia;
+        q15_t xt, yt, cosVal, sinVal;

-  unsigned i, j, k, l;
-  unsigned n1, n2, ia;
-  q15_t xt, yt, cosVal, sinVal;
-
-  //N = fftLen;
+  // N = fftLen;
  n2 = fftLen;

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  // loop for groups
+  /* loop for groups */
  for (j = 0; j < n2; j++)
  {
-    cosVal = pCoef[ia * 2];
+    cosVal = pCoef[(ia * 2)];
    sinVal = pCoef[(ia * 2) + 1];
    ia = ia + twidCoefModifier;

-    // loop for butterfly
+    /* loop for butterfly */
    for (i = j; i < fftLen; i += n1)
    {
      l = i + n2;
@ -646,36 +606,36 @@ void arm_radix2_butterfly_inverse_q15(
      pSrc[2 * i] = ((pSrc[2 * i] >> 1U) + (pSrc[2 * l] >> 1U)) >> 1U;

      yt = (pSrc[2 * i + 1] >> 1U) - (pSrc[2 * l + 1] >> 1U);
-      pSrc[2 * i + 1] =
-        ((pSrc[2 * l + 1] >> 1U) + (pSrc[2 * i + 1] >> 1U)) >> 1U;
+      pSrc[2 * i + 1] = ((pSrc[2 * l + 1] >> 1U) +
+                         (pSrc[2 * i + 1] >> 1U)  ) >> 1U;

-      pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
-                      ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+      pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+                     ((int16_t) (((q31_t) yt * sinVal) >> 16)));

-      pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
-                           ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+      pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+                         ((int16_t) (((q31_t) xt * sinVal) >> 16)));

-    }                           // butterfly loop end
+    } /* butterfly loop end */

-  }                             // groups loop end
+  } /* groups loop end */

  twidCoefModifier = twidCoefModifier << 1U;

-  // loop for stage
+  /* loop for stage */
  for (k = fftLen / 2; k > 2; k = k >> 1)
  {
    n1 = n2;
    n2 = n2 >> 1;
    ia = 0;

-    // loop for groups
+    /* loop for groups */
    for (j = 0; j < n2; j++)
    {
-      cosVal = pCoef[ia * 2];
+      cosVal = pCoef[(ia * 2)];
      sinVal = pCoef[(ia * 2) + 1];
      ia = ia + twidCoefModifier;

-      // loop for butterfly
+      /* loop for butterfly */
      for (i = j; i < fftLen; i += n1)
      {
        l = i + n2;
@ -685,29 +645,29 @@ void arm_radix2_butterfly_inverse_q15(
        yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
        pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]) >> 1U;

-        pSrc[2U * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
-                        ((int16_t) (((q31_t) yt * sinVal) >> 16)));
+        pSrc[2 * l] = (((int16_t) (((q31_t) xt * cosVal) >> 16)) -
+                       ((int16_t) (((q31_t) yt * sinVal) >> 16))  );

-        pSrc[2U * l + 1U] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
-                             ((int16_t) (((q31_t) xt * sinVal) >> 16)));
+        pSrc[2 * l + 1] = (((int16_t) (((q31_t) yt * cosVal) >> 16)) +
+                           ((int16_t) (((q31_t) xt * sinVal) >> 16))  );

-      }                         // butterfly loop end
+      } /* butterfly loop end */

-    }                           // groups loop end
+    } /* groups loop end */

    twidCoefModifier = twidCoefModifier << 1U;
-  }                             // stages loop end
+  } /* stages loop end */

  n1 = n2;
  n2 = n2 >> 1;
  ia = 0;

-  cosVal = pCoef[ia * 2];
+  cosVal = pCoef[(ia * 2)];
  sinVal = pCoef[(ia * 2) + 1];

  ia = ia + twidCoefModifier;

-  // loop for butterfly
+  /* loop for butterfly */
  for (i = 0; i < fftLen; i += n1)
  {
    l = i + n2;
@ -717,13 +677,13 @@ void arm_radix2_butterfly_inverse_q15(
    yt = pSrc[2 * i + 1] - pSrc[2 * l + 1];
    pSrc[2 * i + 1] = (pSrc[2 * l + 1] + pSrc[2 * i + 1]);

-    pSrc[2U * l] = xt;
+    pSrc[2 * l] = xt;

-    pSrc[2U * l + 1U] = yt;
+    pSrc[2 * l + 1] = yt;

-  }                             // groups loop end
+  } /* groups loop end */


-#endif //             #if defined (ARM_MATH_DSP)
+#endif /* #if defined (ARM_MATH_DSP) */

 }
--- a/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix2_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix2_q31.c
 * Description:  Radix-2 Decimation in Frequency CFFT & CIFFT Fixed point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,44 +29,43 @@
 #include "arm_math.h"

 void arm_radix2_butterfly_q31(
-  q31_t * pSrc,
-  uint32_t fftLen,
-  q31_t * pCoef,
-  uint16_t twidCoefModifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_radix2_butterfly_inverse_q31(
-  q31_t * pSrc,
-  uint32_t fftLen,
-  q31_t * pCoef,
-  uint16_t twidCoefModifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_bitreversal_q31(
-  q31_t * pSrc,
-  uint32_t fftLen,
-  uint16_t bitRevFactor,
-  uint16_t * pBitRevTab);
+        q31_t * pSrc,
+        uint32_t fftLen,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab);

 /**
-* @ingroup groupTransforms
-*/
+  @ingroup groupTransforms
+ */

 /**
-* @addtogroup ComplexFFT
-* @{
-*/
+  @addtogroup ComplexFFT
+  @{
+ */

 /**
-* @details
-* @brief Processing function for the fixed-point CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q31 and will be removed
-* @param[in]      *S    points to an instance of the fixed-point CFFT/CIFFT structure.
-* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
-* @return none.
-*/
+  @brief         Processing function for the fixed-point CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+  @param[in]     S    points to an instance of the fixed-point CFFT/CIFFT structure
+  @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @return        none
+ */

 void arm_cfft_radix2_q31(
-const arm_cfft_radix2_instance_q31 * S,
-q31_t * pSrc)
+  const arm_cfft_radix2_instance_q31 * S,
+        q31_t * pSrc)
 {

   if (S->ifftFlag == 1U)
@ -84,14 +83,14 @@ q31_t * pSrc)
 }

 /**
-* @} end of ComplexFFT group
-*/
+  @} end of ComplexFFT group
+ */

 void arm_radix2_butterfly_q31(
-q31_t * pSrc,
-uint32_t fftLen,
-q31_t * pCoef,
-uint16_t twidCoefModifier)
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint16_t twidCoefModifier)
 {

   unsigned i, j, k, l, m;
@ -216,10 +215,10 @@ uint16_t twidCoefModifier)


 void arm_radix2_butterfly_inverse_q31(
-q31_t * pSrc,
-uint32_t fftLen,
-q31_t * pCoef,
-uint16_t twidCoefModifier)
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pCoef,
+        uint16_t twidCoefModifier)
 {

   unsigned i, j, k, l;
--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix4_f32.c
 * Description:  Radix-4 Decimation in Frequency CFFT & CIFFT Floating point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,47 +29,45 @@
 #include "arm_math.h"

 extern void arm_bitreversal_f32(
-float32_t * pSrc,
-uint16_t fftSize,
-uint16_t bitRevFactor,
-uint16_t * pBitRevTab);
+        float32_t * pSrc,
+        uint16_t fftSize,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab);

 void arm_radix4_butterfly_f32(
-float32_t * pSrc,
-uint16_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier);
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier);

 void arm_radix4_butterfly_inverse_f32(
-float32_t * pSrc,
-uint16_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier,
-float32_t onebyfftLen);
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier,
+        float32_t onebyfftLen);


 /**
-* @ingroup groupTransforms
-*/
+  @ingroup groupTransforms
+ */

 /**
-* @addtogroup ComplexFFT
-* @{
-*/
+  @addtogroup ComplexFFT
+  @{
+ */

 /**
-* @details
-* @brief Processing function for the floating-point Radix-4 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_f32 and will be removed
-* in the future.
-* @param[in]      *S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure.
-* @param[in, out] *pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place.
-* @return none.
-*/
+  @brief         Processing function for the floating-point Radix-4 CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
+  @param[in]     S    points to an instance of the floating-point Radix-4 CFFT/CIFFT structure
+  @param[in,out] pSrc points to the complex data buffer of size <code>2*fftLen</code>. Processing occurs in-place
+  @return        none
+ */

 void arm_cfft_radix4_f32(
  const arm_cfft_radix4_instance_f32 * S,
-  float32_t * pSrc)
+        float32_t * pSrc)
 {
   if (S->ifftFlag == 1U)
   {
@ -91,46 +89,43 @@ void arm_cfft_radix4_f32(
 }

 /**
-* @} end of ComplexFFT group
-*/
+  @} end of ComplexFFT group
+ */

 /* ----------------------------------------------------------------------
 * Internal helper function used by the FFTs
 * ---------------------------------------------------------------------- */

-/*
-* @brief  Core function for the floating-point CFFT butterfly process.
-* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
-* @param[in]      fftLen           length of the FFT.
-* @param[in]      *pCoef           points to the twiddle coefficient buffer.
-* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-*/
+/**
+  brief         Core function for the floating-point CFFT butterfly process.
+  param[in,out] pSrc             points to the in-place buffer of floating-point data type
+  param[in]     fftLen           length of the FFT
+  param[in]     pCoef            points to the twiddle coefficient buffer
+  param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  return        none
+ */

 void arm_radix4_butterfly_f32(
-float32_t * pSrc,
-uint16_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier)
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier)
 {
+        float32_t co1, co2, co3, si1, si2, si3;
+        uint32_t ia1, ia2, ia3;
+        uint32_t i0, i1, i2, i3;
+        uint32_t n1, n2, j, k;

-   float32_t co1, co2, co3, si1, si2, si3;
-   uint32_t ia1, ia2, ia3;
-   uint32_t i0, i1, i2, i3;
-   uint32_t n1, n2, j, k;
+#if defined (ARM_MATH_LOOPUNROLL)

-#if defined (ARM_MATH_DSP)
-
-   /* Run the below code for Cortex-M4 and Cortex-M3 */
-
-   float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
-   float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
-   Ybminusd;
-   float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
-   float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
-   float32_t *ptr1;
-   float32_t p0,p1,p2,p3,p4,p5;
-   float32_t a0,a1,a2,a3,a4,a5,a6,a7;
+        float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
+        float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
+        Ybminusd;
+        float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
+        float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
+        float32_t *ptr1;
+        float32_t p0,p1,p2,p3,p4,p5;
+        float32_t a0,a1,a2,a3,a4,a5,a6,a7;

   /*  Initializations for the first stage */
   n2 = fftLen;
@ -290,11 +285,11 @@ uint16_t twidCoefModifier)
         /*  index calculation for the coefficients */
         ia2 = ia1 + ia1;
         ia3 = ia2 + ia1;
-         co1 = pCoef[ia1 * 2U];
+         co1 = pCoef[(ia1 * 2U)];
         si1 = pCoef[(ia1 * 2U) + 1U];
-         co2 = pCoef[ia2 * 2U];
+         co2 = pCoef[(ia2 * 2U)];
         si2 = pCoef[(ia2 * 2U) + 1U];
-         co3 = pCoef[ia3 * 2U];
+         co3 = pCoef[(ia3 * 2U)];
         si3 = pCoef[(ia3 * 2U) + 1U];

         /*  Twiddle coefficients index modifier */
@ -484,11 +479,9 @@ uint16_t twidCoefModifier)

 #else

-   float32_t t1, t2, r1, r2, s1, s2;
+        float32_t t1, t2, r1, r2, s1, s2;

-   /* Run the below code for Cortex-M0 */
-
-   /*  Initializations for the fft calculation */
+   /* Initializations for the fft calculation */
   n2 = fftLen;
   n1 = n2;
   for (k = fftLen; k > 1U; k >>= 2U)
@ -597,42 +590,42 @@ uint16_t twidCoefModifier)
      twidCoefModifier <<= 2U;
   }

-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

 }

-/*
-* @brief  Core function for the floating-point CIFFT butterfly process.
-* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
-* @param[in]      fftLen           length of the FFT.
-* @param[in]      *pCoef           points to twiddle coefficient buffer.
-* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @param[in]      onebyfftLen      value of 1/fftLen.
-* @return none.
-*/
+/**
+  brief         Core function for the floating-point CIFFT butterfly process.
+  param[in,out] pSrc             points to the in-place buffer of floating-point data type
+  param[in]     fftLen           length of the FFT
+  param[in]     pCoef            points to twiddle coefficient buffer
+  param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+  param[in]     onebyfftLen      value of 1/fftLen
+  return        none
+ */

 void arm_radix4_butterfly_inverse_f32(
-float32_t * pSrc,
-uint16_t fftLen,
-float32_t * pCoef,
-uint16_t twidCoefModifier,
-float32_t onebyfftLen)
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier,
+        float32_t onebyfftLen)
 {
-   float32_t co1, co2, co3, si1, si2, si3;
-   uint32_t ia1, ia2, ia3;
-   uint32_t i0, i1, i2, i3;
-   uint32_t n1, n2, j, k;
+        float32_t co1, co2, co3, si1, si2, si3;
+        uint32_t ia1, ia2, ia3;
+        uint32_t i0, i1, i2, i3;
+        uint32_t n1, n2, j, k;

-#if defined (ARM_MATH_DSP)
+#if defined (ARM_MATH_LOOPUNROLL)

-   float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
-   float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
-   Ybminusd;
-   float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
-   float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
-   float32_t *ptr1;
-   float32_t p0,p1,p2,p3,p4,p5,p6,p7;
-   float32_t a0,a1,a2,a3,a4,a5,a6,a7;
+        float32_t xaIn, yaIn, xbIn, ybIn, xcIn, ycIn, xdIn, ydIn;
+        float32_t Xaplusc, Xbplusd, Yaplusc, Ybplusd, Xaminusc, Xbminusd, Yaminusc,
+        Ybminusd;
+        float32_t Xb12C_out, Yb12C_out, Xc12C_out, Yc12C_out, Xd12C_out, Yd12C_out;
+        float32_t Xb12_out, Yb12_out, Xc12_out, Yc12_out, Xd12_out, Yd12_out;
+        float32_t *ptr1;
+        float32_t p0,p1,p2,p3,p4,p5,p6,p7;
+        float32_t a0,a1,a2,a3,a4,a5,a6,a7;


   /*  Initializations for the first stage */
@ -1008,9 +1001,7 @@ float32_t onebyfftLen)

 #else

-   float32_t t1, t2, r1, r2, s1, s2;
-
-   /* Run the below code for Cortex-M0 */
+        float32_t t1, t2, r1, r2, s1, s2;

   /*  Initializations for the first stage */
   n2 = fftLen;
@ -1203,7 +1194,7 @@ float32_t onebyfftLen)
      pSrc[(2U * i3) + 1U] = s2 * onebyfftLen;
   }

-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
 }


--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_init_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix4_init_f32.c
 * Description:  Radix-4 Decimation in Frequency Floating-point CFFT & CIFFT Initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,36 +30,40 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

 /**
-* @brief  Initialization function for the floating-point CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superceded by \ref arm_cfft_f32 and will be removed
-* in the future.
-* @param[in,out] *S             points to an instance of the floating-point CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
-*/
+  @brief         Initialization function for the floating-point CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_f32 and will be removed in the future.
+  @param[in,out] S              points to an instance of the floating-point CFFT/CIFFT structure
+  @param[in]     fftLen         length of the FFT
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set ifftFlag (=1) to calculate the CIFFT; otherwise the CFFT is calculated.
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set bitReverseFlag (=1) for output to be in normal order; otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This function also initializes the twiddle factor table pointer and the bit reversal table pointer.
+ */

 arm_status arm_cfft_radix4_init_f32(
  arm_cfft_radix4_instance_f32 * S,
@ -148,5 +152,5 @@ arm_status arm_cfft_radix4_init_f32(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix4_init_q15.c
 * Description:  Radix-4 Decimation in Frequency Q15 FFT & IFFT initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,37 +30,42 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */


 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */


 /**
-* @brief Initialization function for the Q15 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q15 and will be removed
-* @param[in,out] *S             points to an instance of the Q15 CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
-*/
+  @brief Initialization function for the Q15 CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_q15 and will be removed in the future.
+  @param[in,out] S              points to an instance of the Q15 CFFT/CIFFT structure
+  @param[in]     fftLen         length of the FFT
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set ifftFlag (=1) to calculate the CIFFT; otherwise the CFFT is calculated.
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set bitReverseFlag (=1) for output to be in normal order; otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This function also initializes the twiddle factor table pointer and the bit reversal table pointer.
+ */

 arm_status arm_cfft_radix4_init_q15(
  arm_cfft_radix4_instance_q15 * S,
@ -136,5 +141,5 @@ arm_status arm_cfft_radix4_init_q15(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_init_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix4_init_q31.c
 * Description:  Radix-4 Decimation in Frequency Q31 FFT & IFFT initialization function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,35 +30,40 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup ComplexFFT
- * @{
+  @addtogroup ComplexFFT
+  @{
 */

 /**
-*
-* @brief  Initialization function for the Q31 CFFT/CIFFT.
-* @deprecated Do not use this function.  It has been superseded by \ref arm_cfft_q31 and will be removed
-* @param[in,out] *S             points to an instance of the Q31 CFFT/CIFFT structure.
-* @param[in]     fftLen         length of the FFT.
-* @param[in]     ifftFlag       flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform.
-* @param[in]     bitReverseFlag flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output.
-* @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>fftLen</code> is not a supported value.
-*
-* \par Description:
-* \par
-* The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
-* Set(=1) ifftFlag for calculation of CIFFT otherwise  CFFT is calculated
-* \par
-* The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
-* Set(=1) bitReverseFlag for output to be in normal order otherwise output is in bit reversed order.
-* \par
-* The parameter <code>fftLen</code>	Specifies length of CFFT/CIFFT process. Supported FFT Lengths are 16, 64, 256, 1024.
-* \par
-* This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+
+  @brief         Initialization function for the Q31 CFFT/CIFFT.
+  @deprecated    Do not use this function. It has been superseded by \ref arm_cfft_q31 and will be removed in the future.
+  @param[in,out] S              points to an instance of the Q31 CFFT/CIFFT structure.
+  @param[in]     fftLen         length of the FFT.
+  @param[in]     ifftFlag       flag that selects transform direction
+                   - value = 0: forward transform
+                   - value = 1: inverse transform
+  @param[in]     bitReverseFlag flag that enables / disables bit reversal of output
+                   - value = 0: disables bit reversal of output
+                   - value = 1: enables bit reversal of output
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length
+
+  @par           Details
+                   The parameter <code>ifftFlag</code> controls whether a forward or inverse transform is computed.
+                   Set ifftFlag (=1) to calculate the CIFFT; otherwise the CFFT is calculated.
+  @par
+                   The parameter <code>bitReverseFlag</code> controls whether output is in normal order or bit reversed order.
+                   Set bitReverseFlag (=1) for output to be in normal order; otherwise output is in bit reversed order.
+  @par
+                   The parameter <code>fftLen</code> specifies the length of the CFFT/CIFFT process. Supported FFT lengths are 16, 64, 256, 1024.
+  @par
+                   This function also initializes the twiddle factor table pointer and the bit reversal table pointer.
 */

 arm_status arm_cfft_radix4_init_q31(
@ -132,5 +137,5 @@ arm_status arm_cfft_radix4_init_q31(
 }

 /**
- * @} end of ComplexFFT group
+  @} end of ComplexFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_q15.c
--- a/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix4_q31.c
--- a/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c
+++ b/DSP/Source/TransformFunctions/arm_cfft_radix8_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_radix8_f32.c
 * Description:  Radix-8 Decimation in Frequency CFFT & CIFFT Floating point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -33,20 +33,20 @@
 * Internal helper function used by the FFTs
 * -------------------------------------------------------------------- */

-/*
-* @brief  Core function for the floating-point CFFT butterfly process.
-* @param[in, out] *pSrc            points to the in-place buffer of floating-point data type.
-* @param[in]      fftLen           length of the FFT.
-* @param[in]      *pCoef           points to the twiddle coefficient buffer.
-* @param[in]      twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
+/**
+  brief         Core function for the floating-point CFFT butterfly process.
+  param[in,out] pSrc             points to the in-place buffer of floating-point data type.
+  param[in]     fftLen           length of the FFT.
+  param[in]     pCoef            points to the twiddle coefficient buffer.
+  param[in]     twidCoefModifier twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
+  return        none
 */

 void arm_radix8_butterfly_f32(
-float32_t * pSrc,
-uint16_t fftLen,
-const float32_t * pCoef,
-uint16_t twidCoefModifier)
+  float32_t * pSrc,
+  uint16_t fftLen,
+  const float32_t * pCoef,
+  uint16_t twidCoefModifier)
 {
   uint32_t ia1, ia2, ia3, ia4, ia5, ia6, ia7;
   uint32_t i1, i2, i3, i4, i5, i6, i7, i8;
--- a/DSP/Source/TransformFunctions/arm_dct4_f32.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_dct4_f32.c
 * Description:  Processing function of DCT4 & IDCT4 F32
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,109 +29,111 @@
 #include "arm_math.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @defgroup DCT4_IDCT4 DCT Type IV Functions
- * Representation of signals by minimum number of values is important for storage and transmission.
- * The possibility of large discontinuity between the beginning and end of a period of a signal
- * in DFT can be avoided by extending the signal so that it is even-symmetric.
- * Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
- * spectrum and is very widely used in signal and image coding applications.
- * The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
- * DCT has an excellent energy-packing capability, hence has many applications and in data compression in particular.
- *
- * DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
- * Reordering of the input data makes the computation of DCT just a problem of
- * computing the DFT of a real signal with a few additional operations.
- * This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
- *
- * DCT type-II can be implemented using Fast fourier transform (FFT) internally, as the transform is applied on real values, Real FFT can be used.
- * DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
- * DCT2 implementation can be described in the following steps:
- * - Re-ordering input
- * - Calculating Real FFT
- * - Multiplication of weights and Real FFT output and getting real part from the product.
- *
- * This process is explained by the block diagram below:
- * \image html DCT4.gif "Discrete Cosine Transform - type-IV"
- *
- * \par Algorithm:
- * The N-point type-IV DCT is defined as a real, linear transformation by the formula:
- * \image html DCT4Equation.gif
- * where <code>k = 0,1,2,.....N-1</code>
- *\par
- * Its inverse is defined as follows:
- * \image html IDCT4Equation.gif
- * where <code>n = 0,1,2,.....N-1</code>
- *\par
- * The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
- * The symmetry of the transform matrix indicates that the fast algorithms for the forward
- * and inverse transform computation are identical.
- * Note that the implementation of Inverse DCT4 and DCT4 is same, hence same process function can be used for both.
- *
- * \par Lengths supported by the transform:
- *  As DCT4 internally uses Real FFT, it supports all the lengths 128, 512, 2048 and 8192.
- * The library provides separate functions for Q15, Q31, and floating-point data types.
- * \par Instance Structure
- * The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
- * A separate instance structure must be defined for each transform.
- * There are separate instance structure declarations for each of the 3 supported data types.
- *
- * \par Initialization Functions
- * There is also an associated initialization function for each data type.
- * The initialization function performs the following operations:
- * - Sets the values of the internal structure fields.
- * - Initializes Real FFT as its process function is used internally in DCT4, by calling arm_rfft_init_f32().
- * \par
- * Use of the initialization function is optional.
- * However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
- * To place an instance structure into a const data section, the instance structure must be manually initialized.
- * Manually initialize the instance structure as follows:
- * <pre>
- *arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
- *arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
- *arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
- * </pre>
- * where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
- * \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
- * \c pTwiddle points to the twiddle factor table;
- * \c pCosFactor points to the cosFactor table;
- * \c pRfft points to the real FFT instance;
- * \c pCfft points to the complex FFT instance;
- * The CFFT and RFFT structures also needs to be initialized, refer to arm_cfft_radix4_f32()
- * and arm_rfft_f32() respectively for details regarding static initialization.
- *
- * \par Fixed-Point Behavior
- * Care must be taken when using the fixed-point versions of the DCT4 transform functions.
- * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
- * Refer to the function specific documentation below for usage guidelines.
+  @defgroup DCT4_IDCT4 DCT Type IV Functions
+
+  Representation of signals by minimum number of values is important for storage and transmission.
+  The possibility of large discontinuity between the beginning and end of a period of a signal
+  in DFT can be avoided by extending the signal so that it is even-symmetric.
+  Discrete Cosine Transform (DCT) is constructed such that its energy is heavily concentrated in the lower part of the
+  spectrum and is very widely used in signal and image coding applications.
+  The family of DCTs (DCT type- 1,2,3,4) is the outcome of different combinations of homogeneous boundary conditions.
+  DCT has an excellent energy-packing capability and hence has many applications, in data compression in particular.
+  
+  DCT is essentially the Discrete Fourier Transform(DFT) of an even-extended real signal.
+  Reordering of the input data makes the computation of DCT just a problem of
+  computing the DFT of a real signal with a few additional operations.
+  This approach provides regular, simple, and very efficient DCT algorithms for practical hardware and software implementations.
+  
+  DCT type-II can be implemented using the Fast Fourier Transform (FFT) internally; as the transform is applied to real values, the Real FFT can be used.
+  DCT4 is implemented using DCT2 as their implementations are similar except with some added pre-processing and post-processing.
+  DCT2 implementation can be described in the following steps:
+  - Re-ordering input
+  - Calculating Real FFT
+  - Multiplication of weights and Real FFT output and getting real part from the product.
+  
+  This process is explained by the block diagram below:
+  \image html DCT4.gif "Discrete Cosine Transform - type-IV"
+ 
+  @par           Algorithm
+                   The N-point type-IV DCT is defined as a real, linear transformation by the formula:
+                   \image html DCT4Equation.gif
+                   where <code>k = 0, 1, 2, ..., N-1</code>
+  @par
+                   Its inverse is defined as follows:
+                   \image html IDCT4Equation.gif
+                   where <code>n = 0, 1, 2, ..., N-1</code>
+  @par
+                   The DCT4 matrices become involutory (i.e. they are self-inverse) by multiplying with an overall scale factor of sqrt(2/N).
+                   The symmetry of the transform matrix indicates that the fast algorithms for the forward
+                   and inverse transform computation are identical.
+                   Note that the implementation of Inverse DCT4 and DCT4 is the same, hence the same process function can be used for both.
+ 
+  @par           Lengths supported by the transform
+                   As DCT4 internally uses Real FFT, it supports the lengths 128, 512, 2048 and 8192.
+                   The library provides separate functions for Q15, Q31, and floating-point data types.
+
+  @par           Instance Structure
+                   The instances for Real FFT and FFT, cosine values table and twiddle factor table are stored in an instance data structure.
+                   A separate instance structure must be defined for each transform.
+                   There are separate instance structure declarations for each of the 3 supported data types.
+                 
+  @par           Initialization Functions
+                   There is also an associated initialization function for each data type.
+                   The initialization function performs the following operations:
+                   - Sets the values of the internal structure fields.
+                   - Initializes Real FFT as its process function is used internally in DCT4, by calling \ref arm_rfft_init_f32().
+  @par
+                   Use of the initialization function is optional.
+                   However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
+                   To place an instance structure into a const data section, the instance structure must be manually initialized.
+                   Manually initialize the instance structure as follows:
+  <pre>
+      arm_dct4_instance_f32 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+      arm_dct4_instance_q31 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+      arm_dct4_instance_q15 S = {N, Nby2, normalize, pTwiddle, pCosFactor, pRfft, pCfft};
+  </pre>
+                   where \c N is the length of the DCT4; \c Nby2 is half of the length of the DCT4;
+                   \c normalize is normalizing factor used and is equal to <code>sqrt(2/N)</code>;
+                   \c pTwiddle points to the twiddle factor table;
+                   \c pCosFactor points to the cosFactor table;
+                   \c pRfft points to the real FFT instance;
+                   \c pCfft points to the complex FFT instance;
+                   The CFFT and RFFT structures also need to be initialized; refer to arm_cfft_radix4_f32()
+                   and arm_rfft_f32() respectively for details regarding static initialization.
+ 
+  @par           Fixed-Point Behavior
+                   Care must be taken when using the fixed-point versions of the DCT4 transform functions.
+                   In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
+                   Refer to the function specific documentation below for usage guidelines.
 */

 /**
- * @addtogroup DCT4_IDCT4
- * @{
+  @addtogroup DCT4_IDCT4
+  @{
 */

 /**
- * @brief Processing function for the floating-point DCT4/IDCT4.
- * @param[in]       *S             points to an instance of the floating-point DCT4/IDCT4 structure.
- * @param[in]       *pState        points to state buffer.
- * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
- * @return none.
+  @brief         Processing function for the floating-point DCT4/IDCT4.
+  @param[in]     S             points to an instance of the floating-point DCT4/IDCT4 structure
+  @param[in]     pState        points to state buffer
+  @param[in,out] pInlineBuffer points to the in-place input and output buffer
+  @return        none
 */

 void arm_dct4_f32(
  const arm_dct4_instance_f32 * S,
-  float32_t * pState,
-  float32_t * pInlineBuffer)
+        float32_t * pState,
+        float32_t * pInlineBuffer)
 {
-  uint32_t i;                                    /* Loop counter */
-  float32_t *weights = S->pTwiddle;              /* Pointer to the Weights table */
-  float32_t *cosFact = S->pCosFactor;            /* Pointer to the cos factors table */
-  float32_t *pS1, *pS2, *pbuff;                  /* Temporary pointers for input buffer and pState buffer */
-  float32_t in;                                  /* Temporary variable */
+  const float32_t *weights = S->pTwiddle;              /* Pointer to the Weights table */
+  const float32_t *cosFact = S->pCosFactor;            /* Pointer to the cos factors table */
+        float32_t *pS1, *pS2, *pbuff;                  /* Temporary pointers for input buffer and pState buffer */
+        float32_t in;                                  /* Temporary variable */
+        uint32_t i;                                    /* Loop counter */


  /* DCT4 computation involves DCT2 (which is calculated using RFFT)
@ -153,13 +155,13 @@ void arm_dct4_f32(
   * (d) Multiplying the output with the normalizing factor sqrt(2/N).
   */

-        /*-------- Pre-processing ------------*/
+  /*-------- Pre-processing ------------*/
  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
  arm_scale_f32(pInlineBuffer, 2.0f, pInlineBuffer, S->N);
  arm_mult_f32(pInlineBuffer, cosFact, pInlineBuffer, S->N);

  /* ----------------------------------------------------------------
-   * Step1: Re-ordering of even and odd elements as,
+   * Step1: Re-ordering of even and odd elements as
   *             pState[i] =  pInlineBuffer[2*i] and
   *             pState[N-i-1] = pInlineBuffer[2*i+1] where i = 0 to N/2
   ---------------------------------------------------------------------*/
@ -173,12 +175,11 @@ void arm_dct4_f32(
  /* pbuff initialized to input buffer */
  pbuff = pInlineBuffer;

-#if defined (ARM_MATH_DSP)

-  /* Run the below code for Cortex-M4 and Cortex-M3 */
+#if defined (ARM_MATH_LOOPUNROLL)

  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
-  i = (uint32_t) S->Nby2 >> 2U;
+  i = S->Nby2 >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
@ -199,7 +200,7 @@ void arm_dct4_f32(
    *pS1++ = *pbuff++;
    *pS2-- = *pbuff++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

@ -210,7 +211,7 @@ void arm_dct4_f32(
  pS1 = pState;

  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;

  /* Processing with loop unrolling 4 times as N is always multiple of 4.
   * Compute 4 outputs at a time */
@ -231,12 +232,12 @@ void arm_dct4_f32(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);

-        /*----------------------------------------------------------------------
-	 *  Step3: Multiply the FFT output with the weights.
-	 *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);

  /* ----------- Post-processing ---------- */
  /* DCT-IV can be obtained from DCT-II by the equation,
@ -245,7 +246,7 @@ void arm_dct4_f32(
  /* Getting only real part from the output and Converting to DCT-IV */

  /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
-  i = ((uint32_t) S->N - 1U) >> 2U;
+  i = (S->N - 1U) >> 2U;

  /* pbuff initialized to input buffer. */
  pbuff = pInlineBuffer;
@ -290,7 +291,7 @@ void arm_dct4_f32(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  i = ((uint32_t) S->N - 1U) % 0x4U;
+  i = (S->N - 1U) % 0x4U;

  while (i > 0U)
  {
@ -298,6 +299,7 @@ void arm_dct4_f32(
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

@ -306,10 +308,10 @@ void arm_dct4_f32(
  }


-        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;

  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
  pbuff = pInlineBuffer;
@ -337,10 +339,8 @@ void arm_dct4_f32(

 #else

-  /* Run the below code for Cortex-M0 */
-
  /* Initializing the loop counter to N/2 */
-  i = (uint32_t) S->Nby2;
+  i = S->Nby2;

  do
  {
@ -361,7 +361,7 @@ void arm_dct4_f32(
  pS1 = pState;

  /* Initializing the loop counter */
-  i = (uint32_t) S->N;
+  i = S->N;

  do
  {
@ -377,12 +377,12 @@ void arm_dct4_f32(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_f32(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_f32 (S->pRfft, pInlineBuffer, pState);

-        /*----------------------------------------------------------------------
-	 *  Step3: Multiply the FFT output with the weights.
-	 *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_f32(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_f32 (pState, weights, pState, S->N);

  /* ----------- Post-processing ---------- */
  /* DCT-IV can be obtained from DCT-II by the equation,
@ -405,7 +405,7 @@ void arm_dct4_f32(
  pS1++;

  /* Initializing the loop counter */
-  i = ((uint32_t) S->N - 1U);
+  i = (S->N - 1U);

  do
  {
@ -413,21 +413,20 @@ void arm_dct4_f32(
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

-
-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

-        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /* Initializing loop counter */
+  i = S->N;

-  /* Initializing the loop counter */
-  i = (uint32_t) S->N;
-
-  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  /* pbuff initialized to the pInlineBuffer (now contains the output values) */
  pbuff = pInlineBuffer;

  do
@ -436,14 +435,14 @@ void arm_dct4_f32(
    in = *pbuff;
    *pbuff++ = in * S->normalize;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

 }

 /**
-   * @} end of DCT4_IDCT4 group
-   */
+  @} end of DCT4_IDCT4 group
+ */
--- a/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_init_f32.c
--- a/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_init_q15.c
--- a/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_init_q31.c
--- a/DSP/Source/TransformFunctions/arm_dct4_q15.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_dct4_q15.c
 * Description:  Processing function of DCT4 & IDCT4 Q15
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,35 +29,35 @@
 #include "arm_math.h"

 /**
- * @addtogroup DCT4_IDCT4
- * @{
+  @addtogroup DCT4_IDCT4
+  @{
 */

 /**
- * @brief Processing function for the Q15 DCT4/IDCT4.
- * @param[in]       *S             points to an instance of the Q15 DCT4 structure.
- * @param[in]       *pState        points to state buffer.
- * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
- * @return none.
- *
- * \par Input an output formats:
- * Internally inputs are downscaled in the RFFT process function to avoid overflows.
- * Number of bits downscaled, depends on the size of the transform.
- * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
- *
- * \image html dct4FormatsQ15Table.gif
+  @brief         Processing function for the Q15 DCT4/IDCT4.
+  @param[in]     S             points to an instance of the Q15 DCT4 structure.
+  @param[in]     pState        points to state buffer.
+  @param[in,out] pInlineBuffer points to the in-place input and output buffer.
+  @return        none
+ 
+  @par           Input and output formats
+                   Internally, inputs are downscaled in the RFFT process function to avoid overflows.
+                   The number of bits downscaled depends on the size of the transform. The input and output
+                   formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
+
+                   \image html dct4FormatsQ15Table.gif
 */

 void arm_dct4_q15(
  const arm_dct4_instance_q15 * S,
-  q15_t * pState,
-  q15_t * pInlineBuffer)
+        q15_t * pState,
+        q15_t * pInlineBuffer)
 {
-  uint32_t i;                                    /* Loop counter */
-  q15_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
-  q15_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
-  q15_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
-  q15_t in;                                      /* Temporary variable */
+  const q15_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
+  const q15_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
+        q15_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
+        q15_t in;                                      /* Temporary variable */
+        uint32_t i;                                    /* Loop counter */


  /* DCT4 computation involves DCT2 (which is calculated using RFFT)
@ -79,10 +79,10 @@ void arm_dct4_q15(
   * (d) Multiplying the output with the normalizing factor sqrt(2/N).
   */

-        /*-------- Pre-processing ------------*/
+  /*-------- Pre-processing ------------*/
  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
-  arm_mult_q15(pInlineBuffer, cosFact, pInlineBuffer, S->N);
-  arm_shift_q15(pInlineBuffer, 1, pInlineBuffer, S->N);
+  arm_mult_q15 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
+  arm_shift_q15 (pInlineBuffer, 1, pInlineBuffer, S->N);

  /* ----------------------------------------------------------------
   * Step1: Re-ordering of even and odd elements as
@ -100,12 +100,10 @@ void arm_dct4_q15(
  pbuff = pInlineBuffer;


-#if defined (ARM_MATH_DSP)
-
-  /* Run the below code for Cortex-M4 and Cortex-M3 */
+#if defined (ARM_MATH_LOOPUNROLL)

  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
-  i = (uint32_t) S->Nby2 >> 2U;
+  i = S->Nby2 >> 2U;

  /* First part of the processing with loop unrolling.  Compute 4 outputs at a time.
   ** a second loop below computes the remaining 1 to 3 samples. */
@ -126,7 +124,7 @@ void arm_dct4_q15(
    *pS1++ = *pbuff++;
    *pS2-- = *pbuff++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

@ -137,7 +135,7 @@ void arm_dct4_q15(
  pS1 = pState;

  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;

  /* Processing with loop unrolling 4 times as N is always multiple of 4.
   * Compute 4 outputs at a time */
@ -158,16 +156,16 @@ void arm_dct4_q15(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);

- /*----------------------------------------------------------------------
-  *  Step3: Multiply the FFT output with the weights.
-  *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);

  /* The output of complex multiplication is in 3.13 format.
   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
-  arm_shift_q15(pState, 2, pState, S->N * 2);
+  arm_shift_q15 (pState, 2, pState, S->N * 2);

  /* ----------- Post-processing ---------- */
  /* DCT-IV can be obtained from DCT-II by the equation,
@ -176,7 +174,7 @@ void arm_dct4_q15(
  /* Getting only real part from the output and Converting to DCT-IV */

  /* Initializing the loop counter to N >> 2 for loop unrolling by 4 */
-  i = ((uint32_t) S->N - 1U) >> 2U;
+  i = (S->N - 1U) >> 2U;

  /* pbuff initialized to input buffer. */
  pbuff = pInlineBuffer;
@ -221,7 +219,7 @@ void arm_dct4_q15(

  /* If the blockSize is not a multiple of 4, compute any remaining output samples here.
   ** No loop unrolling is used. */
-  i = ((uint32_t) S->N - 1U) % 0x4U;
+  i = (S->N - 1U) % 0x4U;

  while (i > 0U)
  {
@ -229,18 +227,19 @@ void arm_dct4_q15(
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  }


-   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
-  i = (uint32_t) S->N >> 2U;
+  i = S->N >> 2U;

  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
  pbuff = pInlineBuffer;
@ -261,17 +260,15 @@ void arm_dct4_q15(
    in = *pbuff;
    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);


 #else

-  /* Run the below code for Cortex-M0 */
-
  /* Initializing the loop counter to N/2 */
-  i = (uint32_t) S->Nby2;
+  i = S->Nby2;

  do
  {
@ -292,7 +289,7 @@ void arm_dct4_q15(
  pS1 = pState;

  /* Initializing the loop counter */
-  i = (uint32_t) S->N;
+  i = S->N;

  do
  {
@ -308,16 +305,16 @@ void arm_dct4_q15(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q15(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q15 (S->pRfft, pInlineBuffer, pState);

- /*----------------------------------------------------------------------
-  *  Step3: Multiply the FFT output with the weights.
-  *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q15(pState, weights, pState, S->N);
+  /*----------------------------------------------------------------------
+   *  Step3: Multiply the FFT output with the weights.
+   *----------------------------------------------------------------------*/
+  arm_cmplx_mult_cmplx_q15 (pState, weights, pState, S->N);

  /* The output of complex multiplication is in 3.13 format.
   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.15 format by shifting left by 2 bits. */
-  arm_shift_q15(pState, 2, pState, S->N * 2);
+  arm_shift_q15 (pState, 2, pState, S->N * 2);

  /* ----------- Post-processing ---------- */
  /* DCT-IV can be obtained from DCT-II by the equation,
@ -325,9 +322,6 @@ void arm_dct4_q15(
   *       Hence, Y4(0) = Y2(0)/2  */
  /* Getting only real part from the output and Converting to DCT-IV */

-  /* Initializing the loop counter */
-  i = ((uint32_t) S->N - 1U);
-
  /* pbuff initialized to input buffer. */
  pbuff = pInlineBuffer;

@ -342,25 +336,29 @@ void arm_dct4_q15(
  /* pState pointer is incremented twice as the real values are located alternatively in the array */
  pS1++;

+  /* Initializing the loop counter */
+  i = (S->N - 1U);
+
  do
  {
    /* Calculating Y4(1) to Y4(N-1) from Y2 using equation Y4(k) = Y2(k) - Y4(k-1) */
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

-   /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

-  /* Initializing the loop counter */
-  i = (uint32_t) S->N;
+  /* Initializing loop counter */
+  i = S->N;

-  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  /* pbuff initialized to the pInlineBuffer (now contains the output values) */
  pbuff = pInlineBuffer;

  do
@ -369,14 +367,15 @@ void arm_dct4_q15(
    in = *pbuff;
    *pbuff++ = ((q15_t) (((q31_t) in * S->normalize) >> 15));

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
+
  } while (i > 0U);

-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

 }

 /**
-   * @} end of DCT4_IDCT4 group
-   */
+  @} end of DCT4_IDCT4 group
+ */
--- a/DSP/Source/TransformFunctions/arm_dct4_q31.c
+++ b/DSP/Source/TransformFunctions/arm_dct4_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_dct4_q31.c
 * Description:  Processing function of DCT4 & IDCT4 Q31
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,36 +29,38 @@
 #include "arm_math.h"

 /**
- * @addtogroup DCT4_IDCT4
- * @{
+  @addtogroup DCT4_IDCT4
+  @{
 */

 /**
- * @brief Processing function for the Q31 DCT4/IDCT4.
- * @param[in]       *S             points to an instance of the Q31 DCT4 structure.
- * @param[in]       *pState        points to state buffer.
- * @param[in,out]   *pInlineBuffer points to the in-place input and output buffer.
- * @return none.
- * \par Input an output formats:
- * Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
- * as the conversion from DCT2 to DCT4 involves one subtraction.
- * Internally inputs are downscaled in the RFFT process function to avoid overflows.
- * Number of bits downscaled, depends on the size of the transform.
- * The input and output formats for different DCT sizes and number of bits to upscale are mentioned in the table below:
- *
- * \image html dct4FormatsQ31Table.gif
+  @brief         Processing function for the Q31 DCT4/IDCT4.
+  @param[in]     S             points to an instance of the Q31 DCT4 structure.
+  @param[in]     pState        points to state buffer.
+  @param[in,out] pInlineBuffer points to the in-place input and output buffer.
+  @return        none
+
+  @par           Input and output formats
+                   Input samples need to be downscaled by 1 bit to avoid saturations in the Q31 DCT process,
+                   as the conversion from DCT2 to DCT4 involves one subtraction.
+                   Internally, inputs are downscaled in the RFFT process function to avoid overflows.
+                   The number of bits downscaled depends on the size of the transform.
+                   The input and output formats for different DCT sizes and number of bits to upscale are
+                   mentioned in the table below:
+
+                   \image html dct4FormatsQ31Table.gif
 */

 void arm_dct4_q31(
  const arm_dct4_instance_q31 * S,
-  q31_t * pState,
-  q31_t * pInlineBuffer)
+        q31_t * pState,
+        q31_t * pInlineBuffer)
 {
-  uint16_t i;                                    /* Loop counter */
-  q31_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
-  q31_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
-  q31_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
-  q31_t in;                                      /* Temporary variable */
+  const q31_t *weights = S->pTwiddle;                  /* Pointer to the Weights table */
+  const q31_t *cosFact = S->pCosFactor;                /* Pointer to the cos factors table */
+        q31_t *pS1, *pS2, *pbuff;                      /* Temporary pointers for input buffer and pState buffer */
+        q31_t in;                                      /* Temporary variable */
+        uint32_t i;                                    /* Loop counter */


  /* DCT4 computation involves DCT2 (which is calculated using RFFT)
@ -80,10 +82,10 @@ void arm_dct4_q31(
   * (d) Multiplying the output with the normalizing factor sqrt(2/N).
   */

-        /*-------- Pre-processing ------------*/
+  /*-------- Pre-processing ------------*/
  /* Multiplying input with cos factor i.e. r(n) = 2 * x(n) * cos(pi*(2*n+1)/(4*n)) */
-  arm_mult_q31(pInlineBuffer, cosFact, pInlineBuffer, S->N);
-  arm_shift_q31(pInlineBuffer, 1, pInlineBuffer, S->N);
+  arm_mult_q31 (pInlineBuffer, cosFact, pInlineBuffer, S->N);
+  arm_shift_q31 (pInlineBuffer, 1, pInlineBuffer, S->N);

  /* ----------------------------------------------------------------
   * Step1: Re-ordering of even and odd elements as
@ -100,9 +102,8 @@ void arm_dct4_q31(
  /* pbuff initialized to input buffer */
  pbuff = pInlineBuffer;

-#if defined (ARM_MATH_DSP)

-  /* Run the below code for Cortex-M4 and Cortex-M3 */
+#if defined (ARM_MATH_LOOPUNROLL)

  /* Initializing the loop counter to N/2 >> 2 for loop unrolling by 4 */
  i = S->Nby2 >> 2U;
@ -126,7 +127,7 @@ void arm_dct4_q31(
    *pS1++ = *pbuff++;
    *pS2-- = *pbuff++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

@ -158,16 +159,16 @@ void arm_dct4_q31(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);

  /*----------------------------------------------------------------------
   *  Step3: Multiply the FFT output with the weights.
   *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
+  arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);

  /* The output of complex multiplication is in 3.29 format.
   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
-  arm_shift_q31(pState, 2, pState, S->N * 2);
+  arm_shift_q31 (pState, 2, pState, S->N * 2);

  /* ----------- Post-processing ---------- */
  /* DCT-IV can be obtained from DCT-II by the equation,
@ -229,15 +230,16 @@ void arm_dct4_q31(
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  }


-        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

  /* Initializing the loop counter to N/4 instead of N for loop unrolling */
  i = S->N >> 2U;
@ -261,15 +263,13 @@ void arm_dct4_q31(
    in = *pbuff;
    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);


 #else

-  /* Run the below code for Cortex-M0 */
-
  /* Initializing the loop counter to N/2 */
  i = S->Nby2;

@ -308,12 +308,12 @@ void arm_dct4_q31(
   *     Step2: Calculate RFFT for N-point input
   * ---------------------------------------------------------- */
  /* pInlineBuffer is real input of length N , pState is the complex output of length 2N */
-  arm_rfft_q31(S->pRfft, pInlineBuffer, pState);
+  arm_rfft_q31 (S->pRfft, pInlineBuffer, pState);

  /*----------------------------------------------------------------------
   *  Step3: Multiply the FFT output with the weights.
   *----------------------------------------------------------------------*/
-  arm_cmplx_mult_cmplx_q31(pState, weights, pState, S->N);
+  arm_cmplx_mult_cmplx_q31 (pState, weights, pState, S->N);

  /* The output of complex multiplication is in 3.29 format.
   * Hence changing the format of N (i.e. 2*N elements) complex numbers to 1.31 format by shifting left by 2 bits. */
@ -348,20 +348,20 @@ void arm_dct4_q31(
    /* pState pointer (pS1) is incremented twice as the real values are located alternatively in the array */
    in = *pS1++ - in;
    *pbuff++ = in;
+
    /* points to the next real value */
    pS1++;

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  }

+  /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/

-        /*------------ Normalizing the output by multiplying with the normalizing factor ----------*/
-
-  /* Initializing the loop counter */
+  /* Initializing loop counter */
  i = S->N;

-  /* pbuff initialized to the pInlineBuffer(now contains the output values) */
+  /* pbuff initialized to the pInlineBuffer (now contains the output values) */
  pbuff = pInlineBuffer;

  do
@ -370,14 +370,14 @@ void arm_dct4_q31(
    in = *pbuff;
    *pbuff++ = ((q31_t) (((q63_t) in * S->normalize) >> 31));

-    /* Decrement the loop counter */
+    /* Decrement loop counter */
    i--;
  } while (i > 0U);

-#endif /* #if defined (ARM_MATH_DSP) */
+#endif /* #if defined (ARM_MATH_LOOPUNROLL) */

 }

 /**
-   * @} end of DCT4_IDCT4 group
-   */
+  @} end of DCT4_IDCT4 group
+ */
--- a/DSP/Source/TransformFunctions/arm_rfft_f32.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_rfft_f32.c
 * Description:  RFFT & RIFFT Floating point process function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -33,159 +33,149 @@
 * -------------------------------------------------------------------- */

 extern void arm_radix4_butterfly_f32(
-    float32_t * pSrc,
-    uint16_t fftLen,
-    float32_t * pCoef,
-    uint16_t twidCoefModifier);
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier);

 extern void arm_radix4_butterfly_inverse_f32(
-    float32_t * pSrc,
-    uint16_t fftLen,
-    float32_t * pCoef,
-    uint16_t twidCoefModifier,
-    float32_t onebyfftLen);
+        float32_t * pSrc,
+        uint16_t fftLen,
+  const float32_t * pCoef,
+        uint16_t twidCoefModifier,
+        float32_t onebyfftLen);

 extern void arm_bitreversal_f32(
-    float32_t * pSrc,
-    uint16_t fftSize,
-    uint16_t bitRevFactor,
-    uint16_t * pBitRevTab);
+        float32_t * pSrc,
+        uint16_t fftSize,
+        uint16_t bitRevFactor,
+  const uint16_t * pBitRevTab);

 void arm_split_rfft_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pATable,
-  float32_t * pBTable,
-  float32_t * pDst,
-  uint32_t modifier);
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pATable,
+  const float32_t * pBTable,
+        float32_t * pDst,
+        uint32_t modifier);

 void arm_split_rifft_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pATable,
-  float32_t * pBTable,
-  float32_t * pDst,
-  uint32_t modifier);
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pATable,
+  const float32_t * pBTable,
+        float32_t * pDst,
+        uint32_t modifier);

 /**
-* @ingroup groupTransforms
-*/
-
-/**
- * @addtogroup RealFFT
- * @{
+  @ingroup groupTransforms
 */

 /**
- * @brief Processing function for the floating-point RFFT/RIFFT.
- * @deprecated Do not use this function.  It has been superceded by \ref arm_rfft_fast_f32 and will be removed
- * in the future.
- * @param[in]  *S    points to an instance of the floating-point RFFT/RIFFT structure.
- * @param[in]  *pSrc points to the input buffer.
- * @param[out] *pDst points to the output buffer.
- * @return none.
+  @addtogroup RealFFT
+  @{
+ */
+
+/**
+  @brief         Processing function for the floating-point RFFT/RIFFT.
+  @deprecated    Do not use this function.  It has been superseded by \ref arm_rfft_fast_f32 and will be removed in the future.
+  @param[in]     S    points to an instance of the floating-point RFFT/RIFFT structure
+  @param[in]     pSrc points to the input buffer
+  @param[out]    pDst points to the output buffer
+  @return        none
 */

 void arm_rfft_f32(
  const arm_rfft_instance_f32 * S,
-  float32_t * pSrc,
-  float32_t * pDst)
+        float32_t * pSrc,
+        float32_t * pDst)
 {
  const arm_cfft_radix4_instance_f32 *S_CFFT = S->pCfft;

-
  /* Calculation of Real IFFT of input */
  if (S->ifftFlagR == 1U)
  {
-    /*  Real IFFT core process */
-    arm_split_rifft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
-                        S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+     /*  Real IFFT core process */
+     arm_split_rifft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);


-    /* Complex radix-4 IFFT process */
-    arm_radix4_butterfly_inverse_f32(pDst, S_CFFT->fftLen,
-                                     S_CFFT->pTwiddle,
-                                     S_CFFT->twidCoefModifier,
-                                     S_CFFT->onebyfftLen);
+     /* Complex radix-4 IFFT process */
+     arm_radix4_butterfly_inverse_f32 (pDst, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier, S_CFFT->onebyfftLen);

    /* Bit reversal process */
    if (S->bitReverseFlagR == 1U)
    {
-      arm_bitreversal_f32(pDst, S_CFFT->fftLen,
-                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+      arm_bitreversal_f32 (pDst, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
    }
  }
  else
  {
-
    /* Calculation of RFFT of input */

    /* Complex radix-4 FFT process */
-    arm_radix4_butterfly_f32(pSrc, S_CFFT->fftLen,
-                             S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);
+    arm_radix4_butterfly_f32 (pSrc, S_CFFT->fftLen, S_CFFT->pTwiddle, S_CFFT->twidCoefModifier);

    /* Bit reversal process */
    if (S->bitReverseFlagR == 1U)
    {
-      arm_bitreversal_f32(pSrc, S_CFFT->fftLen,
-                          S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
+      arm_bitreversal_f32 (pSrc, S_CFFT->fftLen, S_CFFT->bitRevFactor, S_CFFT->pBitRevTable);
    }

-
    /*  Real FFT core process */
-    arm_split_rfft_f32(pSrc, S->fftLenBy2, S->pTwiddleAReal,
-                       S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+    arm_split_rfft_f32 (pSrc, S->fftLenBy2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
  }

 }

 /**
-   * @} end of RealFFT group
-   */
+  @} end of RealFFT group
+ */

 /**
- * @brief  Core Real FFT process
- * @param[in]   *pSrc 				points to the input buffer.
- * @param[in]   fftLen  			length of FFT.
- * @param[in]   *pATable 			points to the twiddle Coef A buffer.
- * @param[in]   *pBTable 			points to the twiddle Coef B buffer.
- * @param[out]  *pDst 				points to the output buffer.
- * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
- * @return none.
+  @brief         Core Real FFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
 */

 void arm_split_rfft_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pATable,
-  float32_t * pBTable,
-  float32_t * pDst,
-  uint32_t modifier)
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pATable,
+  const float32_t * pBTable,
+        float32_t * pDst,
+        uint32_t modifier)
 {
-  uint32_t i;                                    /* Loop Counter */
-  float32_t outR, outI;                          /* Temporary variables for output */
-  float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
-  float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
-  float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U];      /* temp pointers for output buffer */
-  float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U];      /* temp pointers for input buffer */
+        uint32_t i;                                    /* Loop Counter */
+        float32_t outR, outI;                          /* Temporary variables for output */
+  const float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
+        float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
+        float32_t *pDst1 = &pDst[2], *pDst2 = &pDst[(4U * fftLen) - 1U];      /* temp pointers for output buffer */
+        float32_t *pSrc1 = &pSrc[2], *pSrc2 = &pSrc[(2U * fftLen) - 1U];      /* temp pointers for input buffer */

  /* Init coefficient pointers */
-  pCoefA = &pATable[modifier * 2U];
-  pCoefB = &pBTable[modifier * 2U];
+  pCoefA = &pATable[modifier * 2];
+  pCoefB = &pBTable[modifier * 2];

  i = fftLen - 1U;

  while (i > 0U)
  {
-    /*
-       outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
-       + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
-       pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
-     */
+     /*
+       outR = (  pSrc[2 * i]             * pATable[2 * i]
+               - pSrc[2 * i + 1]         * pATable[2 * i + 1]
+               + pSrc[2 * n - 2 * i]     * pBTable[2 * i]
+               + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);

-    /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
-       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+       outI = (  pSrc[2 * i + 1]         * pATable[2 * i]
+               + pSrc[2 * i]             * pATable[2 * i + 1]
+               + pSrc[2 * n - 2 * i]     * pBTable[2 * i + 1]
+               - pSrc[2 * n - 2 * i + 1] * pBTable[2 * i]);
+      */

    /* read pATable[2 * i] */
    CoefA1 = *pCoefA++;
@ -238,81 +228,82 @@ void arm_split_rfft_f32(


 /**
- * @brief  Core Real IFFT process
- * @param[in]   *pSrc 				points to the input buffer.
- * @param[in]   fftLen  			length of FFT.
- * @param[in]   *pATable 			points to the twiddle Coef A buffer.
- * @param[in]   *pBTable 			points to the twiddle Coef B buffer.
- * @param[out]  *pDst 				points to the output buffer.
- * @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
- * @return none.
+  @brief         Core Real IFFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
 */

 void arm_split_rifft_f32(
-  float32_t * pSrc,
-  uint32_t fftLen,
-  float32_t * pATable,
-  float32_t * pBTable,
-  float32_t * pDst,
-  uint32_t modifier)
+        float32_t * pSrc,
+        uint32_t fftLen,
+  const float32_t * pATable,
+  const float32_t * pBTable,
+        float32_t * pDst,
+        uint32_t modifier)
 {
-  float32_t outR, outI;                          /* Temporary variables for output */
-  float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
-  float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
-  float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U];
+        float32_t outR, outI;                          /* Temporary variables for output */
+  const float32_t *pCoefA, *pCoefB;                    /* Temporary pointers for twiddle factors */
+        float32_t CoefA1, CoefA2, CoefB1;              /* Temporary variables for twiddle coefficients */
+        float32_t *pSrc1 = &pSrc[0], *pSrc2 = &pSrc[(2U * fftLen) + 1U];

  pCoefA = &pATable[0];
  pCoefB = &pBTable[0];

  while (fftLen > 0U)
  {
-    /*
-       outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
-       pIn[2 * n - 2 * i] * pBTable[2 * i] -
-       pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+     /*
+       outR = (  pIn[2 * i]             * pATable[2 * i]
+               + pIn[2 * i + 1]         * pATable[2 * i + 1]
+               + pIn[2 * n - 2 * i]     * pBTable[2 * i]
+               - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);

-       outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
-       pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-       pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+       outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+               - pIn[2 * i]             * pATable[2 * i + 1]
+               - pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+               - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+      */

-     */
+     CoefA1 = *pCoefA++;
+     CoefA2 = *pCoefA;

-    CoefA1 = *pCoefA++;
-    CoefA2 = *pCoefA;
+     /* outR = (pSrc[2 * i] * CoefA1 */
+     outR = *pSrc1 * CoefA1;

-    /* outR = (pSrc[2 * i] * CoefA1 */
-    outR = *pSrc1 * CoefA1;
+     /* - pSrc[2 * i] * CoefA2 */
+     outI = -(*pSrc1++) * CoefA2;

-    /* - pSrc[2 * i] * CoefA2 */
-    outI = -(*pSrc1++) * CoefA2;
+     /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
+     outR += (*pSrc1 + *pSrc2) * CoefA2;

-    /* (pSrc[2 * i + 1] + pSrc[2 * fftLen - 2 * i + 1]) * CoefA2 */
-    outR += (*pSrc1 + *pSrc2) * CoefA2;
+     /* pSrc[2 * i + 1] * CoefA1 */
+     outI += (*pSrc1++) * CoefA1;

-    /* pSrc[2 * i + 1] * CoefA1 */
-    outI += (*pSrc1++) * CoefA1;
+     CoefB1 = *pCoefB;

-    CoefB1 = *pCoefB;
+     /* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
+     outI -= *pSrc2-- * CoefB1;

-    /* - pSrc[2 * fftLen - 2 * i + 1] * CoefB1 */
-    outI -= *pSrc2-- * CoefB1;
+     /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
+     outR += *pSrc2 * CoefB1;

-    /* pSrc[2 * fftLen - 2 * i] * CoefB1 */
-    outR += *pSrc2 * CoefB1;
+     /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
+     outI += *pSrc2-- * CoefA2;

-    /* pSrc[2 * fftLen - 2 * i] * CoefA2 */
-    outI += *pSrc2-- * CoefA2;
+     /* write output */
+     *pDst++ = outR;
+     *pDst++ = outI;

-    /* write output */
-    *pDst++ = outR;
-    *pDst++ = outI;
+     /* update coefficient pointer */
+     pCoefB = pCoefB + (modifier * 2);
+     pCoefA = pCoefA + (modifier * 2 - 1);

-    /* update coefficient pointer */
-    pCoefB = pCoefB + (modifier * 2U);
-    pCoefA = pCoefA + ((modifier * 2U) - 1U);
-
-    /* Decrement loop count */
-    fftLen--;
+     /* Decrement loop count */
+     fftLen--;
  }

 }
--- a/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_fast_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_rfft_f32.c
 * Description:  RFFT & RIFFT Floating point process function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -29,17 +29,18 @@
 #include "arm_math.h"

 void stage_rfft_f32(
-  arm_rfft_fast_instance_f32 * S,
-  float32_t * p, float32_t * pOut)
+  const arm_rfft_fast_instance_f32 * S,
+        float32_t * p,
+        float32_t * pOut)
 {
-   uint32_t  k;								   /* Loop Counter                     */
-   float32_t twR, twI;						   /* RFFT Twiddle coefficients        */
-   float32_t * pCoeff = S->pTwiddleRFFT;  /* Points to RFFT Twiddle factors   */
-   float32_t *pA = p;						   /* increasing pointer               */
-   float32_t *pB = p;						   /* decreasing pointer               */
-   float32_t xAR, xAI, xBR, xBI;				/* temporary variables              */
-   float32_t t1a, t1b;				         /* temporary variables              */
-   float32_t p0, p1, p2, p3;				   /* temporary variables              */
+        uint32_t  k;                                /* Loop Counter */
+        float32_t twR, twI;                         /* RFFT Twiddle coefficients */
+  const float32_t * pCoeff = S->pTwiddleRFFT;       /* Points to RFFT Twiddle factors */
+        float32_t *pA = p;                          /* increasing pointer */
+        float32_t *pB = p;                          /* decreasing pointer */
+        float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
+        float32_t t1a, t1b;                         /* temporary variables */
+        float32_t p0, p1, p2, p3;                   /* temporary variables */


   k = (S->Sint).fftLen - 1;
@ -115,16 +116,17 @@ void stage_rfft_f32(

 /* Prepares data for inverse cfft */
 void merge_rfft_f32(
-arm_rfft_fast_instance_f32 * S,
-float32_t * p, float32_t * pOut)
+  const arm_rfft_fast_instance_f32 * S,
+        float32_t * p,
+        float32_t * pOut)
 {
-   uint32_t  k;								/* Loop Counter                     */
-   float32_t twR, twI;						/* RFFT Twiddle coefficients        */
-   float32_t *pCoeff = S->pTwiddleRFFT;		/* Points to RFFT Twiddle factors   */
-   float32_t *pA = p;						/* increasing pointer               */
-   float32_t *pB = p;						/* decreasing pointer               */
-   float32_t xAR, xAI, xBR, xBI;			/* temporary variables              */
-   float32_t t1a, t1b, r, s, t, u;			/* temporary variables              */
+        uint32_t  k;                                /* Loop Counter */
+        float32_t twR, twI;                         /* RFFT Twiddle coefficients */
+  const float32_t *pCoeff = S->pTwiddleRFFT;        /* Points to RFFT Twiddle factors */
+        float32_t *pA = p;                          /* increasing pointer */
+        float32_t *pB = p;                          /* decreasing pointer */
+        float32_t xAR, xAI, xBR, xBI;               /* temporary variables */
+        float32_t t1a, t1b, r, s, t, u;             /* temporary variables */

   k = (S->Sint).fftLen - 1;

@ -173,122 +175,123 @@ float32_t * p, float32_t * pOut)
 }

 /**
-* @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @defgroup RealFFT Real FFT Functions
- *
- * \par
- * The CMSIS DSP library includes specialized algorithms for computing the
- * FFT of real data sequences.  The FFT is defined over complex data but
- * in many applications the input is real.  Real FFT algorithms take advantage
- * of the symmetry properties of the FFT and have a speed advantage over complex
- * algorithms of the same length.
- * \par
- * The Fast RFFT algorith relays on the mixed radix CFFT that save processor usage.
- * \par
- * The real length N forward FFT of a sequence is computed using the steps shown below.
- * \par
- * \image html RFFT.gif "Real Fast Fourier Transform"
- * \par
- * The real sequence is initially treated as if it were complex to perform a CFFT.
- * Later, a processing stage reshapes the data to obtain half of the frequency spectrum
- * in complex format. Except the first complex number that contains the two real numbers
- * X[0] and X[N/2] all the data is complex. In other words, the first complex sample
- * contains two real values packed.
- * \par
- * The input for the inverse RFFT should keep the same format as the output of the
- * forward RFFT. A first processing stage pre-process the data to later perform an
- * inverse CFFT.
- * \par
- * \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
- * \par
- * The algorithms for floating-point, Q15, and Q31 data are slightly different
- * and we describe each algorithm in turn.
- * \par Floating-point
- * The main functions are arm_rfft_fast_f32() and arm_rfft_fast_init_f32().
- * The older functions arm_rfft_f32() and arm_rfft_init_f32() have been
- * deprecated but are still documented.
- * \par
- * The FFT of a real N-point sequence has even symmetry in the frequency
- * domain. The second half of the data equals the conjugate of the first
- * half flipped in frequency. Looking at the data, we see that we can
- * uniquely represent the FFT using only N/2 complex numbers. These are
- * packed into the output array in alternating real and imaginary
- * components:
- * \par
- * X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
- * real[(N/2)-1], imag[(N/2)-1 }
- * \par
- * It happens that the first complex number (real[0], imag[0]) is actually
- * all real. real[0] represents the DC offset, and imag[0] should be 0.
- * (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
- * the first harmonic and so on.
- * \par
- * The real FFT functions pack the frequency domain data in this fashion.
- * The forward transform outputs the data in this form and the inverse
- * transform expects input data in this form. The function always performs
- * the needed bitreversal so that the input and output data is always in
- * normal order. The functions support lengths of [32, 64, 128, ..., 4096]
- * samples.
- * \par Q15 and Q31
- * The real algorithms are defined in a similar manner and utilize N/2 complex
- * transforms behind the scenes.
- * \par
- * The complex transforms used internally include scaling to prevent fixed-point
- * overflows.  The overall scaling equals 1/(fftLen/2).
- * \par
- * A separate instance structure must be defined for each transform used but
- * twiddle factor and bit reversal tables can be reused.
- * \par
- * There is also an associated initialization function for each data type.
- * The initialization function performs the following operations:
- * - Sets the values of the internal structure fields.
- * - Initializes twiddle factor table and bit reversal table pointers.
- * - Initializes the internal complex FFT data structure.
- * \par
- * Use of the initialization function is optional.
- * However, if the initialization function is used, then the instance structure
- * cannot be placed into a const data section. To place an instance structure
- * into a const data section, the instance structure should be manually
- * initialized as follows:
- * <pre>
- *arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
- *arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
- * </pre>
- * where <code>fftLenReal</code> is the length of the real transform;
- * <code>fftLenBy2</code> length of  the internal complex transform.
- * <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
- * <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
- * output (=1).
- * <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
- * The value is based on the FFT length;
- * <code>pTwiddleAReal</code>points to the A array of twiddle coefficients;
- * <code>pTwiddleBReal</code>points to the B array of twiddle coefficients;
- * <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
- * must also be initialized.  Refer to arm_cfft_radix4_f32() for details regarding
- * static initialization of the complex FFT instance structure.
+  @defgroup RealFFT Real FFT Functions
+ 
+  @par
+                   The CMSIS DSP library includes specialized algorithms for computing the
+                   FFT of real data sequences.  The FFT is defined over complex data but
+                   in many applications the input is real.  Real FFT algorithms take advantage
+                   of the symmetry properties of the FFT and have a speed advantage over complex
+                   algorithms of the same length.
+  @par
+                   The Fast RFFT algorithm relies on the mixed radix CFFT, which saves processor usage.
+  @par
+                   The real length N forward FFT of a sequence is computed using the steps shown below.
+  @par
+                   \image html RFFT.gif "Real Fast Fourier Transform"
+  @par
+                   The real sequence is initially treated as if it were complex to perform a CFFT.
+                   Later, a processing stage reshapes the data to obtain half of the frequency spectrum
+                   in complex format. Except the first complex number that contains the two real numbers
+                   X[0] and X[N/2] all the data is complex. In other words, the first complex sample
+                   contains two real values packed.
+  @par
+                   The input for the inverse RFFT should keep the same format as the output of the
+                   forward RFFT. A first processing stage pre-processes the data to later perform an
+                   inverse CFFT.
+  @par
+                   \image html RIFFT.gif "Real Inverse Fast Fourier Transform"
+  @par
+                   The algorithms for floating-point, Q15, and Q31 data are slightly different
+                   and we describe each algorithm in turn.
+  @par           Floating-point
+                   The main functions are \ref arm_rfft_fast_f32() and \ref arm_rfft_fast_init_f32().
+                   The older functions \ref arm_rfft_f32() and \ref arm_rfft_init_f32() have been deprecated
+                   but are still documented.
+  @par
+                   The FFT of a real N-point sequence has even symmetry in the frequency domain. 
+                   The second half of the data equals the conjugate of the first half flipped in frequency. 
+                   Looking at the data, we see that we can uniquely represent the FFT using only N/2 complex numbers.
+                   These are packed into the output array in alternating real and imaginary components:
+  @par
+                   X = { real[0], imag[0], real[1], imag[1], real[2], imag[2] ...
+                   real[(N/2)-1], imag[(N/2)-1] }
+  @par
+                   It happens that the first complex number (real[0], imag[0]) is actually
+                   all real. real[0] represents the DC offset, and imag[0] should be 0.
+                   (real[1], imag[1]) is the fundamental frequency, (real[2], imag[2]) is
+                   the first harmonic and so on.
+  @par
+                   The real FFT functions pack the frequency domain data in this fashion.
+                   The forward transform outputs the data in this form and the inverse
+                   transform expects input data in this form. The function always performs
+                   the needed bitreversal so that the input and output data is always in
+                   normal order. The functions support lengths of [32, 64, 128, ..., 4096]
+                   samples.
+  @par           Q15 and Q31
+                   The real algorithms are defined in a similar manner and utilize N/2 complex
+                   transforms behind the scenes.
+  @par
+                   The complex transforms used internally include scaling to prevent fixed-point
+                   overflows.  The overall scaling equals 1/(fftLen/2).
+  @par
+                   A separate instance structure must be defined for each transform used but
+                   twiddle factor and bit reversal tables can be reused.
+  @par
+                   There is also an associated initialization function for each data type.
+                   The initialization function performs the following operations:
+                    - Sets the values of the internal structure fields.
+                    - Initializes twiddle factor table and bit reversal table pointers.
+                    - Initializes the internal complex FFT data structure.
+  @par
+                   Use of the initialization function is optional.
+                   However, if the initialization function is used, then the instance structure
+                   cannot be placed into a const data section. To place an instance structure
+                   into a const data section, the instance structure should be manually
+                   initialized as follows:
+  <pre>
+      arm_rfft_instance_q31 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+      arm_rfft_instance_q15 S = {fftLenReal, fftLenBy2, ifftFlagR, bitReverseFlagR, twidCoefRModifier, pTwiddleAReal, pTwiddleBReal, pCfft};
+  </pre>
+                   where <code>fftLenReal</code> is the length of the real transform;
+                   <code>fftLenBy2</code> length of  the internal complex transform.
+                   <code>ifftFlagR</code> Selects forward (=0) or inverse (=1) transform.
+                   <code>bitReverseFlagR</code> Selects bit reversed output (=0) or normal order
+                   output (=1).
+                   <code>twidCoefRModifier</code> stride modifier for the twiddle factor table.
+                   The value is based on the FFT length;
+                   <code>pTwiddleAReal</code> points to the A array of twiddle coefficients;
+                   <code>pTwiddleBReal</code> points to the B array of twiddle coefficients;
+                   <code>pCfft</code> points to the CFFT Instance structure. The CFFT structure
+                   must also be initialized.  Refer to arm_cfft_radix4_f32() for details regarding
+                   static initialization of the complex FFT instance structure.
 */

 /**
-* @addtogroup RealFFT
-* @{
+  @addtogroup RealFFT
+  @{
 */

 /**
-* @brief Processing function for the floating-point real FFT.
-* @param[in]  *S              points to an arm_rfft_fast_instance_f32 structure.
-* @param[in]  *p              points to the input buffer.
-* @param[in]  *pOut           points to the output buffer.
-* @param[in]  ifftFlag        RFFT if flag is 0, RIFFT if flag is 1
-* @return none.
+  @brief         Processing function for the floating-point real FFT.
+  @param[in]     S         points to an arm_rfft_fast_instance_f32 structure
+  @param[in]     p         points to input buffer
+  @param[out]    pOut      points to output buffer
+  @param[in]     ifftFlag
+                   - value = 0: RFFT
+                   - value = 1: RIFFT
+  @return        none
 */

 void arm_rfft_fast_f32(
-arm_rfft_fast_instance_f32 * S,
-float32_t * p, float32_t * pOut,
-uint8_t ifftFlag)
+  arm_rfft_fast_instance_f32 * S,
+  float32_t * p,
+  float32_t * pOut,
+  uint8_t ifftFlag)
 {
   arm_cfft_instance_f32 * Sint = &(S->Sint);
   Sint->fftLen = S->fftLenRFFT / 2;
--- a/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_fast_init_f32.c
@ -3,13 +3,13 @@
 * Title:        arm_cfft_init_f32.c
 * Description:  Split Radix Decimation in Frequency CFFT Floating point processing function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -30,102 +30,315 @@
 #include "arm_common_tables.h"

 /**
- * @ingroup groupTransforms
+  @ingroup groupTransforms
 */

 /**
- * @addtogroup RealFFT
- * @{
+  @addtogroup RealFFT
+  @{
 */

+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
+
+/**
+  @brief         Set up an instance for the 32pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_32_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 32U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_32;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 16U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_16;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable16;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_16_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 32pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
+
+/**
+  @brief         Set up an instance for the 64pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_64_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 64U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_64;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 32U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_32;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable32;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_32_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 64pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
+
+/**
+  @brief         Set up an instance for the 128pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_128_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 128U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_128;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 64U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_64;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable64;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_64_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 128pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
+
+/**
+  @brief         Set up an instance for the 256pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
 */
+
+arm_status arm_rfft_256_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 256U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_256;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 128U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_128;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable128;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 256pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
+
+/**
+  @brief         Set up an instance for the 512pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_512_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 512U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_512;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 256U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_256;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable256;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 512pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
+/**
+  @brief         Set up an instance for the 1024pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_1024_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 1024U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_1024;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 512U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_512;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable512;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 1024pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
+/**
+  @brief         Set up an instance for the 2048pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+arm_status arm_rfft_2048_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 2048U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_2048;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 1024U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_1024;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable1024;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_1024_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 2048pt real FFT selected */
+
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
+/**
+  @brief         Set up an instance for the 4096pt floating-point real FFT.
+  @param[in,out] S  points to the arm_rfft_fast_instance_f32 structure to fill in
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : instance initialised
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>S</code> is a null pointer
+ */
+
+arm_status arm_rfft_4096_fast_init_f32(arm_rfft_fast_instance_f32 * S)
+{
+  arm_cfft_instance_f32 * cfft;
+
+  /* No instance to fill in */
+  if (!S)
+  {
+    return ARM_MATH_ARGUMENT_ERROR;
+  }
+
+  /* Real-FFT level: length and its twiddle table */
+  S->fftLenRFFT   = 4096U;
+  S->pTwiddleRFFT = (float32_t *) twiddleCoef_rfft_4096;
+
+  /* Embedded complex FFT instance: half the real length */
+  cfft = &(S->Sint);
+  cfft->fftLen       = 2048U;
+  cfft->pTwiddle     = (float32_t *) twiddleCoef_2048;
+  cfft->pBitRevTable = (uint16_t *) armBitRevIndexTable2048;
+  cfft->bitRevLength = ARMBITREVINDEXTABLE_2048_TABLE_LENGTH;
+
+  return ARM_MATH_SUCCESS;
+}
+#endif /* tables for the 4096pt real FFT selected */
+
+/**
+  @brief         Initialization function for the floating-point real FFT.
+  @param[in,out] S       points to an arm_rfft_fast_instance_f32 structure
+  @param[in]     fftLen  length of the Real Sequence
+  @return        execution status
+                   - \ref ARM_MATH_SUCCESS        : Operation successful
+                   - \ref ARM_MATH_ARGUMENT_ERROR : <code>fftLen</code> is not a supported length,
+                                                    or the length-specific init function rejected <code>S</code>
+
+  @par           Description
+                   The parameter <code>fftLen</code> specifies the length of RFFT/RIFFT process.
+                   Supported FFT Lengths are 32, 64, 128, 256, 512, 1024, 2048, 4096.
+  @par
+                   This Function also initializes Twiddle factor table pointer and Bit reversal table pointer.
+  @par
+                   The work is delegated to the matching arm_rfft_<len>_fast_init_f32() function.
+                   A length is only dispatched when that function is compiled in, i.e. when the
+                   preprocessor guard on its <code>case</code> label (identical to the guard around
+                   the function itself) is satisfied; otherwise \ref ARM_MATH_ARGUMENT_ERROR is returned.
+ */
+
 arm_status arm_rfft_fast_init_f32(
  arm_rfft_fast_instance_f32 * S,
  uint16_t fftLen)
 {
-  arm_cfft_instance_f32 * Sint;
-  /*  Initialise the default arm status */
-  arm_status status = ARM_MATH_SUCCESS;
-  /*  Initialise the FFT length */
-  Sint = &(S->Sint);
-  Sint->fftLen = fftLen/2;
-  S->fftLenRFFT = fftLen;
+  typedef arm_status(*fft_init_ptr)( arm_rfft_fast_instance_f32 *);
+  fft_init_ptr fptr = 0x0;

-  /*  Initializations of structure parameters depending on the FFT length */
-  switch (Sint->fftLen)
+  /* Select the length-specific init function. Every case carries the same
+     guard as the function it refers to, so excluded lengths fall to default. */
+  switch (fftLen)
  {
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_BITREVIDX_FLT_2048) && defined(ARM_TABLE_TWIDDLECOEF_F32_2048) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_4096))
+  case 4096U:
+    fptr = arm_rfft_4096_fast_init_f32;
+    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_BITREVIDX_FLT_1024) && defined(ARM_TABLE_TWIDDLECOEF_F32_1024) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_2048))
  case 2048U:
-    /*  Initializations of structure parameters for 2048 point FFT */
-    /*  Initialise the bit reversal table length */
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_2048_TABLE_LENGTH;
-    /*  Initialise the bit reversal table pointer */
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable2048;
-    /*  Initialise the Twiddle coefficient pointers */
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_2048;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_4096;
+    fptr = arm_rfft_2048_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_BITREVIDX_FLT_512) && defined(ARM_TABLE_TWIDDLECOEF_F32_512) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_1024))
  case 1024U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_1024_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable1024;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_1024;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_2048;
+    fptr = arm_rfft_1024_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_BITREVIDX_FLT_256) && defined(ARM_TABLE_TWIDDLECOEF_F32_256) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_512))
  case 512U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_512_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable512;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_512;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_1024;
+    fptr = arm_rfft_512_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_BITREVIDX_FLT_128) && defined(ARM_TABLE_TWIDDLECOEF_F32_128) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_256))
  case 256U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_256_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable256;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_256;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_512;
+    fptr = arm_rfft_256_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_BITREVIDX_FLT_64) && defined(ARM_TABLE_TWIDDLECOEF_F32_64) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_128))
  case 128U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_128_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable128;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_128;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_256;
+    fptr = arm_rfft_128_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_BITREVIDX_FLT_32) && defined(ARM_TABLE_TWIDDLECOEF_F32_32) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_64))
  case 64U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_64_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable64;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_64;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_128;
+    fptr = arm_rfft_64_fast_init_f32;
    break;
+#endif
+#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FFT_TABLES) || (defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_BITREVIDX_FLT_16) && defined(ARM_TABLE_TWIDDLECOEF_F32_16) && defined(ARM_TABLE_TWIDDLECOEF_RFFT_F32_32))
  case 32U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_32_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable32;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_32;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_64;
-    break;
-  case 16U:
-    Sint->bitRevLength = ARMBITREVINDEXTABLE_16_TABLE_LENGTH;
-    Sint->pBitRevTable = (uint16_t *)armBitRevIndexTable16;
-		Sint->pTwiddle     = (float32_t *) twiddleCoef_16;
-		S->pTwiddleRFFT    = (float32_t *) twiddleCoef_rfft_32;
+    fptr = arm_rfft_32_fast_init_f32;
    break;
+#endif
  default:
-    /*  Reporting argument error if fftSize is not valid value */
-    status = ARM_MATH_ARGUMENT_ERROR;
-    break;
+    /* Unsupported length, or its tables were excluded from the build */
+    return ARM_MATH_ARGUMENT_ERROR;
  }

-  return (status);
+  /* Defensive: every case above assigns fptr, so this is not expected to trigger */
+  if( ! fptr ) return ARM_MATH_ARGUMENT_ERROR;
+  return fptr( S );
+
 }

 /**
- * @} end of RealFFT group
+  @} end of RealFFT group
 */
--- a/DSP/Source/TransformFunctions/arm_rfft_init_f32.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_init_f32.c
--- a/DSP/Source/TransformFunctions/arm_rfft_init_q15.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_init_q15.c
--- a/DSP/Source/TransformFunctions/arm_rfft_init_q31.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_init_q31.c
--- a/DSP/Source/TransformFunctions/arm_rfft_q15.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_q15.c
@ -3,13 +3,13 @@
 * Title:        arm_rfft_q15.c
 * Description:  RFFT & RIFFT Q15 process function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -33,173 +33,161 @@
 * -------------------------------------------------------------------- */

 void arm_split_rfft_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pATable,
-    q15_t * pBTable,
-    q15_t * pDst,
-    uint32_t modifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pATable,
+  const q15_t * pBTable,
+        q15_t * pDst,
+        uint32_t modifier);

 void arm_split_rifft_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pATable,
-    q15_t * pBTable,
-    q15_t * pDst,
-    uint32_t modifier);
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pATable,
+  const q15_t * pBTable,
+        q15_t * pDst,
+        uint32_t modifier);

 /**
-* @addtogroup RealFFT
-* @{
-*/
+  @addtogroup RealFFT
+  @{
+ */

 /**
-* @brief Processing function for the Q15 RFFT/RIFFT.
-* @param[in]  *S    points to an instance of the Q15 RFFT/RIFFT structure.
-* @param[in]  *pSrc points to the input buffer.
-* @param[out] *pDst points to the output buffer.
-* @return none.
-*
-* \par Input an output formats:
-* \par
-* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
-* Hence the output format is different for different RFFT sizes.
-* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
-* \par
-* \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
-* \par
-* \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
-*/
+  @brief         Processing function for the Q15 RFFT/RIFFT.
+  @param[in]     S     points to an instance of the Q15 RFFT/RIFFT structure
+  @param[in]     pSrc  points to input buffer
+  @param[out]    pDst  points to output buffer
+  @return        none
+
+  @par           Input and output formats
+                   Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+                   Hence the output format is different for different RFFT sizes.
+                   The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
+  @par
+                   \image html RFFTQ15.gif "Input and Output Formats for Q15 RFFT"
+  @par
+                   \image html RIFFTQ15.gif "Input and Output Formats for Q15 RIFFT"
+  @par
+                   NOTE(review): in the forward (RFFT) path pSrc is handed to arm_cfft_q15() before the
+                   split stage, so the input buffer is presumably modified - confirm against arm_cfft_q15().
+ */

 void arm_rfft_q15(
-    const arm_rfft_instance_q15 * S,
-    q15_t * pSrc,
-    q15_t * pDst)
+  const arm_rfft_instance_q15 * S,
+        q15_t * pSrc,
+        q15_t * pDst)
 {
-    const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
-    uint32_t i;
-    uint32_t L2 = S->fftLenReal >> 1;
+  const arm_cfft_instance_q15 *S_CFFT = S->pCfft;
+        uint32_t L2 = S->fftLenReal >> 1U;             /* Half the real FFT length, passed to the split stage */
+        uint32_t i;                                    /* Loop counter */

-    /* Calculation of RIFFT of input */
-    if (S->ifftFlagR == 1U)
-    {
-        /*  Real IFFT core process */
-        arm_split_rifft_q15(pSrc, L2, S->pTwiddleAReal,
-                            S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  /* Calculation of RIFFT of input */
+  if (S->ifftFlagR == 1U)
+  {
+     /* Real IFFT core (split) stage: takes pSrc as input buffer, pDst as output buffer */
+     arm_split_rifft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);

-        /* Complex IFFT process */
-        arm_cfft_q15(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
+     /* Complex IFFT process, run on pDst */
+     arm_cfft_q15 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);

-        for(i=0;i<S->fftLenReal;i++)
-        {
-            pDst[i] = pDst[i] << 1;
-        }
-    }
-    else
-    {
-        /* Calculation of RFFT of input */
+     /* Shift each of the fftLenReal output samples left by one bit */
+     for(i = 0; i < S->fftLenReal; i++)
+     {
+        pDst[i] = pDst[i] << 1U;
+     }
+  }
+  else
+  {
+     /* Calculation of RFFT of input */

-        /* Complex FFT process */
-        arm_cfft_q15(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+     /* Complex FFT process, run on pSrc */
+     arm_cfft_q15 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+
+     /* Real FFT core (split) stage: takes pSrc as input buffer, pDst as output buffer */
+     arm_split_rfft_q15 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  }

-        /*  Real FFT core process */
-        arm_split_rfft_q15(pSrc, L2, S->pTwiddleAReal,
-                            S->pTwiddleBReal, pDst, S->twidCoefRModifier);
-    }
 }

 /**
-* @} end of RealFFT group
-*/
+  @} end of RealFFT group
+ */

 /**
-* @brief  Core Real FFT process
-* @param  *pSrc 				points to the input buffer.
-* @param  fftLen  				length of FFT.
-* @param  *pATable 			points to the A twiddle Coef buffer.
-* @param  *pBTable 			points to the B twiddle Coef buffer.
-* @param  *pDst 				points to the output buffer.
-* @param  modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-* The function implements a Real FFT
-*/
+  @brief         Core Real FFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
+
+  @par
+                   The function implements a Real FFT
+ */

 void arm_split_rfft_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pATable,
-    q15_t * pBTable,
-    q15_t * pDst,
-    uint32_t modifier)
-{
-    uint32_t i;                                    /* Loop Counter */
-    q31_t outR, outI;                              /* Temporary variables for output */
-    q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
-    q15_t *pSrc1, *pSrc2;
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pATable,
+  const q15_t * pBTable,
+        q15_t * pDst,
+        uint32_t modifier)
+{       
+        uint32_t i;                                    /* Loop Counter */
+        q31_t outR, outI;                              /* Temporary variables for output */
+  const q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+        q15_t *pSrc1, *pSrc2;
 #if defined (ARM_MATH_DSP)
-    q15_t *pD1, *pD2;
+        q15_t *pD1, *pD2;
 #endif

-    //  pSrc[2U * fftLen] = pSrc[0];
-    //  pSrc[(2U * fftLen) + 1U] = pSrc[1];
+  /* Init coefficient pointers */
+  pCoefA = &pATable[modifier * 2];
+  pCoefB = &pBTable[modifier * 2];

-    pCoefA = &pATable[modifier * 2U];
-    pCoefB = &pBTable[modifier * 2U];
-
-    pSrc1 = &pSrc[2];
-    pSrc2 = &pSrc[(2U * fftLen) - 2U];
+  pSrc1 = &pSrc[2];
+  pSrc2 = &pSrc[(2U * fftLen) - 2U];

 #if defined (ARM_MATH_DSP)

-    /* Run the below code for Cortex-M4 and Cortex-M3 */
    i = 1U;
    pD1 = pDst + 2;
    pD2 = pDst + (4U * fftLen) - 2;

-    for(i = fftLen - 1; i > 0; i--)
+    for (i = fftLen - 1; i > 0; i--)
    {
        /*
-        outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
-        + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
-        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
-        */
+          outR = (  pSrc[2 * i]             * pATable[2 * i]
+                  - pSrc[2 * i + 1]         * pATable[2 * i + 1]
+                  + pSrc[2 * n - 2 * i]     * pBTable[2 * i]
+                  + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);

-        /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+          outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+                  + pIn[2 * i]             * pATable[2 * i + 1]
+                  + pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+                  - pIn[2 * n - 2 * i + 1] * pBTable[2 * i])
+         */


 #ifndef ARM_MATH_BIG_ENDIAN
-
        /* pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1] */
-        outR = __SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA));
-
+        outR = __SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA));
 #else
-
        /* -(pSrc[2 * i + 1] * pATable[2 * i + 1] - pSrc[2 * i] * pATable[2 * i]) */
-        outR = -(__SMUSD(*__SIMD32(pSrc1), *__SIMD32(pCoefA)));
+        outR = -(__SMUSD(read_q15x2 (pSrc1), read_q15x2((q15_t *) pCoefA)));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
-
-        /* pSrc[2 * n - 2 * i] * pBTable[2 * i] +
-        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
-        outR = __SMLAD(*__SIMD32(pSrc2), *__SIMD32(pCoefB), outR) >> 16U;
-
-        /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+        /* pSrc[2 * n - 2 * i] * pBTable[2 * i] + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+        outR = __SMLAD(read_q15x2 (pSrc2), read_q15x2((q15_t *) pCoefB), outR) >> 16U;

+        /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
 #ifndef ARM_MATH_BIG_ENDIAN
-
-        outI = __SMUSDX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
-
+        outI = __SMUSDX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));
 #else
-
-        outI = __SMUSDX(*__SIMD32(pCoefB), *__SIMD32(pSrc2)--);
-
-#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+        outI = __SMUSDX(read_q15x2 ((q15_t *) pCoefB), read_q15x2_da (&pSrc2));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

        /* (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] */
-        outI = __SMLADX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), outI);
+        outI = __SMLADX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), outI);

        /* write output */
        *pD1++ = (q15_t) outR;
@ -215,23 +203,23 @@ void arm_split_rfft_q15(
        pCoefA = pCoefA + (2U * modifier);
    }

-    pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
-    pDst[(2U * fftLen) + 1U] = 0;
+    pDst[2U * fftLen]      = (pSrc[0] - pSrc[1]) >> 1U;
+    pDst[2U * fftLen + 1U] = 0;

-    pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
+    pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
    pDst[1] = 0;

 #else

-    /* Run the below code for Cortex-M0 */
    i = 1U;

    while (i < fftLen)
    {
        /*
-        outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
-        + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
-        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+          outR = (  pSrc[2 * i]             * pATable[2 * i]
+                  - pSrc[2 * i + 1]         * pATable[2 * i + 1]
+                  + pSrc[2 * n - 2 * i]     * pBTable[2 * i]
+                  + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
        */

        outR = *pSrc1 * *pCoefA;
@ -239,10 +227,11 @@ void arm_split_rfft_q15(
        outR = outR + (*pSrc2 * *pCoefB);
        outR = (outR + (*(pSrc2 + 1) * *(pCoefB + 1))) >> 16;

-
-        /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+        /*
+          outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+                  + pIn[2 * i]             * pATable[2 * i + 1]
+                  + pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+                  - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
        */

        outI = *pSrc2 * *(pCoefB + 1);
@ -256,7 +245,7 @@ void arm_split_rfft_q15(

        /* write output */
        pDst[2U * i] = (q15_t) outR;
-        pDst[(2U * i) + 1U] = outI >> 16U;
+        pDst[2U * i + 1U] = outI >> 16U;

        /* write complex conjugate output */
        pDst[(4U * fftLen) - (2U * i)] = (q15_t) outR;
@ -270,7 +259,7 @@ void arm_split_rfft_q15(
    }

    pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
-    pDst[(2U * fftLen) + 1U] = 0;
+    pDst[2U * fftLen + 1U] = 0;

    pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
    pDst[1] = 0;
@ -280,147 +269,112 @@ void arm_split_rfft_q15(


 /**
-* @brief  Core Real IFFT process
-* @param[in]   *pSrc 				points to the input buffer.
-* @param[in]   fftLen  		    length of FFT.
-* @param[in]   *pATable 			points to the twiddle Coef A buffer.
-* @param[in]   *pBTable 			points to the twiddle Coef B buffer.
-* @param[out]  *pDst 				points to the output buffer.
-* @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-* The function implements a Real IFFT
-*/
+  @brief         Core Real IFFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
+
+  @par
+                   The function implements a Real IFFT
+ */
+
 void arm_split_rifft_q15(
-    q15_t * pSrc,
-    uint32_t fftLen,
-    q15_t * pATable,
-    q15_t * pBTable,
-    q15_t * pDst,
-    uint32_t modifier)
+        q15_t * pSrc,
+        uint32_t fftLen,
+  const q15_t * pATable,
+  const q15_t * pBTable,
+        q15_t * pDst,
+        uint32_t modifier)
 {
-    uint32_t i;                                    /* Loop Counter */
-    q31_t outR, outI;                              /* Temporary variables for output */
-    q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
-    q15_t *pSrc1, *pSrc2;
-    q15_t *pDst1 = &pDst[0];
+        uint32_t i;                                    /* Loop Counter */
+        q31_t outR, outI;                              /* Temporary variables for output */
+  const q15_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+        q15_t *pSrc1, *pSrc2;
+        q15_t *pDst1 = &pDst[0];

-    pCoefA = &pATable[0];
-    pCoefB = &pBTable[0];
+  pCoefA = &pATable[0];
+  pCoefB = &pBTable[0];

-    pSrc1 = &pSrc[0];
-    pSrc2 = &pSrc[2U * fftLen];
+  pSrc1 = &pSrc[0];
+  pSrc2 = &pSrc[2 * fftLen];
+
+  i = fftLen;
+  while (i > 0U)
+  {
+      /*
+        outR = (  pIn[2 * i]             * pATable[2 * i]
+                + pIn[2 * i + 1]         * pATable[2 * i + 1]
+                + pIn[2 * n - 2 * i]     * pBTable[2 * i]
+                - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+
+        outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+                - pIn[2 * i]             * pATable[2 * i + 1]
+                - pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+                - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+       */

 #if defined (ARM_MATH_DSP)

-    /* Run the below code for Cortex-M4 and Cortex-M3 */
-    i = fftLen;
-
-    while (i > 0U)
-    {
-        /*
-        outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
-
-        outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
-        */
-
-
 #ifndef ARM_MATH_BIG_ENDIAN
-
-        /* pIn[2 * n - 2 * i] * pBTable[2 * i] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
-        outR = __SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB));
-
+      /* pIn[2 * n - 2 * i] * pBTable[2 * i] - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]) */
+      outR = __SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB));
 #else
+      /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
+      outR = -(__SMUSD(read_q15x2(pSrc2), read_q15x2((q15_t *) pCoefB)));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-        /* -(-pIn[2 * n - 2 * i] * pBTable[2 * i] +
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1])) */
-        outR = -(__SMUSD(*__SIMD32(pSrc2), *__SIMD32(pCoefB)));
+      /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] + pIn[2 * n - 2 * i] * pBTable[2 * i] */
+      outR = __SMLAD(read_q15x2(pSrc1), read_q15x2 ((q15_t *) pCoefA), outR) >> 16U;

-#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
-
-        /* pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i] */
-        outR = __SMLAD(*__SIMD32(pSrc1), *__SIMD32(pCoefA), outR) >> 16U;
-
-        /*
-        -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] +
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
-        outI = __SMUADX(*__SIMD32(pSrc2)--, *__SIMD32(pCoefB));
-
-        /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
+      /* -pIn[2 * n - 2 * i] * pBTable[2 * i + 1] + pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+      outI = __SMUADX(read_q15x2_da (&pSrc2), read_q15x2((q15_t *) pCoefB));

+      /* pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] */
 #ifndef ARM_MATH_BIG_ENDIAN
-
-        outI = __SMLSDX(*__SIMD32(pCoefA), *__SIMD32(pSrc1)++, -outI);
-
+      outI = __SMLSDX(read_q15x2 ((q15_t *) pCoefA), read_q15x2_ia (&pSrc1), -outI);
 #else
+      outI = __SMLSDX(read_q15x2_ia (&pSrc1), read_q15x2 ((q15_t *) pCoefA), -outI);
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-        outI = __SMLSDX(*__SIMD32(pSrc1)++, *__SIMD32(pCoefA), -outI);
-
-#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
-        /* write output */
-
+      /* write output */
 #ifndef ARM_MATH_BIG_ENDIAN
-
-        *__SIMD32(pDst1)++ = __PKHBT(outR, (outI >> 16U), 16);
-
+      write_q15x2_ia (&pDst1, __PKHBT(outR, (outI >> 16U), 16));
 #else
+      write_q15x2_ia (&pDst1, __PKHBT((outI >> 16U), outR, 16));
+#endif /* #ifndef ARM_MATH_BIG_ENDIAN */

-        *__SIMD32(pDst1)++ = __PKHBT((outI >> 16U), outR, 16);

-#endif /*      #ifndef ARM_MATH_BIG_ENDIAN     */
+#else  /* #if defined (ARM_MATH_DSP) */

-        /* update coefficient pointer */
-        pCoefB = pCoefB + (2U * modifier);
-        pCoefA = pCoefA + (2U * modifier);
+      outR = *pSrc2 * *pCoefB;
+      outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
+      outR = outR + (*pSrc1 * *pCoefA);
+      outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;

-        i--;
-    }
-#else
-    /* Run the below code for Cortex-M0 */
-    i = fftLen;
+      outI = *(pSrc1 + 1) * *pCoefA;
+      outI = outI - (*pSrc1 * *(pCoefA + 1));
+      outI = outI - (*pSrc2 * *(pCoefB + 1));
+      outI = outI - (*(pSrc2 + 1) * *(pCoefB));

-    while (i > 0U)
-    {
-        /*
-        outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
-        */
+      /* update input pointers */
+      pSrc1 += 2U;
+      pSrc2 -= 2U;

-        outR = *pSrc2 * *pCoefB;
-        outR = outR - (*(pSrc2 + 1) * *(pCoefB + 1));
-        outR = outR + (*pSrc1 * *pCoefA);
-        outR = (outR + (*(pSrc1 + 1) * *(pCoefA + 1))) >> 16;
+      /* write output */
+      *pDst1++ = (q15_t) outR;
+      *pDst1++ = (q15_t) (outI >> 16);

-        /*
-        outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
-        */
-
-        outI = *(pSrc1 + 1) * *pCoefA;
-        outI = outI - (*pSrc1 * *(pCoefA + 1));
-        outI = outI - (*pSrc2 * *(pCoefB + 1));
-        outI = outI - (*(pSrc2 + 1) * *(pCoefB));
-
-        /* update input pointers */
-        pSrc1 += 2U;
-        pSrc2 -= 2U;
-
-        /* write output */
-        *pDst1++ = (q15_t) outR;
-        *pDst1++ = (q15_t) (outI >> 16);
-
-        /* update coefficient pointer */
-        pCoefB = pCoefB + (2U * modifier);
-        pCoefA = pCoefA + (2U * modifier);
-
-        i--;
-    }
 #endif /* #if defined (ARM_MATH_DSP) */
+
+      /* update coefficient pointer */
+      pCoefB = pCoefB + (2 * modifier);
+      pCoefA = pCoefA + (2 * modifier);
+
+      i--;
+  }
+
 }
--- a/DSP/Source/TransformFunctions/arm_rfft_q31.c
+++ b/DSP/Source/TransformFunctions/arm_rfft_q31.c
@ -3,13 +3,13 @@
 * Title:        arm_rfft_q31.c
 * Description:  FFT & RIFFT Q31 process function
 *
- * $Date:        27. January 2017
- * $Revision:    V.1.5.1
+ * $Date:        18. March 2019
+ * $Revision:    V1.6.0
 *
 * Target Processor: Cortex-M cores
 * -------------------------------------------------------------------- */
 /*
- * Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
+ * Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
 *
 * SPDX-License-Identifier: Apache-2.0
 *
@ -33,251 +33,260 @@
 * -------------------------------------------------------------------- */

 void arm_split_rfft_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pATable,
-    q31_t * pBTable,
-    q31_t * pDst,
-    uint32_t modifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pATable,
+  const q31_t * pBTable,
+        q31_t * pDst,
+        uint32_t modifier);

 void arm_split_rifft_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pATable,
-    q31_t * pBTable,
-    q31_t * pDst,
-    uint32_t modifier);
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pATable,
+  const q31_t * pBTable,
+        q31_t * pDst,
+        uint32_t modifier);

 /**
-* @addtogroup RealFFT
-* @{
-*/
+  @addtogroup RealFFT
+  @{
+ */

 /**
-* @brief Processing function for the Q31 RFFT/RIFFT.
-* @param[in]  *S    points to an instance of the Q31 RFFT/RIFFT structure.
-* @param[in]  *pSrc points to the input buffer.
-* @param[out] *pDst points to the output buffer.
-* @return none.
-*
-* \par Input an output formats:
-* \par
-* Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
-* Hence the output format is different for different RFFT sizes.
-* The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
-* \par
-* \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
-*
-* \par
-* \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
-*/
+  @brief         Processing function for the Q31 RFFT/RIFFT.
+  @param[in]     S     points to an instance of the Q31 RFFT/RIFFT structure
+  @param[in]     pSrc  points to input buffer
+  @param[out]    pDst  points to output buffer
+  @return        none
+
+  @par           Input and output formats
+                   Internally input is downscaled by 2 for every stage to avoid saturations inside CFFT/CIFFT process.
+                   Hence the output format is different for different RFFT sizes.
+                   The input and output formats for different RFFT sizes and number of bits to upscale are mentioned in the tables below for RFFT and RIFFT:
+  @par
+                   \image html RFFTQ31.gif "Input and Output Formats for Q31 RFFT"
+  @par
+                   \image html RIFFTQ31.gif "Input and Output Formats for Q31 RIFFT"
+ */
+
 void arm_rfft_q31(
-    const arm_rfft_instance_q31 * S,
-    q31_t * pSrc,
-    q31_t * pDst)
+  const arm_rfft_instance_q31 * S,
+        q31_t * pSrc,
+        q31_t * pDst)
 {
-    const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
-    uint32_t i;
-    uint32_t L2 = S->fftLenReal >> 1;
+  const arm_cfft_instance_q31 *S_CFFT = S->pCfft;
+        uint32_t L2 = S->fftLenReal >> 1U;
+        uint32_t i;

-    /* Calculation of RIFFT of input */
-    if (S->ifftFlagR == 1U)
-    {
-        /*  Real IFFT core process */
-        arm_split_rifft_q31(pSrc, L2, S->pTwiddleAReal,
-                            S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  /* Calculation of RIFFT of input */
+  if (S->ifftFlagR == 1U)
+  {
+     /*  Real IFFT core process */
+     arm_split_rifft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);

-        /* Complex IFFT process */
-        arm_cfft_q31(S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);
+     /* Complex IFFT process */
+     arm_cfft_q31 (S_CFFT, pDst, S->ifftFlagR, S->bitReverseFlagR);

-        for(i=0;i<S->fftLenReal;i++)
-        {
-            pDst[i] = pDst[i] << 1;
-        }
-    }
-    else
-    {
-        /* Calculation of RFFT of input */
+     for(i = 0; i < S->fftLenReal; i++)
+     {
+        pDst[i] = pDst[i] << 1U;
+     }
+  }
+  else
+  {
+     /* Calculation of RFFT of input */

-        /* Complex FFT process */
-        arm_cfft_q31(S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+     /* Complex FFT process */
+     arm_cfft_q31 (S_CFFT, pSrc, S->ifftFlagR, S->bitReverseFlagR);
+
+     /*  Real FFT core process */
+     arm_split_rfft_q31 (pSrc, L2, S->pTwiddleAReal, S->pTwiddleBReal, pDst, S->twidCoefRModifier);
+  }

-        /*  Real FFT core process */
-        arm_split_rfft_q31(pSrc, L2, S->pTwiddleAReal,
-                            S->pTwiddleBReal, pDst, S->twidCoefRModifier);
-    }
 }

 /**
-* @} end of RealFFT group
-*/
+  @} end of RealFFT group
+ */

 /**
-* @brief  Core Real FFT process
-* @param[in]   *pSrc 				points to the input buffer.
-* @param[in]   fftLen  			    length of FFT.
-* @param[in]   *pATable 			points to the twiddle Coef A buffer.
-* @param[in]   *pBTable 			points to the twiddle Coef B buffer.
-* @param[out]  *pDst 				points to the output buffer.
-* @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-*/
+  @brief         Core Real FFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
+ */
+
 void arm_split_rfft_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pATable,
-    q31_t * pBTable,
-    q31_t * pDst,
-    uint32_t modifier)
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pATable,
+  const q31_t * pBTable,
+        q31_t * pDst,
+        uint32_t modifier)
 {
-    uint32_t i;                                    /* Loop Counter */
-    q31_t outR, outI;                              /* Temporary variables for output */
-    q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
-    q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
-    q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[(4U * fftLen) - 1U];
-    q31_t *pIn1 = &pSrc[2], *pIn2 = &pSrc[(2U * fftLen) - 1U];
+        uint32_t i;                                    /* Loop Counter */
+        q31_t outR, outI;                              /* Temporary variables for output */
+  const q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+        q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
+        q31_t *pOut1 = &pDst[2], *pOut2 = &pDst[4 * fftLen - 1];
+        q31_t *pIn1 =  &pSrc[2], *pIn2 =  &pSrc[2 * fftLen - 1];

-    /* Init coefficient pointers */
-    pCoefA = &pATable[modifier * 2U];
-    pCoefB = &pBTable[modifier * 2U];
+  /* Init coefficient pointers */
+  pCoefA = &pATable[modifier * 2];
+  pCoefB = &pBTable[modifier * 2];

-    i = fftLen - 1U;
+  i = fftLen - 1U;

-    while (i > 0U)
-    {
-        /*
-        outR = (pSrc[2 * i] * pATable[2 * i] - pSrc[2 * i + 1] * pATable[2 * i + 1]
-        + pSrc[2 * n - 2 * i] * pBTable[2 * i] +
-        pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
-        */
+  while (i > 0U)
+  {
+     /*
+       outR = (  pSrc[2 * i]             * pATable[2 * i]
+               - pSrc[2 * i + 1]         * pATable[2 * i + 1]
+               + pSrc[2 * n - 2 * i]     * pBTable[2 * i]
+               + pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);

-        /* outI = (pIn[2 * i + 1] * pATable[2 * i] + pIn[2 * i] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]); */
+       outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+               + pIn[2 * i]             * pATable[2 * i + 1]
+               + pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+               - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+      */

-        CoefA1 = *pCoefA++;
-        CoefA2 = *pCoefA;
+     CoefA1 = *pCoefA++;
+     CoefA2 = *pCoefA;

-        /* outR = (pSrc[2 * i] * pATable[2 * i] */
-        mult_32x32_keep32_R(outR, *pIn1, CoefA1);
+     /* outR = (pSrc[2 * i] * pATable[2 * i] */
+     mult_32x32_keep32_R (outR, *pIn1, CoefA1);

-        /* outI = pIn[2 * i] * pATable[2 * i + 1] */
-        mult_32x32_keep32_R(outI, *pIn1++, CoefA2);
+     /* outI = pIn[2 * i] * pATable[2 * i + 1] */
+     mult_32x32_keep32_R (outI, *pIn1++, CoefA2);

-        /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
-        multSub_32x32_keep32_R(outR, *pIn1, CoefA2);
+     /* - pSrc[2 * i + 1] * pATable[2 * i + 1] */
+     multSub_32x32_keep32_R (outR, *pIn1, CoefA2);

-        /* (pIn[2 * i + 1] * pATable[2 * i] */
-        multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
+     /* (pIn[2 * i + 1] * pATable[2 * i] */
+     multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);

-        /* pSrc[2 * n - 2 * i] * pBTable[2 * i]  */
-        multSub_32x32_keep32_R(outR, *pIn2, CoefA2);
-        CoefB1 = *pCoefB;
+     /* pSrc[2 * n - 2 * i] * pBTable[2 * i]  */
+     multSub_32x32_keep32_R (outR, *pIn2, CoefA2);
+     CoefB1 = *pCoefB;

-        /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
-        multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
+     /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+     multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);

-        /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
-        multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
+     /* pSrc[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+     multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);

-        /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
-        multSub_32x32_keep32_R(outI, *pIn2--, CoefA2);
+     /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+     multSub_32x32_keep32_R (outI, *pIn2--, CoefA2);

-        /* write output */
-        *pOut1++ = outR;
-        *pOut1++ = outI;
+     /* write output */
+     *pOut1++ = outR;
+     *pOut1++ = outI;

-        /* write complex conjugate output */
-        *pOut2-- = -outI;
-        *pOut2-- = outR;
+     /* write complex conjugate output */
+     *pOut2-- = -outI;
+     *pOut2-- = outR;

-        /* update coefficient pointer */
-        pCoefB = pCoefB + (modifier * 2U);
-        pCoefA = pCoefA + ((modifier * 2U) - 1U);
+     /* update coefficient pointer */
+     pCoefB = pCoefB + (2 * modifier);
+     pCoefA = pCoefA + (2 * modifier - 1);

-        i--;
-    }
-    pDst[2U * fftLen] = (pSrc[0] - pSrc[1]) >> 1;
-    pDst[(2U * fftLen) + 1U] = 0;
+     /* Decrement loop count */
+     i--;
+  }

-    pDst[0] = (pSrc[0] + pSrc[1]) >> 1;
-    pDst[1] = 0;
+  pDst[2 * fftLen]     = (pSrc[0] - pSrc[1]) >> 1U;
+  pDst[2 * fftLen + 1] = 0;
+
+  pDst[0] = (pSrc[0] + pSrc[1]) >> 1U;
+  pDst[1] = 0;
 }

+
 /**
-* @brief  Core Real IFFT process
-* @param[in]   *pSrc 				points to the input buffer.
-* @param[in]   fftLen  			    length of FFT.
-* @param[in]   *pATable 			points to the twiddle Coef A buffer.
-* @param[in]   *pBTable 			points to the twiddle Coef B buffer.
-* @param[out]  *pDst 				points to the output buffer.
-* @param[in]   modifier 	        twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table.
-* @return none.
-*/
+  @brief         Core Real IFFT process
+  @param[in]     pSrc      points to input buffer
+  @param[in]     fftLen    length of FFT
+  @param[in]     pATable   points to twiddle Coef A buffer
+  @param[in]     pBTable   points to twiddle Coef B buffer
+  @param[out]    pDst      points to output buffer
+  @param[in]     modifier  twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table
+  @return        none
+ */
+
 void arm_split_rifft_q31(
-    q31_t * pSrc,
-    uint32_t fftLen,
-    q31_t * pATable,
-    q31_t * pBTable,
-    q31_t * pDst,
-    uint32_t modifier)
-{
-    q31_t outR, outI;                              /* Temporary variables for output */
-    q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
-    q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
-    q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[(2U * fftLen) + 1U];
+        q31_t * pSrc,
+        uint32_t fftLen,
+  const q31_t * pATable,
+  const q31_t * pBTable,
+        q31_t * pDst,
+        uint32_t modifier)
+{       
+        q31_t outR, outI;                              /* Temporary variables for output */
+  const q31_t *pCoefA, *pCoefB;                        /* Temporary pointers for twiddle factors */
+        q31_t CoefA1, CoefA2, CoefB1;                  /* Temporary variables for twiddle coefficients */
+        q31_t *pIn1 = &pSrc[0], *pIn2 = &pSrc[2 * fftLen + 1];

-    pCoefA = &pATable[0];
-    pCoefB = &pBTable[0];
+  pCoefA = &pATable[0];
+  pCoefB = &pBTable[0];

-    while (fftLen > 0U)
-    {
-        /*
-        outR = (pIn[2 * i] * pATable[2 * i] + pIn[2 * i + 1] * pATable[2 * i + 1] +
-        pIn[2 * n - 2 * i] * pBTable[2 * i] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);
+  while (fftLen > 0U)
+  {
+     /*
+       outR = (  pIn[2 * i]             * pATable[2 * i]
+               + pIn[2 * i + 1]         * pATable[2 * i + 1]
+               + pIn[2 * n - 2 * i]     * pBTable[2 * i]
+               - pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1]);

-        outI = (pIn[2 * i + 1] * pATable[2 * i] - pIn[2 * i] * pATable[2 * i + 1] -
-        pIn[2 * n - 2 * i] * pBTable[2 * i + 1] -
-        pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
-        */
-        CoefA1 = *pCoefA++;
-        CoefA2 = *pCoefA;
+       outI = (  pIn[2 * i + 1]         * pATable[2 * i]
+               - pIn[2 * i]             * pATable[2 * i + 1]
+               - pIn[2 * n - 2 * i]     * pBTable[2 * i + 1]
+               - pIn[2 * n - 2 * i + 1] * pBTable[2 * i]);
+      */

-        /* outR = (pIn[2 * i] * pATable[2 * i] */
-        mult_32x32_keep32_R(outR, *pIn1, CoefA1);
+     CoefA1 = *pCoefA++;
+     CoefA2 = *pCoefA;

-        /* - pIn[2 * i] * pATable[2 * i + 1] */
-        mult_32x32_keep32_R(outI, *pIn1++, -CoefA2);
+     /* outR = (pIn[2 * i] * pATable[2 * i] */
+     mult_32x32_keep32_R (outR, *pIn1, CoefA1);

-        /* pIn[2 * i + 1] * pATable[2 * i + 1] */
-        multAcc_32x32_keep32_R(outR, *pIn1, CoefA2);
+     /* - pIn[2 * i] * pATable[2 * i + 1] */
+     mult_32x32_keep32_R (outI, *pIn1++, -CoefA2);

-        /* pIn[2 * i + 1] * pATable[2 * i] */
-        multAcc_32x32_keep32_R(outI, *pIn1++, CoefA1);
+     /* pIn[2 * i + 1] * pATable[2 * i + 1] */
+     multAcc_32x32_keep32_R (outR, *pIn1, CoefA2);

-        /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
-        multAcc_32x32_keep32_R(outR, *pIn2, CoefA2);
-        CoefB1 = *pCoefB;
+     /* pIn[2 * i + 1] * pATable[2 * i] */
+     multAcc_32x32_keep32_R (outI, *pIn1++, CoefA1);

-        /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
-        multSub_32x32_keep32_R(outI, *pIn2--, CoefB1);
+     /* pIn[2 * n - 2 * i] * pBTable[2 * i] */
+     multAcc_32x32_keep32_R (outR, *pIn2, CoefA2);
+     CoefB1 = *pCoefB;

-        /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
-        multAcc_32x32_keep32_R(outR, *pIn2, CoefB1);
+     /* pIn[2 * n - 2 * i] * pBTable[2 * i + 1] */
+     multSub_32x32_keep32_R (outI, *pIn2--, CoefB1);

-        /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
-        multAcc_32x32_keep32_R(outI, *pIn2--, CoefA2);
+     /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i + 1] */
+     multAcc_32x32_keep32_R (outR, *pIn2, CoefB1);

-        /* write output */
-        *pDst++ = outR;
-        *pDst++ = outI;
+     /* pIn[2 * n - 2 * i + 1] * pBTable[2 * i] */
+     multAcc_32x32_keep32_R (outI, *pIn2--, CoefA2);

-        /* update coefficient pointer */
-        pCoefB = pCoefB + (modifier * 2U);
-        pCoefA = pCoefA + ((modifier * 2U) - 1U);
+     /* write output */
+     *pDst++ = outR;
+     *pDst++ = outI;
+
+     /* update coefficient pointer */
+     pCoefB = pCoefB + (modifier * 2);
+     pCoefA = pCoefA + (modifier * 2 - 1);
+
+     /* Decrement loop count */
+     fftLen--;
+  }

-        /* Decrement loop count */
-        fftLen--;
-    }
 }