Official ARM version: v5.6.0
This commit is contained in:
parent
9f95ff5b6b
commit
96d6da4e25
2939 changed files with 339304 additions and 113320 deletions
16
DSP/Source/SupportFunctions/CMakeLists.txt
Normal file
16
DSP/Source/SupportFunctions/CMakeLists.txt
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# Build script for the CMSIS-DSP support functions, producing the static
# library target CMSISDSPSupport.
cmake_minimum_required (VERSION 3.6)
|
||||
|
||||
project(CMSISDSPSupport)
|
||||
|
||||
|
||||
# Collect only sources whose file name contains an underscore (arm_copy_f32.c,
# arm_fill_q7.c, ...). The pattern does not match SupportFunctions.c, the
# aggregate file in this directory that #includes those same sources.
file(GLOB SRC "./*_*.c")
|
||||
|
||||
add_library(CMSISDSPSupport STATIC ${SRC})
|
||||
|
||||
# NOTE(review): configdsp() is not defined in this file; presumably a
# project-level helper that applies the common DSP build settings to the
# target - confirm against the parent CMake scripts.
configdsp(CMSISDSPSupport ..)
|
||||
|
||||
### Includes
|
||||
# PUBLIC: the include directory is used for this target and is also propagated
# to targets that link against it. ${DSP} is expected to be set by the
# including project (not defined here).
target_include_directories(CMSISDSPSupport PUBLIC "${DSP}/../../Include")
|
||||
|
||||
|
||||
|
||||
48
DSP/Source/SupportFunctions/SupportFunctions.c
Normal file
48
DSP/Source/SupportFunctions/SupportFunctions.c
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: SupportFunctions.c
|
||||
* Description: Combination of all support function source files.
|
||||
*
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.0.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_copy_f32.c"
|
||||
#include "arm_copy_q15.c"
|
||||
#include "arm_copy_q31.c"
|
||||
#include "arm_copy_q7.c"
|
||||
#include "arm_fill_f32.c"
|
||||
#include "arm_fill_q15.c"
|
||||
#include "arm_fill_q31.c"
|
||||
#include "arm_fill_q7.c"
|
||||
#include "arm_float_to_q15.c"
|
||||
#include "arm_float_to_q31.c"
|
||||
#include "arm_float_to_q7.c"
|
||||
#include "arm_q15_to_float.c"
|
||||
#include "arm_q15_to_q31.c"
|
||||
#include "arm_q15_to_q7.c"
|
||||
#include "arm_q31_to_float.c"
|
||||
#include "arm_q31_to_q15.c"
|
||||
#include "arm_q31_to_q7.c"
|
||||
#include "arm_q7_to_float.c"
|
||||
#include "arm_q7_to_q15.c"
|
||||
#include "arm_q7_to_q31.c"
|
||||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_copy_f32.c
|
||||
* Description: Copies the elements of a floating-point vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,66 +29,56 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup copy Vector Copy
|
||||
*
|
||||
* Copies sample by sample from source vector to destination vector.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = pSrc[n]; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
@defgroup copy Vector Copy
|
||||
|
||||
Copies sample by sample from source vector to destination vector.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrc[n]; 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup copy
|
||||
* @{
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Copies the elements of a floating-point vector.
|
||||
* @param[in] *pSrc points to input vector
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Copies the elements of a floating-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_copy_f32(
|
||||
float32_t * pSrc,
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
float32x4_t inV;
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the results in the destination buffer */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = in1;
|
||||
*pDst++ = in2;
|
||||
*pDst++ = in3;
|
||||
*pDst++ = in4;
|
||||
inV = vld1q_f32(pSrc);
|
||||
vst1q_f32(pDst, inV);
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
|
@ -96,16 +86,7 @@ void arm_copy_f32(
|
|||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
|
@ -117,7 +98,55 @@ void arm_copy_f32(
|
|||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_copy_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
/**
|
||||
* @} end of BasicCopy group
|
||||
@} end of copy group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_copy_q15.c
|
||||
* Description: Copies the elements of a Q15 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,74 +29,68 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup copy
|
||||
* @{
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Copies the elements of a Q15 vector.
|
||||
* @param[in] *pSrc points to input vector
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Copies the elements of a Q15 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_copy_q15(
|
||||
q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Read two inputs */
|
||||
*__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
|
||||
*__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* read 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
|
||||
write_q15x2_ia (&pDst, read_q15x2_ia ((q15_t **) &pSrc));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the value in the destination buffer */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicCopy group
|
||||
@} end of copy group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_copy_q31.c
|
||||
* Description: Copies the elements of a Q31 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,83 +29,70 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup copy
|
||||
* @{
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Copies the elements of a Q31 vector.
|
||||
* @param[in] *pSrc points to input vector
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Copies the elements of a Q31 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_copy_q31(
|
||||
q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the values in the destination buffer */
|
||||
in1 = *pSrc++;
|
||||
in2 = *pSrc++;
|
||||
in3 = *pSrc++;
|
||||
in4 = *pSrc++;
|
||||
|
||||
*pDst++ = in1;
|
||||
*pDst++ = in2;
|
||||
*pDst++ = in3;
|
||||
*pDst++ = in4;
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the value in the destination buffer */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicCopy group
|
||||
@} end of copy group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_copy_q7.c
|
||||
* Description: Copies the elements of a Q7 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,75 +29,67 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup copy
|
||||
* @{
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Copies the elements of a Q7 vector.
|
||||
* @param[in] *pSrc points to input vector
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Copies the elements of a Q7 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_copy_q7(
|
||||
q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the results in the destination buffer */
|
||||
/* 4 samples are copied and stored at a time using SIMD */
|
||||
*__SIMD32(pDst)++ = *__SIMD32(pSrc)++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* read 4 samples at a time */
|
||||
write_q7x4_ia (&pDst, read_q7x4_ia ((q7_t **) &pSrc));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the results in the destination buffer */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of BasicCopy group
|
||||
@} end of copy group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_fill_f32.c
|
||||
* Description: Fills a constant value into a floating-point vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,36 +29,35 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup Fill Vector Fill
|
||||
*
|
||||
* Fills the destination vector with a constant value.
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = value; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
* There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
@defgroup Fill Vector Fill
|
||||
|
||||
Fills the destination vector with a constant value.
|
||||
|
||||
<pre>
|
||||
pDst[n] = value; 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Fill
|
||||
* @{
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Fills a constant value into a floating-point vector.
|
||||
* @param[in] value input value to be filled
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the output vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Fills a constant value into a floating-point vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_fill_f32(
|
||||
float32_t value,
|
||||
float32_t * pDst,
|
||||
|
|
@ -66,27 +65,19 @@ void arm_fill_f32(
|
|||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
float32_t in1 = value;
|
||||
float32_t in2 = value;
|
||||
float32_t in3 = value;
|
||||
float32_t in4 = value;
|
||||
float32x4_t inV = vdupq_n_f32(value);
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
*pDst++ = in1;
|
||||
*pDst++ = in2;
|
||||
*pDst++ = in3;
|
||||
*pDst++ = in4;
|
||||
vst1q_f32(pDst, inV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
|
@ -94,17 +85,7 @@ void arm_fill_f32(
|
|||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
|
@ -116,7 +97,55 @@ void arm_fill_f32(
|
|||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_fill_f32(
|
||||
float32_t value,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
/**
|
||||
* @} end of Fill group
|
||||
@} end of Fill group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_fill_q15.c
|
||||
* Description: Fills a constant value into a Q15 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,21 +29,20 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Fill
|
||||
* @{
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Fills a constant value into a Q15 vector.
|
||||
* @param[in] value input value to be filled
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the output vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Fills a constant value into a Q15 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_fill_q15(
|
||||
|
|
@ -51,58 +50,51 @@ void arm_fill_q15(
|
|||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t packedValue; /* value packed to 32 bits */
|
||||
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Packing two 16 bit values to 32 bit value in order to use SIMD */
|
||||
packedValue = __PKHBT(value, value, 16U);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
*__SIMD32(pDst)++ = packedValue;
|
||||
*__SIMD32(pDst)++ = packedValue;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* fill 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, packedValue);
|
||||
write_q15x2_ia (&pDst, packedValue);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Fill group
|
||||
@} end of Fill group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_fill_q31.c
|
||||
* Description: Fills a constant value into a Q31 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,21 +29,20 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Fill
|
||||
* @{
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Fills a constant value into a Q31 vector.
|
||||
* @param[in] value input value to be filled
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the output vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Fills a constant value into a Q31 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_fill_q31(
|
||||
|
|
@ -51,59 +50,49 @@ void arm_fill_q31(
|
|||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1 = value;
|
||||
q31_t in2 = value;
|
||||
q31_t in3 = value;
|
||||
q31_t in4 = value;
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
*pDst++ = in1;
|
||||
*pDst++ = in2;
|
||||
*pDst++ = in3;
|
||||
*pDst++ = in4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Fill group
|
||||
@} end of Fill group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_fill_q7.c
|
||||
* Description: Fills a constant value into a Q7 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,21 +29,20 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup Fill
|
||||
* @{
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Fills a constant value into a Q7 vector.
|
||||
* @param[in] value input value to be filled
|
||||
* @param[out] *pDst points to output vector
|
||||
* @param[in] blockSize length of the output vector
|
||||
* @return none.
|
||||
*
|
||||
@brief Fills a constant value into a Q7 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
|
||||
void arm_fill_q7(
|
||||
|
|
@ -51,56 +50,50 @@ void arm_fill_q7(
|
|||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t packedValue; /* value packed to 32 bits */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Packing four 8 bit values to 32 bit value in order to use SIMD */
|
||||
packedValue = __PACKq7(value, value, value, value);
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
*__SIMD32(pDst)++ = packedValue;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* fill 4 samples at a time */
|
||||
write_q7x4_ia (&pDst, packedValue);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of Fill group
|
||||
@} end of Fill group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_float_to_q15.c
|
||||
* Description: Converts the elements of the floating-point vector to Q15 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,95 +29,92 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup float_to_x
|
||||
* @{
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the floating-point vector to Q15 vector.
|
||||
* @param[in] *pSrc points to the floating-point input vector
|
||||
* @param[out] *pDst points to the Q15 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
* \par
|
||||
* The equation used for the conversion process is:
|
||||
* <pre>
|
||||
* pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize.
|
||||
* </pre>
|
||||
* \par Scaling and Overflow Behavior:
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||
* \note
|
||||
* In order to apply rounding, the library should be rebuilt with the ROUNDING macro
|
||||
* defined in the preprocessor section of project options.
|
||||
*
|
||||
@brief Converts the elements of the floating-point vector to Q15 vector.
|
||||
@param[in] pSrc points to the floating-point input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
|
||||
@note
|
||||
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_float_to_q15(
|
||||
float32_t * pSrc,
|
||||
const float32_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t *pIn = pSrc; /* Src pointer */
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
float32_t in;
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 32768.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 32768.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
#endif
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
int32x4_t cvt;
|
||||
int16x4_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
pIn += 4;
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
cvt = vcvtq_n_s32_f32(inV,15);
|
||||
outV = vqmovn_s32(cvt);
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
vst1_s16(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
cvt = vcvtq_n_s32_f32(inV,15);
|
||||
outV = vqmovn_s32(cvt);
|
||||
|
||||
vst1_s16(pDst, outV);
|
||||
pDst += 4;
|
||||
pIn += 4;
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
|
|
@ -127,14 +124,14 @@ void arm_float_to_q15(
|
|||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
|
|
@ -143,7 +140,7 @@ void arm_float_to_q15(
|
|||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
|
@ -151,42 +148,97 @@ void arm_float_to_q15(
|
|||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q15(
|
||||
const float32_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0 ? 0.5f : -0.5f;
|
||||
|
||||
/* convert from float to Q15 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 32768 */
|
||||
|
||||
/* convert from float to Q15 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
/**
|
||||
* @} end of float_to_x group
|
||||
@} end of float_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_float_to_q31.c
|
||||
* Description: Converts the elements of the floating-point vector to Q31 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -37,56 +37,56 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup float_to_x
|
||||
* @{
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the floating-point vector to Q31 vector.
|
||||
* @param[in] *pSrc points to the floating-point input vector
|
||||
* @param[out] *pDst points to the Q31 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
*\par Description:
|
||||
* \par
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q31_t)(pSrc[n] * 2147483648); 0 <= n < blockSize.
|
||||
* </pre>
|
||||
* <b>Scaling and Overflow Behavior:</b>
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||
*
|
||||
* \note In order to apply rounding, the library should be rebuilt with the ROUNDING macro
|
||||
* defined in the preprocessor section of project options.
|
||||
@brief Converts the elements of the floating-point vector to Q31 vector.
|
||||
@param[in] pSrc points to the floating-point input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t)(pSrc[n] * 2147483648); 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||
|
||||
@note
|
||||
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_float_to_q31(
|
||||
float32_t * pSrc,
|
||||
const float32_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t *pIn = pSrc; /* Src pointer */
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
float32_t in;
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 2147483648.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 2147483648.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
#endif
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
int32x4_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
|
@ -94,35 +94,30 @@ void arm_float_to_q31(
|
|||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
pIn += 4;
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
outV = vcvtq_n_s32_f32(inV,31);
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
vst1q_s32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
outV = vcvtq_n_s32_f32(inV,31);
|
||||
|
||||
vst1q_s32(pDst, outV);
|
||||
pDst += 4;
|
||||
pIn += 4;
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
|
|
@ -132,7 +127,7 @@ void arm_float_to_q31(
|
|||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
|
@ -140,7 +135,7 @@ void arm_float_to_q31(
|
|||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
|
|
@ -149,7 +144,7 @@ void arm_float_to_q31(
|
|||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
|
@ -159,41 +154,99 @@ void arm_float_to_q31(
|
|||
}
|
||||
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q31(
|
||||
const float32_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 2147483648 */
|
||||
|
||||
/* convert from float to Q31 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0 ? 0.5f : -0.5f;
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 2147483648 */
|
||||
|
||||
/* convert from float to Q31 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
/**
|
||||
* @} end of float_to_x group
|
||||
@} end of float_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_float_to_q7.c
|
||||
* Description: Converts the elements of the floating-point vector to Q7 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,12 +29,12 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup float_to_x
|
||||
* @{
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -59,65 +59,73 @@
|
|||
* defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_float_to_q7(
|
||||
float32_t * pSrc,
|
||||
const float32_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t *pIn = pSrc; /* Src pointer */
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
float32_t in;
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
#endif
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
int32x4_t cvt;
|
||||
int16x4_t cvt1,cvt2;
|
||||
int8x8_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
blkCnt = blockSize >> 3U;
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
/* Compute 8 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 7 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
|
||||
vst1_s8(pDst, outV);
|
||||
pDst += 8;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
inV = vld1q_f32(pIn);
|
||||
cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
|
||||
|
||||
vst1_s8(pDst, outV);
|
||||
pDst += 8;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
|
|
@ -126,14 +134,14 @@ void arm_float_to_q7(
|
|||
|
||||
/* If the blockSize is not a multiple of 8, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
blkCnt = blockSize & 7;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
|
|
@ -142,7 +150,7 @@ void arm_float_to_q7(
|
|||
#else
|
||||
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
|
@ -151,41 +159,95 @@ void arm_float_to_q7(
|
|||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q7(
|
||||
const float32_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 128.0f);
|
||||
in += in > 0 ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q31_t) (in), 8));
|
||||
|
||||
/* Convert from float to q7 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 128 */
|
||||
/* convert from float to q7 and then store the results in the destination buffer */
|
||||
*pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 128 */
|
||||
|
||||
/* Convert from float to q7 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
/**
|
||||
* @} end of float_to_x group
|
||||
@} end of float_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q15_to_float.c
|
||||
* Description: Converts the elements of the Q15 vector to floating-point vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -37,74 +37,67 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q15_to_x
|
||||
* @{
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q15 vector to floating-point vector.
|
||||
* @param[in] *pSrc points to the Q15 input vector
|
||||
* @param[out] *pDst points to the floating-point output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (float32_t) pSrc[n] / 32768; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
@brief Converts the elements of the Q15 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 32768; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_q15_to_float(
|
||||
q15_t * pSrc,
|
||||
const q15_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q15_t *pIn = pSrc; /* Src pointer */
|
||||
const q15_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int16x8_t inV;
|
||||
int32x4_t inV0, inV1;
|
||||
float32x4_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
blkCnt = blockSize >> 3U;
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
/* Compute 8 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 7 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
/* convert from q15 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
inV = vld1q_s16(pIn);
|
||||
pIn += 8;
|
||||
|
||||
inV0 = vmovl_s16(vget_low_s16(inV));
|
||||
inV1 = vmovl_s16(vget_high_s16(inV));
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV0,15);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV1,15);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
/* If the blockSize is not a multiple of 8, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
blkCnt = blockSize & 7;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
|
@ -116,7 +109,58 @@ void arm_q15_to_float(
|
|||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q15_to_float(
|
||||
const q15_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 32768.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
/**
|
||||
* @} end of q15_to_x group
|
||||
@} end of q15_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q15_to_q31.c
|
||||
* Description: Converts the elements of the Q15 vector to Q31 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,57 +29,53 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q15_to_x
|
||||
* @{
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q15 vector to Q31 vector.
|
||||
* @param[in] *pSrc points to the Q15 input vector
|
||||
* @param[out] *pDst points to the Q31 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q31_t) pSrc[n] << 16; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q15 vector to Q31 vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t) pSrc[n] << 16; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q15_to_q31(
|
||||
q15_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q15_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q15_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2, out3, out4;
|
||||
#endif
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2, out3, out4;
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t)A << 16 */
|
||||
/* convert from q15 to q31 and then store the results in the destination buffer */
|
||||
in1 = *__SIMD32(pIn)++;
|
||||
in2 = *__SIMD32(pIn)++;
|
||||
|
||||
/* Convert from q15 to q31 and store result in destination buffer */
|
||||
in1 = read_q15x2_ia ((q15_t **) &pIn);
|
||||
in2 = read_q15x2_ia ((q15_t **) &pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
|
|
@ -103,42 +99,40 @@ void arm_q15_to_q31(
|
|||
/* extract lower 16 bits to 32 bit result */
|
||||
out4 = in2 << 16U;
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
*pDst++ = out1;
|
||||
*pDst++ = out2;
|
||||
*pDst++ = out3;
|
||||
*pDst++ = out4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t)A << 16 */
|
||||
/* convert from q15 to q31 and then store the results in the destination buffer */
|
||||
*pDst++ = (q31_t) * pIn++ << 16;
|
||||
/* C = (q31_t) A << 16 */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Convert from q15 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) *pIn++ << 16;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q15_to_x group
|
||||
@} end of q15_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q15_to_q7.c
|
||||
* Description: Converts the elements of the Q15 vector to Q7 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,58 +29,55 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q15_to_x
|
||||
* @{
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q15 vector to Q7 vector.
|
||||
* @param[in] *pSrc points to the Q15 input vector
|
||||
* @param[out] *pDst points to the Q7 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q7_t) pSrc[n] >> 8; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q15 vector to Q7 vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the Q7 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q7_t) (pSrc[n] >> 8); 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q15_to_q7(
|
||||
q15_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q15_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q15_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
/* convert from q15 to q7 and then store the results in the destination buffer */
|
||||
in1 = *__SIMD32(pIn)++;
|
||||
in2 = *__SIMD32(pIn)++;
|
||||
|
||||
/* Convert from q15 to q7 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in1 = read_q15x2_ia ((q15_t **) &pIn);
|
||||
in2 = read_q15x2_ia ((q15_t **) &pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
|
|
@ -92,7 +89,7 @@ void arm_q15_to_q7(
|
|||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHBT(in1, in2, 16);
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* rotate packed value by 24 */
|
||||
out2 = ((uint32_t) out2 << 8) | ((uint32_t) out2 >> 24);
|
||||
|
|
@ -106,37 +103,44 @@ void arm_q15_to_q7(
|
|||
out1 = out1 | out2;
|
||||
|
||||
/* store 4 samples at a time to destination buffer */
|
||||
*__SIMD32(pDst)++ = out1;
|
||||
write_q7x4_ia (&pDst, out1);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
#else
|
||||
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
/* convert from q15 to q7 and then store the results in the destination buffer */
|
||||
|
||||
/* Convert from q15 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q15_to_x group
|
||||
@} end of q15_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q31_to_float.c
|
||||
* Description: Converts the elements of the Q31 vector to floating-point vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -37,54 +37,51 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q31_to_x
|
||||
* @{
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q31 vector to floating-point vector.
|
||||
* @param[in] *pSrc points to the Q31 input vector
|
||||
* @param[out] *pDst points to the floating-point output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (float32_t) pSrc[n] / 2147483648; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
@brief Converts the elements of the Q31 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 2147483648; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_q31_to_float(
|
||||
q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t *pIn = pSrc; /* Src pointer */
|
||||
const q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int32x4_t inV;
|
||||
float32x4_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
/* convert from q31 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
/* Convert from q31 to float and then store the results in the destination buffer */
|
||||
inV = vld1q_s32(pIn);
|
||||
pIn += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV,31);
|
||||
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
|
|
@ -92,28 +89,71 @@ void arm_q31_to_float(
|
|||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
/* convert from q31 to float and then store the results in the destination buffer */
|
||||
/* Convert from q31 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q31_to_float(
|
||||
const q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
|
||||
/* Convert from q31 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
|
||||
/* Convert from q31 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
|
||||
/**
|
||||
* @} end of q31_to_x group
|
||||
@} end of q31_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q31_to_q15.c
|
||||
* Description: Converts the elements of the Q31 vector to Q15 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,55 +29,53 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q31_to_x
|
||||
* @{
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q31 vector to Q15 vector.
|
||||
* @param[in] *pSrc points to the Q31 input vector
|
||||
* @param[out] *pDst points to the Q15 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q15_t) pSrc[n] >> 16; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q31 vector to Q15 vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t) (pSrc[n] >> 16); 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q31_to_q15(
|
||||
q31_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q31_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q31_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t out1, out2;
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A >> 16 */
|
||||
/* convert from q31 to q15 and then store the results in the destination buffer */
|
||||
/* C = (q15_t) (A >> 16) */
|
||||
|
||||
/* Convert from q31 to q15 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in1 = *pIn++;
|
||||
in2 = *pIn++;
|
||||
in3 = *pIn++;
|
||||
|
|
@ -85,49 +83,52 @@ void arm_q31_to_q15(
|
|||
|
||||
/* pack two higher 16-bit values from two 32-bit values */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out1 = __PKHTB(in2, in1, 16);
|
||||
out2 = __PKHTB(in4, in3, 16);
|
||||
#else
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHTB(in3, in4, 16);
|
||||
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2_ia (&pDst, out1);
|
||||
write_q15x2_ia (&pDst, out2);
|
||||
|
||||
#else
|
||||
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHTB(in3, in4, 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
|
||||
#endif // #ifdef ARM_MATH_BIG_ENDIAN
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
*__SIMD32(pDst)++ = out1;
|
||||
*__SIMD32(pDst)++ = out2;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A >> 16 */
|
||||
/* convert from q31 to q15 and then store the results in the destination buffer */
|
||||
/* C = (q15_t) (A >> 16) */
|
||||
|
||||
/* Convert from q31 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q31_to_x group
|
||||
@} end of q31_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q31_to_q7.c
|
||||
* Description: Converts the elements of the Q31 vector to Q7 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,96 +29,82 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q31_to_x
|
||||
* @{
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q31 vector to Q7 vector.
|
||||
* @param[in] *pSrc points to the Q31 input vector
|
||||
* @param[out] *pDst points to the Q7 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q7_t) pSrc[n] >> 24; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q31 vector to Q7 vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the Q7 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q7_t) (pSrc[n] >> 24); 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q31_to_q7(
|
||||
q31_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q31_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q31_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
q31_t in1, in2, in3, in4;
|
||||
q7_t out1, out2, out3, out4;
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 24 */
|
||||
/* convert from q31 to q7 and then store the results in the destination buffer */
|
||||
in1 = *pIn++;
|
||||
in2 = *pIn++;
|
||||
in3 = *pIn++;
|
||||
in4 = *pIn++;
|
||||
/* C = (q7_t) (A >> 24) */
|
||||
|
||||
out1 = (q7_t) (in1 >> 24);
|
||||
out2 = (q7_t) (in2 >> 24);
|
||||
out3 = (q7_t) (in3 >> 24);
|
||||
out4 = (q7_t) (in4 >> 24);
|
||||
/* Convert from q31 to q7 and store result in destination buffer */
|
||||
|
||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||
out1 = (q7_t) (*pIn++ >> 24);
|
||||
out2 = (q7_t) (*pIn++ >> 24);
|
||||
out3 = (q7_t) (*pIn++ >> 24);
|
||||
out4 = (q7_t) (*pIn++ >> 24);
|
||||
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 24 */
|
||||
/* convert from q31 to q7 and then store the results in the destination buffer */
|
||||
/* C = (q7_t) (A >> 24) */
|
||||
|
||||
/* Convert from q31 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pIn++ >> 24);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q31_to_x group
|
||||
@} end of q31_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q7_to_float.c
|
||||
* Description: Converts the elements of the Q7 vector to floating-point vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,7 +29,7 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
|
|
@ -37,83 +37,143 @@
|
|||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q7_to_x
|
||||
* @{
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q7 vector to floating-point vector.
|
||||
* @param[in] *pSrc points to the Q7 input vector
|
||||
* @param[out] *pDst points to the floating-point output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (float32_t) pSrc[n] / 128; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
@brief Converts the elements of the Q7 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 128; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_q7_to_float(
|
||||
q7_t * pSrc,
|
||||
const q7_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q7_t *pIn = pSrc; /* Src pointer */
|
||||
const q7_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int8x16_t inV;
|
||||
int16x8_t inVLO, inVHI;
|
||||
int32x4_t inVLL, inVLH, inVHL, inVHH;
|
||||
float32x4_t outV;
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
blkCnt = blockSize >> 4U;
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
/* Compute 16 outputs at a time.
|
||||
** A second loop below computes the remaining 0 to 15 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
/* convert from q7 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
/* Convert from q7 to float and then store the results in the destination buffer */
|
||||
inV = vld1q_s8(pIn);
|
||||
pIn += 16;
|
||||
|
||||
inVLO = vmovl_s8(vget_low_s8(inV));
|
||||
inVHI = vmovl_s8(vget_high_s8(inV));
|
||||
|
||||
inVLL = vmovl_s16(vget_low_s16(inVLO));
|
||||
inVLH = vmovl_s16(vget_high_s16(inVLO));
|
||||
inVHL = vmovl_s16(vget_low_s16(inVHI));
|
||||
inVHH = vmovl_s16(vget_high_s16(inVHI));
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVLL,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVLH,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVHL,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVHH,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
/* If the blockSize is not a multiple of 16, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
blkCnt = blockSize & 0xF;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
/* convert from q7 to float and then store the results in the destination buffer */
|
||||
/* Convert from q7 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q7_to_float(
|
||||
const q7_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
|
||||
/* Convert from q7 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
|
||||
/* Convert from q7 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
|
||||
/**
|
||||
* @} end of q7_to_x group
|
||||
@} end of q7_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q7_to_q15.c
|
||||
* Description: Converts the elements of the Q7 vector to Q15 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,60 +29,55 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q7_to_x
|
||||
* @{
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q7 vector to Q15 vector.
|
||||
* @param[in] *pSrc points to the Q7 input vector
|
||||
* @param[out] *pDst points to the Q15 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q15_t) pSrc[n] << 8; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q7 vector to Q15 vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t) pSrc[n] << 8; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q7_to_q15(
|
||||
q7_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q7_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q7_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
q31_t in;
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in;
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
/* convert from q7 to q15 and then store the results in the destination buffer */
|
||||
in = *__SIMD32(pIn)++;
|
||||
|
||||
/* Convert from q7 to q15 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in = read_q7x4_ia ((q7_t **) &pIn);
|
||||
|
||||
/* rotate in by 8 and extend two q7_t values to q15_t values */
|
||||
in1 = __SXTB16(__ROR(in, 8));
|
||||
|
|
@ -97,49 +92,52 @@ void arm_q7_to_q15(
|
|||
in2 = in2 & 0xFF00FF00;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out2 = __PKHTB(in1, in2, 16);
|
||||
out1 = __PKHBT(in2, in1, 16);
|
||||
#else
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHBT(in2, in1, 16);
|
||||
#endif
|
||||
|
||||
write_q15x2_ia (&pDst, out1);
|
||||
write_q15x2_ia (&pDst, out2);
|
||||
|
||||
#else
|
||||
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHBT(in2, in1, 16);
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
|
||||
#endif
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
*__SIMD32(pDst)++ = out1;
|
||||
*__SIMD32(pDst)++ = out2;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
/* convert from q7 to q15 and then store the results in the destination buffer */
|
||||
|
||||
/* Convert from q7 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) * pIn++ << 8;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q7_to_x group
|
||||
@} end of q7_to_x group
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -3,13 +3,13 @@
|
|||
* Title: arm_q7_to_q31.c
|
||||
* Description: Converts the elements of the Q7 vector to Q31 vector
|
||||
*
|
||||
* $Date: 27. January 2017
|
||||
* $Revision: V.1.5.1
|
||||
* $Date: 18. March 2019
|
||||
* $Revision: V1.6.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||
* Copyright (C) 2010-2019 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
|
|
@ -29,56 +29,49 @@
|
|||
#include "arm_math.h"
|
||||
|
||||
/**
|
||||
* @ingroup groupSupport
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @addtogroup q7_to_x
|
||||
* @{
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the Q7 vector to Q31 vector.
|
||||
* @param[in] *pSrc points to the Q7 input vector
|
||||
* @param[out] *pDst points to the Q31 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
* \par Description:
|
||||
*
|
||||
* The equation used for the conversion process is:
|
||||
*
|
||||
* <pre>
|
||||
* pDst[n] = (q31_t) pSrc[n] << 24; 0 <= n < blockSize.
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@brief Converts the elements of the Q7 vector to Q31 vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t) pSrc[n] << 24; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
void arm_q7_to_q31(
|
||||
q7_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
const q7_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
q7_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_DSP)
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
q31_t in;
|
||||
q31_t in;
|
||||
|
||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||
|
||||
/*loop Unrolling */
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 24 */
|
||||
/* convert from q7 to q31 and then store the results in the destination buffer */
|
||||
in = *__SIMD32(pIn)++;
|
||||
|
||||
/* Convert from q7 to q31 and store result in destination buffer */
|
||||
in = read_q7x4_ia ((q7_t **) &pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
|
|
@ -94,37 +87,35 @@ void arm_q7_to_q31(
|
|||
*pDst++ = (__ROR(in, 16)) & 0xFF000000;
|
||||
*pDst++ = (__ROR(in, 8)) & 0xFF000000;
|
||||
|
||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Run the below code for Cortex-M0 */
|
||||
|
||||
/* Loop over blockSize number of values */
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 24 */
|
||||
/* convert from q7 to q31 and then store the results in the destination buffer */
|
||||
|
||||
/* Convert from q7 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) * pIn++ << 24;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* @} end of q7_to_x group
|
||||
@} end of q7_to_x group
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue