diff --git a/ARM_math/arm_mat_inverse_f32.c b/ARM_math/arm_mat_inverse_f32.c new file mode 100644 index 0000000..9c0afe9 --- /dev/null +++ b/ARM_math/arm_mat_inverse_f32.c @@ -0,0 +1,299 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_inverse_f32.c + * Description: Floating-point matrix inverse + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" +#include "matrix_utils.h" + + +/** + @ingroup groupMatrix + */ + +/** + @defgroup MatrixInv Matrix Inverse + + Computes the inverse of a matrix. + + The inverse is defined only if the input matrix is square and non-singular (the determinant is non-zero). + The function checks that the input and output matrices are square and of the same size. + + Matrix inversion is numerically sensitive and the CMSIS DSP library only supports matrix + inversion of floating-point matrices. + + @par Algorithm + The Gauss-Jordan method is used to find the inverse. + The algorithm performs a sequence of elementary row-operations until it + reduces the input matrix to an identity matrix. Applying the same sequence + of elementary row-operations to an identity matrix yields the inverse matrix. + If the input matrix is singular, then the algorithm terminates and returns error status + ARM_MATH_SINGULAR. + + @par Matrix Inverse of a 3 x 3 matrix using Gauss-Jordan Method + + \f[ + \begin{pmatrix} + a_{1,1} & a_{1,2} & a_{1,3} & | & 1 & 0 & 0\\ + a_{2,1} & a_{2,2} & a_{2,3} & | & 0 & 1 & 0\\ + a_{3,1} & a_{3,2} & a_{3,3} & | & 0 & 0 & 1\\ + \end{pmatrix} + \rightarrow + \begin{pmatrix} + 1 & 0 & 0 & | & x_{1,1} & x_{2,1} & x_{3,1} \\ + 0 & 1 & 0 & | & x_{1,2} & x_{2,2} & x_{3,2} \\ + 0 & 0 & 1 & | & x_{1,3} & x_{2,3} & x_{3,3} \\ + \end{pmatrix} + \f] + */ + +/** + @addtogroup MatrixInv + @{ + */ + +/** + @brief Floating-point matrix inverse. + @param[in] pSrc points to input matrix structure. The source matrix is modified by the function. + @param[out] pDst points to output matrix structure + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) + */ +arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + + float32_t *pTmp; + uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ + uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ + + + float32_t pivot = 0.0f, newPivot=0.0f; /* Temporary input values */ + uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column; /* loop counters */ + arm_status status; /* status of matrix inverse */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || + (pDst->numRows != pDst->numCols) || + (pSrc->numRows != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ + * | a11 a12 | 1 0 | | X11 X12 | + * | | | = | | + * |_ a21 a22 | 0 1 _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc). + * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* Working pointer for destination matrix */ + pTmp = pOut; + + /* Loop over the number of rows */ + rowCnt = numRows; + + /* Making the destination matrix as identity matrix */ + while (rowCnt > 0U) + { + /* Writing all zeroes in lower triangle of the destination matrix */ + j = numRows - rowCnt; + while (j > 0U) + { + *pTmp++ = 0.0f; + j--; + } + + /* Writing all ones in the diagonal of the destination matrix */ + *pTmp++ = 1.0f; + + /* Writing all zeroes in upper triangle of the destination matrix */ + j = rowCnt - 1U; + while (j > 0U) + { + *pTmp++ = 0.0f; + j--; + } + + /* Decrement loop counter */ + rowCnt--; + } + + /* Loop over the number of columns of the input matrix. + All the elements in each column are processed by the row operations */ + + /* Index modifier to navigate through the columns */ + for(column = 0U; column < numCols; column++) + { + /* Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. */ + + pivotRow = column; + + /* Temporary variable to hold the pivot value */ + pTmp = ELEM(pSrc,column,column) ; + pivot = *pTmp; + selectedRow = column; + + /* Find maximum pivot in column */ + + /* Loop over the number rows present below */ + + for (rowNb = column+1; rowNb < numRows; rowNb++) + { + /* Update the input and destination pointers */ + pTmp = ELEM(pSrc,rowNb,column); + newPivot = *pTmp; + if (fabsf(newPivot) > fabsf(pivot)) + { + selectedRow = rowNb; + pivot = newPivot; + } + } + + /* Check if there is a non zero pivot element to + * replace in the rows below */ + if ((pivot != 0.0f) && (selectedRow != column)) + { + + SWAP_ROWS_F32(pSrc,column, pivotRow,selectedRow); + SWAP_ROWS_F32(pDst,0, pivotRow,selectedRow); + + + /* Flag to indicate whether exchange is done or not */ + flag = 1U; + } + + + + + + /* Update the status if the matrix is singular */ + if ((flag != 1U) && (pivot == 0.0f)) + { + return ARM_MATH_SINGULAR; + } + + + /* Pivot element of the row */ + pivot = 1.0f / pivot; + + SCALE_ROW_F32(pSrc,column,pivot,pivotRow); + SCALE_ROW_F32(pDst,0,pivot,pivotRow); + + + /* Replace the rows with the sum of that row and a multiple of row i + * so that each new element in column i above row i is zero.*/ + + rowNb = 0; + for (;rowNb < pivotRow; rowNb++) + { + pTmp = ELEM(pSrc,rowNb,column) ; + pivot = *pTmp; + + MAS_ROW_F32(column,pSrc,rowNb,pivot,pSrc,pivotRow); + MAS_ROW_F32(0 ,pDst,rowNb,pivot,pDst,pivotRow); + + + } + + for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++) + { + pTmp = ELEM(pSrc,rowNb,column) ; + pivot = *pTmp; + + MAS_ROW_F32(column,pSrc,rowNb,pivot,pSrc,pivotRow); + MAS_ROW_F32(0 ,pDst,rowNb,pivot,pDst,pivotRow); + + } + + } + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + + if ((flag != 1U) && (pivot == 0.0f)) + { + pIn = pSrc->pData; + for (i = 0; i < numRows * numCols; i++) + { + if (pIn[i] != 0.0f) + break; + } + + if (i == numRows * numCols) + status = ARM_MATH_SINGULAR; + } + } + + /* Return to application */ + return (status); +} +/** + @} end of MatrixInv group + */ diff --git a/ARM_math/arm_mat_inverse_f64.c b/ARM_math/arm_mat_inverse_f64.c new file mode 100644 index 0000000..d55806a --- /dev/null +++ b/ARM_math/arm_mat_inverse_f64.c @@ -0,0 +1,263 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_inverse_f64.c + * Description: Floating-point matrix inverse + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" +#include "matrix_utils.h" + +/** + @ingroup groupMatrix + */ + + +/** + @addtogroup MatrixInv + @{ + */ + +/** + @brief Floating-point (64 bit) matrix inverse. + @param[in] pSrc points to input matrix structure. The source matrix is modified by the function. + @param[out] pDst points to output matrix structure + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) + */ + +arm_status arm_mat_inverse_f64( + const arm_matrix_instance_f64 * pSrc, + arm_matrix_instance_f64 * pDst) +{ + float64_t *pIn = pSrc->pData; /* input data matrix pointer */ + float64_t *pOut = pDst->pData; /* output data matrix pointer */ + + float64_t *pTmp; + uint32_t numRows = pSrc->numRows; /* Number of rows in the matrix */ + uint32_t numCols = pSrc->numCols; /* Number of Cols in the matrix */ + + + float64_t pivot = 0.0, newPivot=0.0; /* Temporary input values */ + uint32_t selectedRow,pivotRow,i, rowNb, rowCnt, flag = 0U, j,column; /* loop counters */ + arm_status status; /* status of matrix inverse */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pSrc->numCols) || + (pDst->numRows != pDst->numCols) || + (pSrc->numRows != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /*-------------------------------------------------------------------------------------------------------------- + * Matrix Inverse can be solved using elementary row operations. + * + * Gauss-Jordan Method: + * + * 1. First combine the identity matrix and the input matrix separated by a bar to form an + * augmented matrix as follows: + * _ _ _ _ + * | a11 a12 | 1 0 | | X11 X12 | + * | | | = | | + * |_ a21 a22 | 0 1 _| |_ X21 X21 _| + * + * 2. In our implementation, pDst Matrix is used as identity matrix. + * + * 3. Begin with the first row. Let i = 1. + * + * 4. Check to see if the pivot for row i is zero. + * The pivot is the element of the main diagonal that is on the current row. + * For instance, if working with row i, then the pivot element is aii. + * If the pivot is zero, exchange that row with a row below it that does not + * contain a zero in column i. If this is not possible, then an inverse + * to that matrix does not exist. + * + * 5. Divide every element of row i by the pivot. + * + * 6. For every row below and row i, replace that row with the sum of that row and + * a multiple of row i so that each new element in column i below row i is zero. + * + * 7. Move to the next row and column and repeat steps 2 through 5 until you have zeros + * for every element below and above the main diagonal. + * + * 8. Now an identical matrix is formed to the left of the bar(input matrix, pSrc). + * Therefore, the matrix to the right of the bar is our solution(pDst matrix, pDst). + *----------------------------------------------------------------------------------------------------------------*/ + + /* Working pointer for destination matrix */ + pTmp = pOut; + + /* Loop over the number of rows */ + rowCnt = numRows; + + /* Making the destination matrix as identity matrix */ + while (rowCnt > 0U) + { + /* Writing all zeroes in lower triangle of the destination matrix */ + j = numRows - rowCnt; + while (j > 0U) + { + *pTmp++ = 0.0; + j--; + } + + /* Writing all ones in the diagonal of the destination matrix */ + *pTmp++ = 1.0; + + /* Writing all zeroes in upper triangle of the destination matrix */ + j = rowCnt - 1U; + while (j > 0U) + { + *pTmp++ = 0.0; + j--; + } + + /* Decrement loop counter */ + rowCnt--; + } + + /* Loop over the number of columns of the input matrix. + All the elements in each column are processed by the row operations */ + + /* Index modifier to navigate through the columns */ + for(column = 0U; column < numCols; column++) + { + /* Check if the pivot element is zero.. + * If it is zero then interchange the row with non zero row below. + * If there is no non zero element to replace in the rows below, + * then the matrix is Singular. */ + + pivotRow = column; + + /* Temporary variable to hold the pivot value */ + pTmp = ELEM(pSrc,column,column) ; + pivot = *pTmp; + selectedRow = column; + + + /* Loop over the number rows present below */ + + for (rowNb = column+1; rowNb < numRows; rowNb++) + { + /* Update the input and destination pointers */ + pTmp = ELEM(pSrc,rowNb,column); + newPivot = *pTmp; + if (fabs(newPivot) > fabs(pivot)) + { + selectedRow = rowNb; + pivot = newPivot; + } + } + + /* Check if there is a non zero pivot element to + * replace in the rows below */ + if ((pivot != 0.0) && (selectedRow != column)) + { + /* Loop over number of columns + * to the right of the pilot element */ + + SWAP_ROWS_F64(pSrc,column, pivotRow,selectedRow); + SWAP_ROWS_F64(pDst,0, pivotRow,selectedRow); + + + /* Flag to indicate whether exchange is done or not */ + flag = 1U; + + } + + + /* Update the status if the matrix is singular */ + if ((flag != 1U) && (pivot == 0.0)) + { + return ARM_MATH_SINGULAR; + } + + + /* Pivot element of the row */ + pivot = 1.0 / pivot; + + SCALE_ROW_F64(pSrc,column,pivot,pivotRow); + SCALE_ROW_F64(pDst,0,pivot,pivotRow); + + + /* Replace the rows with the sum of that row and a multiple of row i + * so that each new element in column i above row i is zero.*/ + + rowNb = 0; + for (;rowNb < pivotRow; rowNb++) + { + pTmp = ELEM(pSrc,rowNb,column) ; + pivot = *pTmp; + + MAS_ROW_F64(column,pSrc,rowNb,pivot,pSrc,pivotRow); + MAS_ROW_F64(0 ,pDst,rowNb,pivot,pDst,pivotRow); + + + } + + for (rowNb = pivotRow + 1; rowNb < numRows; rowNb++) + { + pTmp = ELEM(pSrc,rowNb,column) ; + pivot = *pTmp; + + MAS_ROW_F64(column,pSrc,rowNb,pivot,pSrc,pivotRow); + MAS_ROW_F64(0 ,pDst,rowNb,pivot,pDst,pivotRow); + + } + + } + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + + if ((flag != 1U) && (pivot == 0.0)) + { + pIn = pSrc->pData; + for (i = 0; i < numRows * numCols; i++) + { + if (pIn[i] != 0.0) + break; + } + + if (i == numRows * numCols) + status = ARM_MATH_SINGULAR; + } + } + + /* Return to application */ + return (status); +} +/** + @} end of MatrixInv group + */ diff --git a/ARM_math/arm_mat_mult_f32.c b/ARM_math/arm_mat_mult_f32.c new file mode 100644 index 0000000..978e8be --- /dev/null +++ b/ARM_math/arm_mat_mult_f32.c @@ -0,0 +1,1021 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_mult_f32.c + * Description: Floating-point matrix multiplication + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" + +#if defined(ARM_MATH_NEON) +#define GROUPOFROWS 8 +#endif + +/** + * @ingroup groupMatrix + */ + +/** + * @defgroup MatrixMult Matrix Multiplication + * + * Multiplies two matrices. + * + * @par Multiplication of two 3x3 matrices: + * + * \f[ + * \begin{pmatrix} + * a_{1,1} & a_{1,2} & a_{1,3} \\ + * a_{2,1} & a_{2,2} & a_{2,3} \\ + * a_{3,1} & a_{3,2} & a_{3,3} \\ + * \end{pmatrix} + * + * \begin{pmatrix} + * b_{1,1} & b_{1,2} & b_{1,3} \\ + * b_{2,1} & b_{2,2} & b_{2,3} \\ + * b_{3,1} & b_{3,2} & b_{3,3} \\ + * \end{pmatrix} + * = + * \begin{pmatrix} + * a_{1,1} b_{1,1}+a_{1,2} b_{2,1}+a_{1,3} b_{3,1} & a_{1,1} b_{1,2}+a_{1,2} b_{2,2}+a_{1,3} b_{3,2} & a_{1,1} b_{1,3}+a_{1,2} b_{2,3}+a_{1,3} b_{3,3} \\ + * a_{2,1} b_{1,1}+a_{2,2} b_{2,1}+a_{2,3} b_{3,1} & a_{2,1} b_{1,2}+a_{2,2} b_{2,2}+a_{2,3} b_{3,2} & a_{2,1} b_{1,3}+a_{2,2} b_{2,3}+a_{2,3} b_{3,3} \\ + * a_{3,1} b_{1,1}+a_{3,2} b_{2,1}+a_{3,3} b_{3,1} & a_{3,1} b_{1,2}+a_{3,2} b_{2,2}+a_{3,3} b_{3,2} & a_{3,1} b_{1,3}+a_{3,2} b_{2,3}+a_{3,3} b_{3,3} \\ + * \end{pmatrix} + * \f] + + * Matrix multiplication is only defined if the number of columns of the + * first matrix equals the number of rows of the second matrix. + * Multiplying an M x N matrix with an N x P matrix results + * in an M x P matrix. + * When matrix size checking is enabled, the functions check: (1) that the inner dimensions of + * pSrcA and pSrcB are equal; and (2) that the size of the output + * matrix equals the outer dimensions of pSrcA and pSrcB. + */ + + +/** + * @addtogroup MatrixMult + * @{ + */ + + + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#define MATRIX_DIM3 3 +#define MATRIX_DIM4 4 + +__STATIC_INLINE arm_status arm_mat_mult_f32_2x2_mve( + const arm_matrix_instance_f32 *pSrcA, + const arm_matrix_instance_f32 *pSrcB, + arm_matrix_instance_f32 *pDst) +{ + /* {a00, a00, a10, a10} */ + static const uint32_t offsetA0[4] = { 0, 0, 2, 2 }; + /* {b00, b01, b00, b01} */ + static const uint32_t offsetB0[4] = { 0, 1, 0, 1 }; + /* {a01, a01, a11, a11} */ + static const uint32_t offsetA1[4] = { 1, 1, 3, 3 }; + /* {b10, b11, b10, b11} */ + static const uint32_t offsetB1[4] = { 2, 3, 2, 3 }; + + uint32x4_t vecOffsA, vecOffsB; + f32x4_t vecInA, vecInB, vecDst; + + vecOffsA = vldrwq_u32((uint32_t const *) offsetA0); + vecOffsB = vldrwq_u32((uint32_t const *) offsetB0); + + vecInA = vldrwq_gather_shifted_offset((float32_t const *) pSrcA->pData, vecOffsA); + vecInB = vldrwq_gather_shifted_offset((float32_t const *) pSrcB->pData, vecOffsB); + + vecDst = vmulq(vecInA, vecInB); + + vecOffsA = vldrwq_u32((uint32_t const *) offsetA1); + vecOffsB = vldrwq_u32((uint32_t const *) offsetB1); + + vecInA = vldrwq_gather_shifted_offset((float32_t const *) pSrcA->pData, vecOffsA); + vecInB = vldrwq_gather_shifted_offset((float32_t const *) pSrcB->pData, vecOffsB); + + vecDst = vfmaq(vecDst, vecInA, vecInB); + + vstrwq_f32(pDst->pData, vecDst); + + return (ARM_MATH_SUCCESS); + +} + + +/* + * A = {{a00, a01, a02}, + * {a10, a11, a12}, + * {a20, a21, a22}} + * B = {{b00, b01, b02}, + * {b10, b11, b12}, + * {b20, b21, b22}} + * + * Dst = {{a00 b00 + a01 b10 + a02 b20, a00 b01 + a01 b11 + a02 b21, a00 b02 + a01 b12 + a02 b22}, + * {a10 b00 + a11 b10 + a12 b20, a10 b01 + a11 b11 + a12 b21, a10 b02 + a11 b12 + a12 b22}, + * {a20 b00 + a21 b10 + a22 b20, a20 b01 + a21 b11 + a22 b21, a20 b02 + a21 b12 + a22 b22}} + */ +__STATIC_INLINE arm_status arm_mat_mult_f32_3x3_mve( + const arm_matrix_instance_f32 *pSrcA, + const arm_matrix_instance_f32 *pSrcB, + arm_matrix_instance_f32 *pDst) +{ + float32_t *pInB = pSrcB->pData; /* input data matrix pointer B */ + float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *pInA0, *pInA1, *pInA2; + f32x4_t vecMac0, vecMac1, vecMac2; + f32x4_t vecInB; + float32_t const *pSrBVec; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + MATRIX_DIM3; + pInA2 = pInA1 + MATRIX_DIM3; + /* enable predication to disable last (4th) vector element */ + mve_pred16_t p0 = vctp32q(MATRIX_DIM3); + + /* + * load {b0,0, b0,1, b0,2, 0} + */ + vecInB = vldrwq_z_f32(pSrBVec, p0); + pSrBVec += MATRIX_DIM3; + + vecMac0 = vmulq(vecInB, *pInA0++); + vecMac1 = vmulq(vecInB, *pInA1++); + vecMac2 = vmulq(vecInB, *pInA2++); + /* + * load {b1,0, b1,1, b1,2, 0} + */ + vecInB = vldrwq_z_f32(pSrBVec, p0); + pSrBVec += MATRIX_DIM3; + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + /* + * load {b2,0, b2,1 , b2,2, 0} + */ + vecInB = vldrwq_z_f32(pSrBVec, p0); + pSrBVec += MATRIX_DIM3; + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + + /* partial vector stores */ + vstrwq_p_f32(pOut, vecMac0, p0); + pOut += MATRIX_DIM3; + vstrwq_p_f32(pOut, vecMac1, p0); + pOut += MATRIX_DIM3; + vstrwq_p_f32(pOut, vecMac2, p0); + /* + * Return to application + */ + return (ARM_MATH_SUCCESS); +} + + + + +__STATIC_INLINE arm_status arm_mat_mult_f32_4x4_mve( + const arm_matrix_instance_f32 *pSrcA, + const arm_matrix_instance_f32 *pSrcB, + arm_matrix_instance_f32 *pDst) +{ + float32_t const *pSrBVec; + float32_t *pInB = pSrcB->pData; /* input data matrix pointer B */ + float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *pInA0, *pInA1, *pInA2, *pInA3; + f32x4_t vecMac0, vecMac1, vecMac2, vecMac3; + f32x4_t vecInB; + + pSrBVec = (float32_t const *) pInB; + + pInA0 = pInA; + pInA1 = pInA0 + MATRIX_DIM4; + pInA2 = pInA1 + MATRIX_DIM4; + pInA3 = pInA2 + MATRIX_DIM4; + /* + * load {b0,0, b0,1, b0,2, b0,3} + */ + vecInB = vld1q(pSrBVec); + pSrBVec += MATRIX_DIM4; + + vecMac0 = vmulq(vecInB, *pInA0++); + vecMac1 = vmulq(vecInB, *pInA1++); + vecMac2 = vmulq(vecInB, *pInA2++); + vecMac3 = vmulq(vecInB, *pInA3++); + /* + * load {b1,0, b1,1, b1,2, b1,3} + */ + vecInB = vld1q(pSrBVec); + pSrBVec += MATRIX_DIM4; + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + vecMac3 = vfmaq(vecMac3, vecInB, *pInA3++); + /* + * load {b2,0, b2,1, b2,2, b2,3} + */ + vecInB = vld1q(pSrBVec); + pSrBVec += MATRIX_DIM4; + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + vecMac3 = vfmaq(vecMac3, vecInB, *pInA3++); + /* + * load {b3,0, b3,1, b3,2, b3,3} + */ + vecInB = vld1q(pSrBVec); + pSrBVec += MATRIX_DIM4; + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + vecMac3 = vfmaq(vecMac3, vecInB, *pInA3++); + + vst1q(pOut, vecMac0); + pOut += MATRIX_DIM4; + vst1q(pOut, vecMac1); + pOut += MATRIX_DIM4; + vst1q(pOut, vecMac2); + pOut += MATRIX_DIM4; + vst1q(pOut, vecMac3); + /* + * Return to application + */ + return (ARM_MATH_SUCCESS); +} + + +/** + * @brief Floating-point matrix multiplication. + * @param[in] *pSrcA points to the first input matrix structure + * @param[in] *pSrcB points to the second input matrix structure + * @param[out] *pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pInB = pSrcB->pData; /* input data matrix pointer B */ + float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + int numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ + int numColsB = pSrcB->numCols; /* number of columns of input matrix B */ + int numColsA = pSrcA->numCols; /* number of columns of input matrix A */ + uint32_t blkCnt; /* loop counters */ + uint32_t i; + arm_status status; + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrcA->numCols != pSrcB->numRows) || + (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + { + /* small squared matrix specialized routines */ + if(numRowsA == numColsB && numColsB == numColsA) { + if (numRowsA == 1) + { + pOut[0] = pInA[0] * pInB[0]; + return(ARM_MATH_SUCCESS); + } + else if(numRowsA == 2) + return arm_mat_mult_f32_2x2_mve(pSrcA, pSrcB, pDst); + else if(numRowsA == 3) + return arm_mat_mult_f32_3x3_mve(pSrcA, pSrcB, pDst); + else if(numRowsA == 4) + return arm_mat_mult_f32_4x4_mve(pSrcA, pSrcB, pDst); + } + + /* main loop process 4 rows */ + i = numRowsA >> 2; + while (i > 0U) + { + float32_t *pInA0, *pInA1, *pInA2, *pInA3; + float32_t *pInB0; + float32_t *pOut0, *pOut1, *pOut2, *pOut3; + f32x4_t vecMac0, vecMac1, vecMac2, vecMac3; + f32x4_t vecInB; + + /* pointers to 4 consecutive output rows */ + pOut0 = pOut; + pOut1 = pOut0 + numColsB; + pOut2 = pOut1 + numColsB; + pOut3 = pOut2 + numColsB; + pInB0 = pInB; + + uint32_t k = numColsB >> 2; + while (k > 0U) + { + /* pointers to 4 consecutive Matrix A rows */ + pInA0 = pInA; + pInA1 = pInA0 + numColsA; + pInA2 = pInA1 + numColsA; + pInA3 = pInA2 + numColsA; + + vecMac0 = vdupq_n_f32(0.0f); + vecMac1 = vdupq_n_f32(0.0f); + vecMac2 = vdupq_n_f32(0.0f); + vecMac3 = vdupq_n_f32(0.0f); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = *(f32x4_t *)pInB0; /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + vecMac3 = vfmaq(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + numColsB; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x 4 block) in the destination buffer */ + vst1q(pOut0, vecMac0); + pOut0 += 4; + vst1q(pOut1, vecMac1); + pOut1 += 4; + vst1q(pOut2, vecMac2); + pOut2 += 4; + vst1q(pOut3, vecMac3); + pOut3 += 4; + + /* + * rewind + */ + pInB0 -= (numColsB * numColsA) - 4; + k--; + } + + int colBLeft = numColsB & 3; + if (colBLeft) + { + pInA0 = pInA; + pInA1 = pInA0 + numColsA; + pInA2 = pInA1 + numColsA; + pInA3 = pInA2 + numColsA; + mve_pred16_t p0 = vctp32q(colBLeft); + + vecMac0 = vdupq_n_f32(0.0f); + vecMac1 = vdupq_n_f32(0.0f); + vecMac2 = vdupq_n_f32(0.0f); + vecMac3 = vdupq_n_f32(0.0f); + + blkCnt = numColsA; + + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = vldrwq_z_f32(pInB0, p0); + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + vecMac1 = vfmaq(vecMac1, vecInB, *pInA1++); + vecMac2 = vfmaq(vecMac2, vecInB, *pInA2++); + vecMac3 = vfmaq(vecMac3, vecInB, *pInA3++); + + pInB0 = pInB0 + numColsB; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (4 x colBLeft block) in the destination buffer */ + vstrwq_p_f32(pOut0, vecMac0, p0); + vstrwq_p_f32(pOut1, vecMac1, p0); + vstrwq_p_f32(pOut2, vecMac2, p0); + vstrwq_p_f32(pOut3, vecMac3, p0); + } + + /* move to next rows */ + pInA += 4 * numColsA; + pOut += 4 * numColsB; + i--; + } + + /* + * non multiple of 4 rows for Matrix A + * process single row + */ + if (numRowsA & 3) + { + i = numRowsA & 3; + while (i > 0U) + { + float32_t *pInA0; + float32_t *pInB0; + float32_t *pOut0; + f32x4_t vecInB; + f32x4_t vecMac0; + + pOut0 = pOut; + pInB0 = pInB; + + uint32_t k = numColsB >> 2; + while (k > 0U) + { + pInA0 = pInA; + + vecMac0 = vdupq_n_f32(0.0f); + blkCnt = numColsA; + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = *(f32x4_t *)pInB0; /* vldrwq_f32(pInB0, 0); */ + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + numColsB; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + + /* Store the results (1 x 4 block) in the destination buffer */ + vst1q(pOut0, vecMac0); + pOut0 += 4; + + /* + * rewind + */ + pInB0 -= (numColsB * numColsA) - 4; + k--; + } + + int colBLeft = numColsB & 3; + if (colBLeft) + { + pInA0 = pInA; + mve_pred16_t p0 = vctp32q(colBLeft); + + vecMac0 = vdupq_n_f32(0.0f); + blkCnt = numColsA; + while (blkCnt > 0U) + { + /* + * load {bi,4n+0, bi,4n+1, bi,4n+2, bi,4n+3} + */ + vecInB = vldrwq_z_f32(pInB0, p0); + + vecMac0 = vfmaq(vecMac0, vecInB, *pInA0++); + + pInB0 = pInB0 + numColsB; + /* + * Decrement the blockSize loop counter + */ + blkCnt--; + } + /* Store the results (1 x colBLeft block) in the destination buffer */ + vstrwq_p_f32(pOut0, vecMac0, p0); + } + + /* move to next row */ + pInA += 1 * numColsA; + pOut += 1 * numColsB; + i--; + } + + } + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} +#else + +#if defined(ARM_MATH_NEON) +/** + * @brief Floating-point matrix multiplication. + * @param[in] *pSrcA points to the first input matrix structure + * @param[in] *pSrcB points to the second input matrix structure + * @param[out] *pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn1 = pSrcA->pData; /* input data matrix pointer A */ + float32_t *pIn2 = pSrcB->pData; /* input data matrix pointer B */ + float32_t *pInA = pSrcA->pData; /* input data matrix pointer A */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *px; /* Temporary output data matrix pointer */ + float32_t sum; /* Accumulator */ + uint16_t numRowsA = pSrcA->numRows; /* number of rows of input matrix A */ + uint16_t numColsB = pSrcB->numCols; /* number of columns of input matrix B */ + uint16_t numColsA = pSrcA->numCols; /* number of columns of input matrix A */ + + + uint16_t col, i = 0U, j, row = numRowsA, rowCnt, colCnt; /* loop counters */ + arm_status status; /* status of matrix multiplication */ + + float32x4_t a0V, a1V, a2V, a3V, a4V, a5V, a6V, a7V; + float32x4_t acc0,acc1,acc2,acc3,acc4,acc5,acc6,acc7,temp; + float32x2_t accum = vdup_n_f32(0); + float32_t *pIn1B = pSrcA->pData; + float32_t *pIn1C = pSrcA->pData; + float32_t *pIn1D = pSrcA->pData; + float32_t *pIn1E = pSrcA->pData; + float32_t *pIn1F = pSrcA->pData; + float32_t *pIn1G = pSrcA->pData; + float32_t *pIn1H = pSrcA->pData; + + float32_t *pxB,*pxC, *pxD, *pxE, *pxF, *pxG, *pxH; /* Temporary output data matrix pointer */ + float32_t sum0,sum1, sum2,sum3, sum4, sum5 , sum6, sum7; + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrcA->numCols != pSrcB->numRows) || + (pSrcA->numRows != pDst->numRows) || (pSrcB->numCols != pDst->numCols)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + { + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* Row loop */ + rowCnt = row >> 3; + + while(rowCnt > 0) + { + /* Output pointer is set to starting address of the row being processed */ + px = pOut + GROUPOFROWS*i; + pxB = px + numColsB; + pxC = px + 2*numColsB; + pxD = px + 3*numColsB; + pxE = px + 4*numColsB; + pxF = px + 5*numColsB; + pxG = px + 6*numColsB; + pxH = px + 7*numColsB; + + /* For every row wise process, the column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, the pIn2 pointer is set + ** to the starting address of the pSrcB data */ + pIn2 = pSrcB->pData; + + j = 0U; + + /* Column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum0 = 0.0f; + sum1 = 0.0f; + sum2 = 0.0f; + sum3 = 0.0f; + sum4 = 0.0f; + sum5 = 0.0f; + sum6 = 0.0f; + sum7 = 0.0f; + + /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ + pIn1 = pInA; + pIn1B = pIn1 + numColsA; + pIn1C = pIn1 + 2*numColsA; + pIn1D = pIn1 + 3*numColsA; + pIn1E = pIn1 + 4*numColsA; + pIn1F = pIn1 + 5*numColsA; + pIn1G = pIn1 + 6*numColsA; + pIn1H = pIn1 + 7*numColsA; + + acc0 = vdupq_n_f32(0.0); + acc1 = vdupq_n_f32(0.0); + acc2 = vdupq_n_f32(0.0); + acc3 = vdupq_n_f32(0.0); + acc4 = vdupq_n_f32(0.0); + acc5 = vdupq_n_f32(0.0); + acc6 = vdupq_n_f32(0.0); + acc7 = vdupq_n_f32(0.0); + + /* Compute 4 MACs simultaneously. */ + colCnt = numColsA >> 2U; + + /* Matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1)*b(1,1) + a(1,2)*b(2,1) + ... + a(m,p)*b(p,n) */ + a0V = vld1q_f32(pIn1); + a1V = vld1q_f32(pIn1B); + a2V = vld1q_f32(pIn1C); + a3V = vld1q_f32(pIn1D); + a4V = vld1q_f32(pIn1E); + a5V = vld1q_f32(pIn1F); + a6V = vld1q_f32(pIn1G); + a7V = vld1q_f32(pIn1H); + + pIn1 += 4; + pIn1B += 4; + pIn1C += 4; + pIn1D += 4; + pIn1E += 4; + pIn1F += 4; + pIn1G += 4; + pIn1H += 4; + + temp = vsetq_lane_f32(*pIn2,temp,0); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,1); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,2); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,3); + pIn2 += numColsB; + + acc0 = vmlaq_f32(acc0,a0V,temp); + acc1 = vmlaq_f32(acc1,a1V,temp); + acc2 = vmlaq_f32(acc2,a2V,temp); + acc3 = vmlaq_f32(acc3,a3V,temp); + acc4 = vmlaq_f32(acc4,a4V,temp); + acc5 = vmlaq_f32(acc5,a5V,temp); + acc6 = vmlaq_f32(acc6,a6V,temp); + acc7 = vmlaq_f32(acc7,a7V,temp); + + /* Decrement the loop count */ + colCnt--; + } + + accum = vpadd_f32(vget_low_f32(acc0), vget_high_f32(acc0)); + sum0 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc1), vget_high_f32(acc1)); + sum1 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc2), vget_high_f32(acc2)); + sum2 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc3), vget_high_f32(acc3)); + sum3 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc4), vget_high_f32(acc4)); + sum4 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc5), vget_high_f32(acc5)); + sum5 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc6), vget_high_f32(acc6)); + sum6 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + accum = vpadd_f32(vget_low_f32(acc7), vget_high_f32(acc7)); + sum7 += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. + ** No loop unrolling is used. */ + colCnt = numColsA & 3; + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1)*b(1,1) + a(1,2)*b(2,1) + ... + a(m,p)*b(p,n) */ + sum0 += *pIn1++ * (*pIn2); + sum1 += *pIn1B++ * (*pIn2); + sum2 += *pIn1C++ * (*pIn2); + sum3 += *pIn1D++ * (*pIn2); + sum4 += *pIn1E++ * (*pIn2); + sum5 += *pIn1F++ * (*pIn2); + sum6 += *pIn1G++ * (*pIn2); + sum7 += *pIn1H++ * (*pIn2); + pIn2 += numColsB; + + /* Decrement the loop counter */ + colCnt--; + } + + /* Store the result in the destination buffer */ + *px++ = sum0; + *pxB++ = sum1; + *pxC++ = sum2; + *pxD++ = sum3; + *pxE++ = sum4; + *pxF++ = sum5; + *pxG++ = sum6; + *pxH++ = sum7; + + /* Update the pointer pIn2 to point to the starting address of the next column */ + j++; + pIn2 = pSrcB->pData + j; + + /* Decrement the column loop counter */ + col--; + + } while (col > 0U); + + /* Update the pointer pInA to point to the starting address of the next row */ + i = i + numColsB; + pInA = pInA + GROUPOFROWS*numColsA; + + /* Decrement the row loop counter */ + rowCnt--; + } + + /* + + i was the index of a group of rows computed by previous loop. + Now i is the index of a row since below code is computing row per row + and no more group of row per group of rows. + + */ + + i = GROUPOFROWS*i; + rowCnt = row & 7; + + while(rowCnt > 0) + { + /* Output pointer is set to starting address of the row being processed */ + px = pOut + i; + + /* For every row wise process, the column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, the pIn2 pointer is set + ** to the starting address of the pSrcB data */ + pIn2 = pSrcB->pData; + + j = 0U; + + /* Column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = 0.0f; + + /* Initiate the pointer pIn1 to point to the starting address of the column being processed */ + pIn1 = pInA; + + acc0 = vdupq_n_f32(0.0); + + /* Compute 4 MACs simultaneously. */ + colCnt = numColsA >> 2U; + + /* Matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1)*b(1,1) + a(1,2)*b(2,1) + ... + a(m,p)*b(p,n) */ + a0V = vld1q_f32(pIn1); // load & separate real/imag pSrcA (de-interleave 2) + pIn1 += 4; + + temp = vsetq_lane_f32(*pIn2,temp,0); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,1); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,2); + pIn2 += numColsB; + temp = vsetq_lane_f32(*pIn2,temp,3); + pIn2 += numColsB; + + acc0 = vmlaq_f32(acc0,a0V,temp); + + /* Decrement the loop count */ + colCnt--; + } + + accum = vpadd_f32(vget_low_f32(acc0), vget_high_f32(acc0)); + sum += vget_lane_f32(accum, 0) + vget_lane_f32(accum, 1); + + /* If the columns of pSrcA is not a multiple of 4, compute any remaining MACs here. + ** No loop unrolling is used. */ + colCnt = numColsA % 0x4U; + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1)*b(1,1) + a(1,2)*b(2,1) + ... + a(m,p)*b(p,n) */ + sum += *pIn1++ * (*pIn2); + pIn2 += numColsB; + + /* Decrement the loop counter */ + colCnt--; + } + + /* Store the result in the destination buffer */ + *px++ = sum; + + /* Update the pointer pIn2 to point to the starting address of the next column */ + j++; + pIn2 = pSrcB->pData + j; + + /* Decrement the column loop counter */ + col--; + + } while (col > 0U); + + + /* Update the pointer pInA to point to the starting address of the next row */ + i = i + numColsB; + pInA = pInA + numColsA; + + /* Decrement the row loop counter */ + rowCnt--; + + } + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} +#else +/** + * @brief Floating-point matrix multiplication. + * @param[in] *pSrcA points to the first input matrix structure + * @param[in] *pSrcB points to the second input matrix structure + * @param[out] *pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn1 = pSrcA->pData; /* Input data matrix pointer A */ + float32_t *pIn2 = pSrcB->pData; /* Input data matrix pointer B */ + float32_t *pInA = pSrcA->pData; /* Input data matrix pointer A */ + float32_t *pInB = pSrcB->pData; /* Input data matrix pointer B */ + float32_t *pOut = pDst->pData; /* Output data matrix pointer */ + float32_t *px; /* Temporary output data matrix pointer */ + float32_t sum; /* Accumulator */ + uint16_t numRowsA = pSrcA->numRows; /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB->numCols; /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA->numCols; /* Number of columns of input matrix A */ + uint32_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + arm_status status; /* Status of matrix multiplication */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrcA->numCols != pSrcB->numRows) || + (pSrcA->numRows != pDst->numRows) || + (pSrcB->numCols != pDst->numCols) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB->pData; + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = 0.0f; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... + a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + +#else + + /* Initialize cntCnt with number of columns */ + colCnt = numColsA; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (colCnt > 0U) + { + /* c(m,p) = a(m,1) * b(1,p) + a(m,2) * b(2,p) + .... + a(m,n) * b(n,p) */ + + /* Perform the multiply-accumulates */ + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + /* Decrement loop counter */ + colCnt--; + } + + /* Store result in destination buffer */ + *px++ = sum; + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col); + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + numColsB; + pInA = pInA + numColsA; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} + +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of MatrixMult group + */ diff --git a/ARM_math/arm_mat_mult_f64.c b/ARM_math/arm_mat_mult_f64.c new file mode 100644 index 0000000..f7b3110 --- /dev/null +++ b/ARM_math/arm_mat_mult_f64.c @@ -0,0 +1,185 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_mult_f64.c + * Description: Floating-point matrix multiplication + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" + +/** + * @ingroup groupMatrix + */ + +/** + * @addtogroup MatrixMult + * @{ + */ + +/** + * @brief Floating-point matrix multiplication. + * @param[in] *pSrcA points to the first input matrix structure + * @param[in] *pSrcB points to the second input matrix structure + * @param[out] *pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ + + +arm_status arm_mat_mult_f64( + const arm_matrix_instance_f64 * pSrcA, + const arm_matrix_instance_f64 * pSrcB, + arm_matrix_instance_f64 * pDst) +{ + float64_t *pIn1 = pSrcA->pData; /* Input data matrix pointer A */ + float64_t *pIn2 = pSrcB->pData; /* Input data matrix pointer B */ + float64_t *pInA = pSrcA->pData; /* Input data matrix pointer A */ + float64_t *pInB = pSrcB->pData; /* Input data matrix pointer B */ + float64_t *pOut = pDst->pData; /* Output data matrix pointer */ + float64_t *px; /* Temporary output data matrix pointer */ + float64_t sum; /* Accumulator */ + uint16_t numRowsA = pSrcA->numRows; /* Number of rows of input matrix A */ + uint16_t numColsB = pSrcB->numCols; /* Number of columns of input matrix B */ + uint16_t numColsA = pSrcA->numCols; /* Number of columns of input matrix A */ + uint64_t col, i = 0U, row = numRowsA, colCnt; /* Loop counters */ + arm_status status; /* Status of matrix multiplication */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrcA->numCols != pSrcB->numRows) || + (pSrcA->numRows != pDst->numRows) || + (pSrcB->numCols != pDst->numCols) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /* The following loop performs the dot-product of each row in pSrcA with each column in pSrcB */ + /* row loop */ + do + { + /* Output pointer is set to starting address of row being processed */ + px = pOut + i; + + /* For every row wise process, column loop counter is to be initiated */ + col = numColsB; + + /* For every row wise process, pIn2 pointer is set to starting address of pSrcB data */ + pIn2 = pSrcB->pData; + + /* column loop */ + do + { + /* Set the variable sum, that acts as accumulator, to zero */ + sum = 0.0; + + /* Initialize pointer pIn1 to point to starting address of column being processed */ + pIn1 = pInA; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 MACs at a time. */ + colCnt = numColsA >> 2U; + + /* matrix multiplication */ + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + /* Decrement loop counter */ + colCnt--; + } + + /* Loop unrolling: Compute remaining MACs */ + colCnt = numColsA % 0x4U; + +#else + + /* Initialize cntCnt with number of columns */ + colCnt = numColsA; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (colCnt > 0U) + { + /* c(m,n) = a(1,1) * b(1,1) + a(1,2) * b(2,1) + .... + a(m,p) * b(p,n) */ + + /* Perform the multiply-accumulates */ + sum += *pIn1++ * *pIn2; + pIn2 += numColsB; + + /* Decrement loop counter */ + colCnt--; + } + + /* Store result in destination buffer */ + *px++ = sum; + + /* Decrement column loop counter */ + col--; + + /* Update pointer pIn2 to point to starting address of next column */ + pIn2 = pInB + (numColsB - col); + + } while (col > 0U); + + /* Update pointer pInA to point to starting address of next row */ + i = i + numColsB; + pInA = pInA + numColsA; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} + + +/** + * @} end of MatrixMult group + */ diff --git a/ARM_math/arm_mat_trans_f32.c b/ARM_math/arm_mat_trans_f32.c new file mode 100644 index 0000000..27868d2 --- /dev/null +++ b/ARM_math/arm_mat_trans_f32.c @@ -0,0 +1,341 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_trans_f32.c + * Description: Floating-point matrix transpose + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" + +/** + @ingroup groupMatrix + */ + +/** + @defgroup MatrixTrans Matrix Transpose + + Tranposes a matrix. + + Transposing an M x N matrix flips it around the center diagonal and results in an N x M matrix. + + @par Transpose of a 3 x 3 matrix + + \f[ + \begin{pmatrix} + a_{1,1} & a_{1,2} & a_{1,3} \\ + a_{2,1} & a_{2,2} & a_{2,3} \\ + a_{3,1} & a_{3,2} & a_{3,3} \\ + \end{pmatrix}^T + = + \begin{pmatrix} + a_{1,1} & a_{2,1} & a_{3,1} \\ + a_{1,2} & a_{2,2} & a_{3,2} \\ + a_{1,3} & a_{2,3} & a_{3,3} \\ + \end{pmatrix} + \f] + + */ + +/** + @addtogroup MatrixTrans + @{ + */ + +/** + @brief Floating-point matrix transpose. + @param[in] pSrc points to input matrix + @param[out] pDst points to output matrix + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + */ +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#include "arm_helium_utils.h" + +arm_status arm_mat_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + arm_status status; /* status of matrix transpose */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + { + if (pDst->numRows == pDst->numCols) + { + if (pDst->numCols == 2) + return arm_mat_trans_32bit_2x2_mve((uint32_t *)pSrc->pData, (uint32_t *)pDst->pData); + if (pDst->numCols == 3) + return arm_mat_trans_32bit_3x3_mve((uint32_t *)pSrc->pData, (uint32_t *)pDst->pData); + if (pDst->numCols == 4) + return arm_mat_trans_32bit_4x4_mve((uint32_t *)pSrc->pData, (uint32_t *)pDst->pData); + } + + arm_mat_trans_32bit_generic_mve(pSrc->numRows, pSrc->numCols, (uint32_t *)pSrc->pData, (uint32_t *)pDst->pData); + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} + +#else +#if defined(ARM_MATH_NEON) + +arm_status arm_mat_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *px; /* Temporary output data matrix pointer */ + uint16_t nRows = pSrc->numRows; /* number of rows */ + uint16_t nColumns = pSrc->numCols; /* number of columns */ + + uint16_t blkCnt, rowCnt, i = 0U, row = nRows; /* loop counters */ + arm_status status; /* status of matrix transpose */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pDst->numCols) || (pSrc->numCols != pDst->numRows)) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /* Matrix transpose by exchanging the rows with columns */ + /* Row loop */ + rowCnt = row >> 2; + while (rowCnt > 0U) + { + float32x4_t row0V,row1V,row2V,row3V; + float32x4x2_t ra0,ra1,rb0,rb1; + + blkCnt = nColumns >> 2; + + /* The pointer px is set to starting address of the column being processed */ + px = pOut + i; + + /* Compute 4 outputs at a time. + ** a second loop below computes the remaining 1 to 3 samples. */ + while (blkCnt > 0U) /* Column loop */ + { + row0V = vld1q_f32(pIn); + row1V = vld1q_f32(pIn + 1 * nColumns); + row2V = vld1q_f32(pIn + 2 * nColumns); + row3V = vld1q_f32(pIn + 3 * nColumns); + pIn += 4; + + ra0 = vzipq_f32(row0V,row2V); + ra1 = vzipq_f32(row1V,row3V); + + rb0 = vzipq_f32(ra0.val[0],ra1.val[0]); + rb1 = vzipq_f32(ra0.val[1],ra1.val[1]); + + vst1q_f32(px,rb0.val[0]); + px += nRows; + + vst1q_f32(px,rb0.val[1]); + px += nRows; + + vst1q_f32(px,rb1.val[0]); + px += nRows; + + vst1q_f32(px,rb1.val[1]); + px += nRows; + + /* Decrement the column loop counter */ + blkCnt--; + } + + /* Perform matrix transpose for last 3 samples here. */ + blkCnt = nColumns % 0x4U; + + while (blkCnt > 0U) + { + /* Read and store the input element in the destination */ + *px++ = *pIn; + *px++ = *(pIn + 1 * nColumns); + *px++ = *(pIn + 2 * nColumns); + *px++ = *(pIn + 3 * nColumns); + + px += (nRows - 4); + pIn++; + + /* Decrement the column loop counter */ + blkCnt--; + } + + i += 4; + pIn += 3 * nColumns; + + /* Decrement the row loop counter */ + rowCnt--; + + } /* Row loop end */ + + rowCnt = row & 3; + while (rowCnt > 0U) + { + blkCnt = nColumns ; + /* The pointer px is set to starting address of the column being processed */ + px = pOut + i; + + while (blkCnt > 0U) + { + /* Read and store the input element in the destination */ + *px = *pIn++; + + /* Update the pointer px to point to the next row of the transposed matrix */ + px += nRows; + + /* Decrement the column loop counter */ + blkCnt--; + } + i++; + rowCnt -- ; + } + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} +#else +arm_status arm_mat_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst) +{ + float32_t *pIn = pSrc->pData; /* input data matrix pointer */ + float32_t *pOut = pDst->pData; /* output data matrix pointer */ + float32_t *px; /* Temporary output data matrix pointer */ + uint16_t nRows = pSrc->numRows; /* number of rows */ + uint16_t nCols = pSrc->numCols; /* number of columns */ + uint32_t col, row = nRows, i = 0U; /* Loop counters */ + arm_status status; /* status of matrix transpose */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pDst->numCols) || + (pSrc->numCols != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /* Matrix transpose by exchanging the rows with columns */ + /* row loop */ + do + { + /* Pointer px is set to starting address of column being processed */ + px = pOut + i; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + col = nCols >> 2U; + + while (col > 0U) /* column loop */ + { + /* Read and store input element in destination */ + *px = *pIn++; + /* Update pointer px to point to next row of transposed matrix */ + px += nRows; + + *px = *pIn++; + px += nRows; + + *px = *pIn++; + px += nRows; + + *px = *pIn++; + px += nRows; + + /* Decrement column loop counter */ + col--; + } + + /* Loop unrolling: Compute remaining outputs */ + col = nCols % 0x4U; + +#else + + /* Initialize col with number of samples */ + col = nCols; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (col > 0U) + { + /* Read and store input element in destination */ + *px = *pIn++; + + /* Update pointer px to point to next row of transposed matrix */ + px += nRows; + + /* Decrement column loop counter */ + col--; + } + + i++; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); /* row loop end */ + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} +#endif /* #if defined(ARM_MATH_NEON) */ +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + +/** + * @} end of MatrixTrans group + */ diff --git a/ARM_math/arm_mat_trans_f64.c b/ARM_math/arm_mat_trans_f64.c new file mode 100644 index 0000000..a507347 --- /dev/null +++ b/ARM_math/arm_mat_trans_f64.c @@ -0,0 +1,148 @@ +/* ---------------------------------------------------------------------- + * Project: CMSIS DSP Library + * Title: arm_mat_trans_f64.c + * Description: Floating-point matrix transpose + * + * $Date: 23 April 2021 + * $Revision: V1.9.0 + * + * Target Processor: Cortex-M and Cortex-A cores + * -------------------------------------------------------------------- */ +/* + * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "matrix_functions.h" + +/** + @ingroup groupMatrix + */ + + + +/** + @addtogroup MatrixTrans + @{ + */ + +/** + @brief Floating-point matrix transpose. + @param[in] pSrc points to input matrix + @param[out] pDst points to output matrix + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + */ + +arm_status arm_mat_trans_f64( + const arm_matrix_instance_f64 * pSrc, + arm_matrix_instance_f64 * pDst) +{ + float64_t *pIn = pSrc->pData; /* input data matrix pointer */ + float64_t *pOut = pDst->pData; /* output data matrix pointer */ + float64_t *px; /* Temporary output data matrix pointer */ + uint16_t nRows = pSrc->numRows; /* number of rows */ + uint16_t nCols = pSrc->numCols; /* number of columns */ + uint64_t col, row = nRows, i = 0U; /* Loop counters */ + arm_status status; /* status of matrix transpose */ + +#ifdef ARM_MATH_MATRIX_CHECK + + /* Check for matrix mismatch condition */ + if ((pSrc->numRows != pDst->numCols) || + (pSrc->numCols != pDst->numRows) ) + { + /* Set status as ARM_MATH_SIZE_MISMATCH */ + status = ARM_MATH_SIZE_MISMATCH; + } + else + +#endif /* #ifdef ARM_MATH_MATRIX_CHECK */ + + { + /* Matrix transpose by exchanging the rows with columns */ + /* row loop */ + do + { + /* Pointer px is set to starting address of column being processed */ + px = pOut + i; + +#if defined (ARM_MATH_LOOPUNROLL) + + /* Loop unrolling: Compute 4 outputs at a time */ + col = nCols >> 2U; + + while (col > 0U) /* column loop */ + { + /* Read and store input element in destination */ + *px = *pIn++; + /* Update pointer px to point to next row of transposed matrix */ + px += nRows; + + *px = *pIn++; + px += nRows; + + *px = *pIn++; + px += nRows; + + *px = *pIn++; + px += nRows; + + /* Decrement column loop counter */ + col--; + } + + /* Loop unrolling: Compute remaining outputs */ + col = nCols % 0x4U; + +#else + + /* Initialize col with number of samples */ + col = nCols; + +#endif /* #if defined (ARM_MATH_LOOPUNROLL) */ + + while (col > 0U) + { + /* Read and store input element in destination */ + *px = *pIn++; + + /* Update pointer px to point to next row of transposed matrix */ + px += nRows; + + /* Decrement column loop counter */ + col--; + } + + i++; + + /* Decrement row loop counter */ + row--; + + } while (row > 0U); /* row loop end */ + + /* Set status as ARM_MATH_SUCCESS */ + status = ARM_MATH_SUCCESS; + } + + /* Return to application */ + return (status); +} + +/** + * @} end of MatrixTrans group + */ diff --git a/ARM_math/arm_math_memory.h b/ARM_math/arm_math_memory.h new file mode 100644 index 0000000..7bd83dc --- /dev/null +++ b/ARM_math/arm_math_memory.h @@ -0,0 +1,206 @@ +/****************************************************************************** + * @file arm_math_memory.h + * @brief Public header file for CMSIS DSP Library + * @version V1.10.0 + * @date 08 July 2021 + * Target Processor: Cortex-M and Cortex-A cores + ******************************************************************************/ +/* + * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_MATH_MEMORY_H_ + +#define _ARM_MATH_MEMORY_H_ + +#include "arm_math_types.h" + + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** + @brief definition to read/write two 16 bit values. + @deprecated + */ +#if defined ( __CC_ARM ) + #define __SIMD32_TYPE int32_t __packed +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define __SIMD32_TYPE int32_t +#elif defined ( __GNUC__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __ICCARM__ ) + #define __SIMD32_TYPE int32_t __packed +#elif defined ( __TI_ARM__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __CSMC__ ) + #define __SIMD32_TYPE int32_t +#elif defined ( __TASKING__ ) + #define __SIMD32_TYPE __un(aligned) int32_t +#elif defined(_MSC_VER ) + #define __SIMD32_TYPE int32_t +#else + #error Unknown compiler +#endif + +#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) +#define __SIMD32_CONST(addr) ( (__SIMD32_TYPE * ) (addr)) +#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE * ) (addr)) +#define __SIMD64(addr) (*( int64_t **) & (addr)) + + +/* SIMD replacement */ + + +/** + @brief Read 2 Q15 from Q15 pointer. + @param[in] pQ15 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q15x2 ( + q15_t const * pQ15) +{ + q31_t val; + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, pQ15, 4); +#else + val = (pQ15[1] << 16) | (pQ15[0] & 0x0FFFF) ; +#endif + + return (val); +} + +/** + @brief Read 2 Q15 from Q15 pointer and increment pointer afterwards. + @param[in] pQ15 points to input value + @return Q31 value + */ +#define read_q15x2_ia(pQ15) read_q15x2((*(pQ15) += 2) - 2) + +/** + @brief Read 2 Q15 from Q15 pointer and decrement pointer afterwards. + @param[in] pQ15 points to input value + @return Q31 value + */ +#define read_q15x2_da(pQ15) read_q15x2((*(pQ15) -= 2) + 2) + +/** + @brief Write 2 Q15 to Q15 pointer and increment pointer afterwards. + @param[in] pQ15 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q15x2_ia ( + q15_t ** pQ15, + q31_t value) +{ + q31_t val = value; +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (*pQ15, &val, 4); +#else + (*pQ15)[0] = (q15_t)(val & 0x0FFFF); + (*pQ15)[1] = (q15_t)((val >> 16) & 0x0FFFF); +#endif + + *pQ15 += 2; +} + +/** + @brief Write 2 Q15 to Q15 pointer. + @param[in] pQ15 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q15x2 ( + q15_t * pQ15, + q31_t value) +{ + q31_t val = value; + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (pQ15, &val, 4); +#else + pQ15[0] = (q15_t)(val & 0x0FFFF); + pQ15[1] = (q15_t)(val >> 16); +#endif +} + + +/** + @brief Read 4 Q7 from Q7 pointer + @param[in] pQ7 points to input value + @return Q31 value + */ +__STATIC_FORCEINLINE q31_t read_q7x4 ( + q7_t const * pQ7) +{ + q31_t val; + +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (&val, pQ7, 4); +#else + val =((pQ7[3] & 0x0FF) << 24) | ((pQ7[2] & 0x0FF) << 16) | ((pQ7[1] & 0x0FF) << 8) | (pQ7[0] & 0x0FF); +#endif + return (val); +} + +/** + @brief Read 4 Q7 from Q7 pointer and increment pointer afterwards. + @param[in] pQ7 points to input value + @return Q31 value + */ +#define read_q7x4_ia(pQ7) read_q7x4((*(pQ7) += 4) - 4) + +/** + @brief Read 4 Q7 from Q7 pointer and decrement pointer afterwards. + @param[in] pQ7 points to input value + @return Q31 value + */ +#define read_q7x4_da(pQ7) read_q7x4((*(pQ7) -= 4) + 4) + +/** + @brief Write 4 Q7 to Q7 pointer and increment pointer afterwards. + @param[in] pQ7 points to input value + @param[in] value Q31 value + @return none + */ +__STATIC_FORCEINLINE void write_q7x4_ia ( + q7_t ** pQ7, + q31_t value) +{ + q31_t val = value; +#ifdef __ARM_FEATURE_UNALIGNED + memcpy (*pQ7, &val, 4); +#else + (*pQ7)[0] = (q7_t)(val & 0x0FF); + (*pQ7)[1] = (q7_t)((val >> 8) & 0x0FF); + (*pQ7)[2] = (q7_t)((val >> 16) & 0x0FF); + (*pQ7)[3] = (q7_t)((val >> 24) & 0x0FF); + +#endif + *pQ7 += 4; +} + + +#ifdef __cplusplus +} +#endif + +#endif /*ifndef _ARM_MATH_MEMORY_H_ */ diff --git a/ARM_math/arm_math_types.h b/ARM_math/arm_math_types.h new file mode 100644 index 0000000..c615e66 --- /dev/null +++ b/ARM_math/arm_math_types.h @@ -0,0 +1,616 @@ +/****************************************************************************** + * @file arm_math_types.h + * @brief Public header file for CMSIS DSP Library + * @version V1.10.0 + * @date 08 July 2021 + * Target Processor: Cortex-M and Cortex-A cores + ******************************************************************************/ +/* + * Copyright (c) 2010-2021 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_MATH_TYPES_H_ + +#define _ARM_MATH_TYPES_H_ + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __APPLE_CC__ ) + #pragma GCC diagnostic ignored "-Wold-style-cast" + +#elif defined ( __GNUC__ ) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wsign-conversion" + #pragma GCC diagnostic ignored "-Wconversion" + #pragma GCC diagnostic ignored "-Wunused-parameter" + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + + +/* Included for instrinsics definitions */ +#if defined (_MSC_VER ) +#include +#define __STATIC_FORCEINLINE static __forceinline +#define __STATIC_INLINE static __inline +#define __ALIGNED(x) __declspec(align(x)) +#elif defined ( __APPLE_CC__ ) +#include +#define __ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline +#elif defined (__GNUC_PYTHON__) +#include +#define __ALIGNED(x) __attribute__((aligned(x))) +#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) +#define __STATIC_INLINE static inline + +#else +#include "cmsis_compiler.h" +#endif + + + +#include +#include +#include +#include + +/* evaluate ARM DSP feature */ +#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) + #define ARM_MATH_DSP 1 +#endif + +#if defined(ARM_MATH_NEON) + #if defined(_MSC_VER) && defined(_M_ARM64EC) + #include + #else + #include + #endif + #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + #if !defined(ARM_MATH_NEON_FLOAT16) + #define ARM_MATH_NEON_FLOAT16 + #endif + #endif +#endif + +#if !defined(ARM_MATH_AUTOVECTORIZE) + + +#if defined(__ARM_FEATURE_MVE) +#if __ARM_FEATURE_MVE + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + #endif +#endif + +#if (__ARM_FEATURE_MVE & 2) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + +#endif /*defined(__ARM_FEATURE_MVE)*/ +#endif /*!defined(ARM_MATH_AUTOVECTORIZE)*/ + + +#if defined (ARM_MATH_HELIUM) + #if !defined(ARM_MATH_MVEF) + #define ARM_MATH_MVEF + #endif + + #if !defined(ARM_MATH_MVEI) + #define ARM_MATH_MVEI + #endif + + #if !defined(ARM_MATH_MVE_FLOAT16) + #define ARM_MATH_MVE_FLOAT16 + #endif +#endif + + + +#if defined ( __CC_ARM ) + /* Enter low optimization region - place directly above function definition */ + #if defined( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("push") \ + _Pragma ("O1") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_EXIT \ + _Pragma ("pop") + #else + #define LOW_OPTIMIZATION_EXIT + #endif + + /* Enter low optimization region - place directly above function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined (__ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __APPLE_CC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __GNUC__ ) + #define LOW_OPTIMIZATION_ENTER \ + __attribute__(( optimize("-O1") )) + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __ICCARM__ ) + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define LOW_OPTIMIZATION_EXIT + + /* Enter low optimization region - place directly above function definition */ + #if defined ( __ARM_ARCH_7EM__ ) + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \ + _Pragma ("optimize=low") + #else + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #endif + + /* Exit low optimization region - place directly after end of function definition */ + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TI_ARM__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __CSMC__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( __TASKING__ ) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT + +#elif defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) + #define LOW_OPTIMIZATION_ENTER + #define LOW_OPTIMIZATION_EXIT + #define IAR_ONLY_LOW_OPTIMIZATION_ENTER + #define IAR_ONLY_LOW_OPTIMIZATION_EXIT +#endif + + + +/* Compiler specific diagnostic adjustment */ +#if defined ( __CC_ARM ) + +#elif defined ( __ARMCC_VERSION ) && ( __ARMCC_VERSION >= 6010050 ) + +#elif defined ( __APPLE_CC__ ) + +#elif defined ( __GNUC__ ) +#pragma GCC diagnostic pop + +#elif defined ( __ICCARM__ ) + +#elif defined ( __TI_ARM__ ) + +#elif defined ( __CSMC__ ) + +#elif defined ( __TASKING__ ) + +#elif defined ( _MSC_VER ) + +#else + #error Unknown compiler +#endif + +#ifdef __cplusplus +} +#endif + +#if defined(__ARM_FEATURE_MVE) && __ARM_FEATURE_MVE +#include +#endif + +#ifdef __cplusplus +extern "C" +{ +#endif + + /** + * @brief 8-bit fractional data type in 1.7 format. + */ + typedef int8_t q7_t; + + /** + * @brief 16-bit fractional data type in 1.15 format. + */ + typedef int16_t q15_t; + + /** + * @brief 32-bit fractional data type in 1.31 format. + */ + typedef int32_t q31_t; + + /** + * @brief 64-bit fractional data type in 1.63 format. + */ + typedef int64_t q63_t; + + /** + * @brief 32-bit floating-point type definition. + */ +#if !defined(__ICCARM__) || !(__ARM_FEATURE_MVE & 2) + typedef float float32_t; +#endif + + /** + * @brief 64-bit floating-point type definition. + */ + typedef double float64_t; + + /** + * @brief vector types + */ +#if defined(ARM_MATH_NEON) || (defined (ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)) + /** + * @brief 64-bit fractional 128-bit vector data type in 1.63 format + */ + typedef int64x2_t q63x2_t; + + /** + * @brief 32-bit fractional 128-bit vector data type in 1.31 format. + */ + typedef int32x4_t q31x4_t; + + /** + * @brief 16-bit fractional 128-bit vector data type with 16-bit alignment in 1.15 format. + */ + typedef __ALIGNED(2) int16x8_t q15x8_t; + + /** + * @brief 8-bit fractional 128-bit vector data type with 8-bit alignment in 1.7 format. + */ + typedef __ALIGNED(1) int8x16_t q7x16_t; + + /** + * @brief 32-bit fractional 128-bit vector pair data type in 1.31 format. + */ + typedef int32x4x2_t q31x4x2_t; + + /** + * @brief 32-bit fractional 128-bit vector quadruplet data type in 1.31 format. + */ + typedef int32x4x4_t q31x4x4_t; + + /** + * @brief 16-bit fractional 128-bit vector pair data type in 1.15 format. + */ + typedef int16x8x2_t q15x8x2_t; + + /** + * @brief 16-bit fractional 128-bit vector quadruplet data type in 1.15 format. + */ + typedef int16x8x4_t q15x8x4_t; + + /** + * @brief 8-bit fractional 128-bit vector pair data type in 1.7 format. + */ + typedef int8x16x2_t q7x16x2_t; + + /** + * @brief 8-bit fractional 128-bit vector quadruplet data type in 1.7 format. + */ + typedef int8x16x4_t q7x16x4_t; + + /** + * @brief 32-bit fractional data type in 9.23 format. + */ + typedef int32_t q23_t; + + /** + * @brief 32-bit fractional 128-bit vector data type in 9.23 format. + */ + typedef int32x4_t q23x4_t; + + /** + * @brief 64-bit status 128-bit vector data type. + */ + typedef int64x2_t status64x2_t; + + /** + * @brief 32-bit status 128-bit vector data type. + */ + typedef int32x4_t status32x4_t; + + /** + * @brief 16-bit status 128-bit vector data type. + */ + typedef int16x8_t status16x8_t; + + /** + * @brief 8-bit status 128-bit vector data type. + */ + typedef int8x16_t status8x16_t; + + +#endif + +#if defined(ARM_MATH_NEON) || (defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)) /* floating point vector*/ + /** + * @brief 32-bit floating-point 128-bit vector type + */ + typedef float32x4_t f32x4_t; + + /** + * @brief 32-bit floating-point 128-bit vector pair data type + */ + typedef float32x4x2_t f32x4x2_t; + + /** + * @brief 32-bit floating-point 128-bit vector quadruplet data type + */ + typedef float32x4x4_t f32x4x4_t; + + /** + * @brief 32-bit ubiquitous 128-bit vector data type + */ + typedef union _any32x4_t + { + float32x4_t f; + int32x4_t i; + } any32x4_t; + +#endif + +#if defined(ARM_MATH_NEON) + /** + * @brief 32-bit fractional 64-bit vector data type in 1.31 format. + */ + typedef int32x2_t q31x2_t; + + /** + * @brief 16-bit fractional 64-bit vector data type in 1.15 format. + */ + typedef __ALIGNED(2) int16x4_t q15x4_t; + + /** + * @brief 8-bit fractional 64-bit vector data type in 1.7 format. + */ + typedef __ALIGNED(1) int8x8_t q7x8_t; + + /** + * @brief 32-bit float 64-bit vector data type. + */ + typedef float32x2_t f32x2_t; + + /** + * @brief 32-bit floating-point 128-bit vector triplet data type + */ + typedef float32x4x3_t f32x4x3_t; + + + /** + * @brief 32-bit fractional 128-bit vector triplet data type in 1.31 format + */ + typedef int32x4x3_t q31x4x3_t; + + /** + * @brief 16-bit fractional 128-bit vector triplet data type in 1.15 format + */ + typedef int16x8x3_t q15x8x3_t; + + /** + * @brief 8-bit fractional 128-bit vector triplet data type in 1.7 format + */ + typedef int8x16x3_t q7x16x3_t; + + /** + * @brief 32-bit floating-point 64-bit vector pair data type + */ + typedef float32x2x2_t f32x2x2_t; + + /** + * @brief 32-bit floating-point 64-bit vector triplet data type + */ + typedef float32x2x3_t f32x2x3_t; + + /** + * @brief 32-bit floating-point 64-bit vector quadruplet data type + */ + typedef float32x2x4_t f32x2x4_t; + + + /** + * @brief 32-bit fractional 64-bit vector pair data type in 1.31 format + */ + typedef int32x2x2_t q31x2x2_t; + + /** + * @brief 32-bit fractional 64-bit vector triplet data type in 1.31 format + */ + typedef int32x2x3_t q31x2x3_t; + + /** + * @brief 32-bit fractional 64-bit vector quadruplet data type in 1.31 format + */ + typedef int32x4x3_t q31x2x4_t; + + /** + * @brief 16-bit fractional 64-bit vector pair data type in 1.15 format + */ + typedef int16x4x2_t q15x4x2_t; + + /** + * @brief 16-bit fractional 64-bit vector triplet data type in 1.15 format + */ + typedef int16x4x2_t q15x4x3_t; + + /** + * @brief 16-bit fractional 64-bit vector quadruplet data type in 1.15 format + */ + typedef int16x4x3_t q15x4x4_t; + + /** + * @brief 8-bit fractional 64-bit vector pair data type in 1.7 format + */ + typedef int8x8x2_t q7x8x2_t; + + /** + * @brief 8-bit fractional 64-bit vector triplet data type in 1.7 format + */ + typedef int8x8x3_t q7x8x3_t; + + /** + * @brief 8-bit fractional 64-bit vector quadruplet data type in 1.7 format + */ + typedef int8x8x4_t q7x8x4_t; + + /** + * @brief 32-bit ubiquitous 64-bit vector data type + */ + typedef union _any32x2_t + { + float32x2_t f; + int32x2_t i; + } any32x2_t; + + + /** + * @brief 32-bit status 64-bit vector data type. + */ + typedef int32x4_t status32x2_t; + + /** + * @brief 16-bit status 64-bit vector data type. + */ + typedef int16x8_t status16x4_t; + + /** + * @brief 8-bit status 64-bit vector data type. + */ + typedef int8x16_t status8x8_t; + +#endif + + + + + +#define F64_MAX ((float64_t)DBL_MAX) +#define F32_MAX ((float32_t)FLT_MAX) + + + +#define F64_MIN (-DBL_MAX) +#define F32_MIN (-FLT_MAX) + + + +#define F64_ABSMAX ((float64_t)DBL_MAX) +#define F32_ABSMAX ((float32_t)FLT_MAX) + + + +#define F64_ABSMIN ((float64_t)0.0) +#define F32_ABSMIN ((float32_t)0.0) + + +#define Q31_MAX ((q31_t)(0x7FFFFFFFL)) +#define Q15_MAX ((q15_t)(0x7FFF)) +#define Q7_MAX ((q7_t)(0x7F)) +#define Q31_MIN ((q31_t)(0x80000000L)) +#define Q15_MIN ((q15_t)(0x8000)) +#define Q7_MIN ((q7_t)(0x80)) + +#define Q31_ABSMAX ((q31_t)(0x7FFFFFFFL)) +#define Q15_ABSMAX ((q15_t)(0x7FFF)) +#define Q7_ABSMAX ((q7_t)(0x7F)) +#define Q31_ABSMIN ((q31_t)0) +#define Q15_ABSMIN ((q15_t)0) +#define Q7_ABSMIN ((q7_t)0) + + /* Dimension C vector space */ + #define CMPLX_DIM 2 + + /** + * @brief Error status returned by some functions in the library. + */ + + typedef enum + { + ARM_MATH_SUCCESS = 0, /**< No error */ + ARM_MATH_ARGUMENT_ERROR = -1, /**< One or more arguments are incorrect */ + ARM_MATH_LENGTH_ERROR = -2, /**< Length of data buffer is incorrect */ + ARM_MATH_SIZE_MISMATCH = -3, /**< Size of matrices is not compatible with the operation */ + ARM_MATH_NANINF = -4, /**< Not-a-number (NaN) or infinity is generated */ + ARM_MATH_SINGULAR = -5, /**< Input matrix is singular and cannot be inverted */ + ARM_MATH_TEST_FAILURE = -6, /**< Test Failed */ + ARM_MATH_DECOMPOSITION_FAILURE = -7 /**< Decomposition Failed */ + } arm_status; + + +#ifdef __cplusplus +} +#endif + +#endif /*ifndef _ARM_MATH_TYPES_H_ */ diff --git a/ARM_math/cmsis_compiler.h b/ARM_math/cmsis_compiler.h new file mode 100644 index 0000000..adbf296 --- /dev/null +++ b/ARM_math/cmsis_compiler.h @@ -0,0 +1,283 @@ +/**************************************************************************//** + * @file cmsis_compiler.h + * @brief CMSIS compiler generic header file + * @version V5.1.0 + * @date 09. October 2018 + ******************************************************************************/ +/* + * Copyright (c) 2009-2018 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CMSIS_COMPILER_H +#define __CMSIS_COMPILER_H + +#include + +/* + * Arm Compiler 4/5 + */ +#if defined ( __CC_ARM ) + #include "cmsis_armcc.h" + + +/* + * Arm Compiler 6.6 LTM (armclang) + */ +#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) && (__ARMCC_VERSION < 6100100) + #include "cmsis_armclang_ltm.h" + + /* + * Arm Compiler above 6.10.1 (armclang) + */ +#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) + #include "cmsis_armclang.h" + + +/* + * GNU Compiler + */ +#elif defined ( __GNUC__ ) + #include "cmsis_gcc.h" + + +/* + * IAR Compiler + */ +#elif defined ( __ICCARM__ ) + #include + + +/* + * TI Arm Compiler + */ +#elif defined ( __TI_ARM__ ) + #include + + #ifndef __ASM + #define __ASM __asm + #endif + #ifndef __INLINE + #define __INLINE inline + #endif + #ifndef __STATIC_INLINE + #define __STATIC_INLINE static inline + #endif + #ifndef __STATIC_FORCEINLINE + #define __STATIC_FORCEINLINE __STATIC_INLINE + #endif + #ifndef __NO_RETURN + #define __NO_RETURN __attribute__((noreturn)) + #endif + #ifndef __USED + #define __USED __attribute__((used)) + #endif + #ifndef __WEAK + #define __WEAK __attribute__((weak)) + #endif + #ifndef __PACKED + #define __PACKED __attribute__((packed)) + #endif + #ifndef __PACKED_STRUCT + #define __PACKED_STRUCT struct __attribute__((packed)) + #endif + #ifndef __PACKED_UNION + #define __PACKED_UNION union __attribute__((packed)) + #endif + #ifndef __UNALIGNED_UINT32 /* deprecated */ + struct __attribute__((packed)) T_UINT32 { uint32_t v; }; + #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) + #endif + #ifndef __UNALIGNED_UINT16_WRITE + __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; + #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void*)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT16_READ + __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; + #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) + #endif + #ifndef __UNALIGNED_UINT32_WRITE + __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; + #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT32_READ + __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; + #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) + #endif + #ifndef __ALIGNED + #define __ALIGNED(x) __attribute__((aligned(x))) + #endif + #ifndef __RESTRICT + #define __RESTRICT __restrict + #endif + #ifndef __COMPILER_BARRIER + #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. + #define __COMPILER_BARRIER() (void)0 + #endif + + +/* + * TASKING Compiler + */ +#elif defined ( __TASKING__ ) + /* + * The CMSIS functions have been implemented as intrinsics in the compiler. + * Please use "carm -?i" to get an up to date list of all intrinsics, + * Including the CMSIS ones. + */ + + #ifndef __ASM + #define __ASM __asm + #endif + #ifndef __INLINE + #define __INLINE inline + #endif + #ifndef __STATIC_INLINE + #define __STATIC_INLINE static inline + #endif + #ifndef __STATIC_FORCEINLINE + #define __STATIC_FORCEINLINE __STATIC_INLINE + #endif + #ifndef __NO_RETURN + #define __NO_RETURN __attribute__((noreturn)) + #endif + #ifndef __USED + #define __USED __attribute__((used)) + #endif + #ifndef __WEAK + #define __WEAK __attribute__((weak)) + #endif + #ifndef __PACKED + #define __PACKED __packed__ + #endif + #ifndef __PACKED_STRUCT + #define __PACKED_STRUCT struct __packed__ + #endif + #ifndef __PACKED_UNION + #define __PACKED_UNION union __packed__ + #endif + #ifndef __UNALIGNED_UINT32 /* deprecated */ + struct __packed__ T_UINT32 { uint32_t v; }; + #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) + #endif + #ifndef __UNALIGNED_UINT16_WRITE + __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; + #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT16_READ + __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; + #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) + #endif + #ifndef __UNALIGNED_UINT32_WRITE + __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; + #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT32_READ + __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; + #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) + #endif + #ifndef __ALIGNED + #define __ALIGNED(x) __align(x) + #endif + #ifndef __RESTRICT + #warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored. + #define __RESTRICT + #endif + #ifndef __COMPILER_BARRIER + #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. + #define __COMPILER_BARRIER() (void)0 + #endif + + +/* + * COSMIC Compiler + */ +#elif defined ( __CSMC__ ) + #include + + #ifndef __ASM + #define __ASM _asm + #endif + #ifndef __INLINE + #define __INLINE inline + #endif + #ifndef __STATIC_INLINE + #define __STATIC_INLINE static inline + #endif + #ifndef __STATIC_FORCEINLINE + #define __STATIC_FORCEINLINE __STATIC_INLINE + #endif + #ifndef __NO_RETURN + // NO RETURN is automatically detected hence no warning here + #define __NO_RETURN + #endif + #ifndef __USED + #warning No compiler specific solution for __USED. __USED is ignored. + #define __USED + #endif + #ifndef __WEAK + #define __WEAK __weak + #endif + #ifndef __PACKED + #define __PACKED @packed + #endif + #ifndef __PACKED_STRUCT + #define __PACKED_STRUCT @packed struct + #endif + #ifndef __PACKED_UNION + #define __PACKED_UNION @packed union + #endif + #ifndef __UNALIGNED_UINT32 /* deprecated */ + @packed struct T_UINT32 { uint32_t v; }; + #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) + #endif + #ifndef __UNALIGNED_UINT16_WRITE + __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; + #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT16_READ + __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; + #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) + #endif + #ifndef __UNALIGNED_UINT32_WRITE + __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; + #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) + #endif + #ifndef __UNALIGNED_UINT32_READ + __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; + #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) + #endif + #ifndef __ALIGNED + #warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored. + #define __ALIGNED(x) + #endif + #ifndef __RESTRICT + #warning No compiler specific solution for __RESTRICT. __RESTRICT is ignored. + #define __RESTRICT + #endif + #ifndef __COMPILER_BARRIER + #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. + #define __COMPILER_BARRIER() (void)0 + #endif + + +#else + #error Unknown compiler. +#endif + + +#endif /* __CMSIS_COMPILER_H */ + diff --git a/ARM_math/cmsis_gcc.h b/ARM_math/cmsis_gcc.h new file mode 100644 index 0000000..3ddcc58 --- /dev/null +++ b/ARM_math/cmsis_gcc.h @@ -0,0 +1,2168 @@ +/**************************************************************************//** + * @file cmsis_gcc.h + * @brief CMSIS compiler GCC header file + * @version V5.2.0 + * @date 08. May 2019 + ******************************************************************************/ +/* + * Copyright (c) 2009-2019 Arm Limited. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __CMSIS_GCC_H +#define __CMSIS_GCC_H + +/* ignore some GCC warnings */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wsign-conversion" +#pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" + +/* Fallback for __has_builtin */ +#ifndef __has_builtin + #define __has_builtin(x) (0) +#endif + +/* CMSIS compiler specific defines */ +#ifndef __ASM + #define __ASM __asm +#endif +#ifndef __INLINE + #define __INLINE inline +#endif +#ifndef __STATIC_INLINE + #define __STATIC_INLINE static inline +#endif +#ifndef __STATIC_FORCEINLINE + #define __STATIC_FORCEINLINE __attribute__((always_inline)) static inline +#endif +#ifndef __NO_RETURN + #define __NO_RETURN __attribute__((__noreturn__)) +#endif +#ifndef __USED + #define __USED __attribute__((used)) +#endif +#ifndef __WEAK + #define __WEAK __attribute__((weak)) +#endif +#ifndef __PACKED + #define __PACKED __attribute__((packed, aligned(1))) +#endif +#ifndef __PACKED_STRUCT + #define __PACKED_STRUCT struct __attribute__((packed, aligned(1))) +#endif +#ifndef __PACKED_UNION + #define __PACKED_UNION union __attribute__((packed, aligned(1))) +#endif +#ifndef __UNALIGNED_UINT32 /* deprecated */ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + struct __attribute__((packed)) T_UINT32 { uint32_t v; }; + #pragma GCC diagnostic pop + #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) +#endif +#ifndef __UNALIGNED_UINT16_WRITE + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; + #pragma GCC diagnostic pop + #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) +#endif +#ifndef __UNALIGNED_UINT16_READ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; + #pragma GCC diagnostic pop + #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) +#endif +#ifndef __UNALIGNED_UINT32_WRITE + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; + #pragma GCC diagnostic pop + #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) +#endif +#ifndef __UNALIGNED_UINT32_READ + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wpacked" + #pragma GCC diagnostic ignored "-Wattributes" + __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; + #pragma GCC diagnostic pop + #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) +#endif +#ifndef __ALIGNED + #define __ALIGNED(x) __attribute__((aligned(x))) +#endif +#ifndef __RESTRICT + #define __RESTRICT __restrict +#endif +#ifndef __COMPILER_BARRIER + #define __COMPILER_BARRIER() __ASM volatile("":::"memory") +#endif + +/* ######################### Startup and Lowlevel Init ######################## */ + +#ifndef __PROGRAM_START + +/** + \brief Initializes data and bss sections + \details This default implementations initialized all data and additional bss + sections relying on .copy.table and .zero.table specified properly + in the used linker script. + + */ +__STATIC_FORCEINLINE __NO_RETURN void __cmsis_start(void) +{ + extern void _start(void) __NO_RETURN; + + typedef struct { + uint32_t const* src; + uint32_t* dest; + uint32_t wlen; + } __copy_table_t; + + typedef struct { + uint32_t* dest; + uint32_t wlen; + } __zero_table_t; + + extern const __copy_table_t __copy_table_start__; + extern const __copy_table_t __copy_table_end__; + extern const __zero_table_t __zero_table_start__; + extern const __zero_table_t __zero_table_end__; + + for (__copy_table_t const* pTable = &__copy_table_start__; pTable < &__copy_table_end__; ++pTable) { + for(uint32_t i=0u; iwlen; ++i) { + pTable->dest[i] = pTable->src[i]; + } + } + + for (__zero_table_t const* pTable = &__zero_table_start__; pTable < &__zero_table_end__; ++pTable) { + for(uint32_t i=0u; iwlen; ++i) { + pTable->dest[i] = 0u; + } + } + + _start(); +} + +#define __PROGRAM_START __cmsis_start +#endif + +#ifndef __INITIAL_SP +#define __INITIAL_SP __StackTop +#endif + +#ifndef __STACK_LIMIT +#define __STACK_LIMIT __StackLimit +#endif + +#ifndef __VECTOR_TABLE +#define __VECTOR_TABLE __Vectors +#endif + +#ifndef __VECTOR_TABLE_ATTRIBUTE +#define __VECTOR_TABLE_ATTRIBUTE __attribute((used, section(".vectors"))) +#endif + +/* ########################### Core Function Access ########################### */ +/** \ingroup CMSIS_Core_FunctionInterface + \defgroup CMSIS_Core_RegAccFunctions CMSIS Core Register Access Functions + @{ + */ + +/** + \brief Enable IRQ Interrupts + \details Enables IRQ interrupts by clearing the I-bit in the CPSR. + Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __enable_irq(void) +{ + __ASM volatile ("cpsie i" : : : "memory"); +} + + +/** + \brief Disable IRQ Interrupts + \details Disables IRQ interrupts by setting the I-bit in the CPSR. + Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __disable_irq(void) +{ + __ASM volatile ("cpsid i" : : : "memory"); +} + + +/** + \brief Get Control Register + \details Returns the content of the Control Register. + \return Control Register value + */ +__STATIC_FORCEINLINE uint32_t __get_CONTROL(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, control" : "=r" (result) ); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Control Register (non-secure) + \details Returns the content of the non-secure Control Register when in secure mode. + \return non-secure Control Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_CONTROL_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, control_ns" : "=r" (result) ); + return(result); +} +#endif + + +/** + \brief Set Control Register + \details Writes the given value to the Control Register. + \param [in] control Control Register value to set + */ +__STATIC_FORCEINLINE void __set_CONTROL(uint32_t control) +{ + __ASM volatile ("MSR control, %0" : : "r" (control) : "memory"); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Control Register (non-secure) + \details Writes the given value to the non-secure Control Register when in secure state. + \param [in] control Control Register value to set + */ +__STATIC_FORCEINLINE void __TZ_set_CONTROL_NS(uint32_t control) +{ + __ASM volatile ("MSR control_ns, %0" : : "r" (control) : "memory"); +} +#endif + + +/** + \brief Get IPSR Register + \details Returns the content of the IPSR Register. + \return IPSR Register value + */ +__STATIC_FORCEINLINE uint32_t __get_IPSR(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, ipsr" : "=r" (result) ); + return(result); +} + + +/** + \brief Get APSR Register + \details Returns the content of the APSR Register. + \return APSR Register value + */ +__STATIC_FORCEINLINE uint32_t __get_APSR(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, apsr" : "=r" (result) ); + return(result); +} + + +/** + \brief Get xPSR Register + \details Returns the content of the xPSR Register. + \return xPSR Register value + */ +__STATIC_FORCEINLINE uint32_t __get_xPSR(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, xpsr" : "=r" (result) ); + return(result); +} + + +/** + \brief Get Process Stack Pointer + \details Returns the current value of the Process Stack Pointer (PSP). + \return PSP Register value + */ +__STATIC_FORCEINLINE uint32_t __get_PSP(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, psp" : "=r" (result) ); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Process Stack Pointer (non-secure) + \details Returns the current value of the non-secure Process Stack Pointer (PSP) when in secure state. + \return PSP Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_PSP_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, psp_ns" : "=r" (result) ); + return(result); +} +#endif + + +/** + \brief Set Process Stack Pointer + \details Assigns the given value to the Process Stack Pointer (PSP). + \param [in] topOfProcStack Process Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __set_PSP(uint32_t topOfProcStack) +{ + __ASM volatile ("MSR psp, %0" : : "r" (topOfProcStack) : ); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Process Stack Pointer (non-secure) + \details Assigns the given value to the non-secure Process Stack Pointer (PSP) when in secure state. + \param [in] topOfProcStack Process Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __TZ_set_PSP_NS(uint32_t topOfProcStack) +{ + __ASM volatile ("MSR psp_ns, %0" : : "r" (topOfProcStack) : ); +} +#endif + + +/** + \brief Get Main Stack Pointer + \details Returns the current value of the Main Stack Pointer (MSP). + \return MSP Register value + */ +__STATIC_FORCEINLINE uint32_t __get_MSP(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, msp" : "=r" (result) ); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Main Stack Pointer (non-secure) + \details Returns the current value of the non-secure Main Stack Pointer (MSP) when in secure state. + \return MSP Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_MSP_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, msp_ns" : "=r" (result) ); + return(result); +} +#endif + + +/** + \brief Set Main Stack Pointer + \details Assigns the given value to the Main Stack Pointer (MSP). + \param [in] topOfMainStack Main Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __set_MSP(uint32_t topOfMainStack) +{ + __ASM volatile ("MSR msp, %0" : : "r" (topOfMainStack) : ); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Main Stack Pointer (non-secure) + \details Assigns the given value to the non-secure Main Stack Pointer (MSP) when in secure state. + \param [in] topOfMainStack Main Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __TZ_set_MSP_NS(uint32_t topOfMainStack) +{ + __ASM volatile ("MSR msp_ns, %0" : : "r" (topOfMainStack) : ); +} +#endif + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Stack Pointer (non-secure) + \details Returns the current value of the non-secure Stack Pointer (SP) when in secure state. + \return SP Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_SP_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, sp_ns" : "=r" (result) ); + return(result); +} + + +/** + \brief Set Stack Pointer (non-secure) + \details Assigns the given value to the non-secure Stack Pointer (SP) when in secure state. + \param [in] topOfStack Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __TZ_set_SP_NS(uint32_t topOfStack) +{ + __ASM volatile ("MSR sp_ns, %0" : : "r" (topOfStack) : ); +} +#endif + + +/** + \brief Get Priority Mask + \details Returns the current state of the priority mask bit from the Priority Mask Register. + \return Priority Mask value + */ +__STATIC_FORCEINLINE uint32_t __get_PRIMASK(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, primask" : "=r" (result) :: "memory"); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Priority Mask (non-secure) + \details Returns the current state of the non-secure priority mask bit from the Priority Mask Register when in secure state. + \return Priority Mask value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_PRIMASK_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, primask_ns" : "=r" (result) :: "memory"); + return(result); +} +#endif + + +/** + \brief Set Priority Mask + \details Assigns the given value to the Priority Mask Register. + \param [in] priMask Priority Mask + */ +__STATIC_FORCEINLINE void __set_PRIMASK(uint32_t priMask) +{ + __ASM volatile ("MSR primask, %0" : : "r" (priMask) : "memory"); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Priority Mask (non-secure) + \details Assigns the given value to the non-secure Priority Mask Register when in secure state. + \param [in] priMask Priority Mask + */ +__STATIC_FORCEINLINE void __TZ_set_PRIMASK_NS(uint32_t priMask) +{ + __ASM volatile ("MSR primask_ns, %0" : : "r" (priMask) : "memory"); +} +#endif + + +#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) +/** + \brief Enable FIQ + \details Enables FIQ interrupts by clearing the F-bit in the CPSR. + Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __enable_fault_irq(void) +{ + __ASM volatile ("cpsie f" : : : "memory"); +} + + +/** + \brief Disable FIQ + \details Disables FIQ interrupts by setting the F-bit in the CPSR. + Can only be executed in Privileged modes. + */ +__STATIC_FORCEINLINE void __disable_fault_irq(void) +{ + __ASM volatile ("cpsid f" : : : "memory"); +} + + +/** + \brief Get Base Priority + \details Returns the current value of the Base Priority register. + \return Base Priority register value + */ +__STATIC_FORCEINLINE uint32_t __get_BASEPRI(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, basepri" : "=r" (result) ); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Base Priority (non-secure) + \details Returns the current value of the non-secure Base Priority register when in secure state. + \return Base Priority register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_BASEPRI_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, basepri_ns" : "=r" (result) ); + return(result); +} +#endif + + +/** + \brief Set Base Priority + \details Assigns the given value to the Base Priority register. + \param [in] basePri Base Priority value to set + */ +__STATIC_FORCEINLINE void __set_BASEPRI(uint32_t basePri) +{ + __ASM volatile ("MSR basepri, %0" : : "r" (basePri) : "memory"); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Base Priority (non-secure) + \details Assigns the given value to the non-secure Base Priority register when in secure state. + \param [in] basePri Base Priority value to set + */ +__STATIC_FORCEINLINE void __TZ_set_BASEPRI_NS(uint32_t basePri) +{ + __ASM volatile ("MSR basepri_ns, %0" : : "r" (basePri) : "memory"); +} +#endif + + +/** + \brief Set Base Priority with condition + \details Assigns the given value to the Base Priority register only if BASEPRI masking is disabled, + or the new value increases the BASEPRI priority level. + \param [in] basePri Base Priority value to set + */ +__STATIC_FORCEINLINE void __set_BASEPRI_MAX(uint32_t basePri) +{ + __ASM volatile ("MSR basepri_max, %0" : : "r" (basePri) : "memory"); +} + + +/** + \brief Get Fault Mask + \details Returns the current value of the Fault Mask register. + \return Fault Mask register value + */ +__STATIC_FORCEINLINE uint32_t __get_FAULTMASK(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, faultmask" : "=r" (result) ); + return(result); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Fault Mask (non-secure) + \details Returns the current value of the non-secure Fault Mask register when in secure state. + \return Fault Mask register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_FAULTMASK_NS(void) +{ + uint32_t result; + + __ASM volatile ("MRS %0, faultmask_ns" : "=r" (result) ); + return(result); +} +#endif + + +/** + \brief Set Fault Mask + \details Assigns the given value to the Fault Mask register. + \param [in] faultMask Fault Mask value to set + */ +__STATIC_FORCEINLINE void __set_FAULTMASK(uint32_t faultMask) +{ + __ASM volatile ("MSR faultmask, %0" : : "r" (faultMask) : "memory"); +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Fault Mask (non-secure) + \details Assigns the given value to the non-secure Fault Mask register when in secure state. + \param [in] faultMask Fault Mask value to set + */ +__STATIC_FORCEINLINE void __TZ_set_FAULTMASK_NS(uint32_t faultMask) +{ + __ASM volatile ("MSR faultmask_ns, %0" : : "r" (faultMask) : "memory"); +} +#endif + +#endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) */ + + +#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) + +/** + \brief Get Process Stack Pointer Limit + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence zero is returned always in non-secure + mode. + + \details Returns the current value of the Process Stack Pointer Limit (PSPLIM). + \return PSPLIM Register value + */ +__STATIC_FORCEINLINE uint32_t __get_PSPLIM(void) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \ + (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3))) + // without main extensions, the non-secure PSPLIM is RAZ/WI + return 0U; +#else + uint32_t result; + __ASM volatile ("MRS %0, psplim" : "=r" (result) ); + return result; +#endif +} + +#if (defined (__ARM_FEATURE_CMSE) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Process Stack Pointer Limit (non-secure) + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence zero is returned always. + + \details Returns the current value of the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state. + \return PSPLIM Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_PSPLIM_NS(void) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))) + // without main extensions, the non-secure PSPLIM is RAZ/WI + return 0U; +#else + uint32_t result; + __ASM volatile ("MRS %0, psplim_ns" : "=r" (result) ); + return result; +#endif +} +#endif + + +/** + \brief Set Process Stack Pointer Limit + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence the write is silently ignored in non-secure + mode. + + \details Assigns the given value to the Process Stack Pointer Limit (PSPLIM). + \param [in] ProcStackPtrLimit Process Stack Pointer Limit value to set + */ +__STATIC_FORCEINLINE void __set_PSPLIM(uint32_t ProcStackPtrLimit) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \ + (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3))) + // without main extensions, the non-secure PSPLIM is RAZ/WI + (void)ProcStackPtrLimit; +#else + __ASM volatile ("MSR psplim, %0" : : "r" (ProcStackPtrLimit)); +#endif +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Process Stack Pointer (non-secure) + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence the write is silently ignored. + + \details Assigns the given value to the non-secure Process Stack Pointer Limit (PSPLIM) when in secure state. + \param [in] ProcStackPtrLimit Process Stack Pointer Limit value to set + */ +__STATIC_FORCEINLINE void __TZ_set_PSPLIM_NS(uint32_t ProcStackPtrLimit) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))) + // without main extensions, the non-secure PSPLIM is RAZ/WI + (void)ProcStackPtrLimit; +#else + __ASM volatile ("MSR psplim_ns, %0\n" : : "r" (ProcStackPtrLimit)); +#endif +} +#endif + + +/** + \brief Get Main Stack Pointer Limit + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence zero is returned always in non-secure + mode. + + \details Returns the current value of the Main Stack Pointer Limit (MSPLIM). + \return MSPLIM Register value + */ +__STATIC_FORCEINLINE uint32_t __get_MSPLIM(void) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \ + (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3))) + // without main extensions, the non-secure MSPLIM is RAZ/WI + return 0U; +#else + uint32_t result; + __ASM volatile ("MRS %0, msplim" : "=r" (result) ); + return result; +#endif +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Get Main Stack Pointer Limit (non-secure) + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence zero is returned always. + + \details Returns the current value of the non-secure Main Stack Pointer Limit(MSPLIM) when in secure state. + \return MSPLIM Register value + */ +__STATIC_FORCEINLINE uint32_t __TZ_get_MSPLIM_NS(void) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))) + // without main extensions, the non-secure MSPLIM is RAZ/WI + return 0U; +#else + uint32_t result; + __ASM volatile ("MRS %0, msplim_ns" : "=r" (result) ); + return result; +#endif +} +#endif + + +/** + \brief Set Main Stack Pointer Limit + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence the write is silently ignored in non-secure + mode. + + \details Assigns the given value to the Main Stack Pointer Limit (MSPLIM). + \param [in] MainStackPtrLimit Main Stack Pointer Limit value to set + */ +__STATIC_FORCEINLINE void __set_MSPLIM(uint32_t MainStackPtrLimit) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) && \ + (!defined (__ARM_FEATURE_CMSE) || (__ARM_FEATURE_CMSE < 3))) + // without main extensions, the non-secure MSPLIM is RAZ/WI + (void)MainStackPtrLimit; +#else + __ASM volatile ("MSR msplim, %0" : : "r" (MainStackPtrLimit)); +#endif +} + + +#if (defined (__ARM_FEATURE_CMSE ) && (__ARM_FEATURE_CMSE == 3)) +/** + \brief Set Main Stack Pointer Limit (non-secure) + Devices without ARMv8-M Main Extensions (i.e. Cortex-M23) lack the non-secure + Stack Pointer Limit register hence the write is silently ignored. + + \details Assigns the given value to the non-secure Main Stack Pointer Limit (MSPLIM) when in secure state. + \param [in] MainStackPtrLimit Main Stack Pointer value to set + */ +__STATIC_FORCEINLINE void __TZ_set_MSPLIM_NS(uint32_t MainStackPtrLimit) +{ +#if (!(defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1))) + // without main extensions, the non-secure MSPLIM is RAZ/WI + (void)MainStackPtrLimit; +#else + __ASM volatile ("MSR msplim_ns, %0" : : "r" (MainStackPtrLimit)); +#endif +} +#endif + +#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) */ + + +/** + \brief Get FPSCR + \details Returns the current value of the Floating Point Status/Control register. + \return Floating Point Status/Control register value + */ +__STATIC_FORCEINLINE uint32_t __get_FPSCR(void) +{ +#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ + (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) +#if __has_builtin(__builtin_arm_get_fpscr) +// Re-enable using built-in when GCC has been fixed +// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2) + /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */ + return __builtin_arm_get_fpscr(); +#else + uint32_t result; + + __ASM volatile ("VMRS %0, fpscr" : "=r" (result) ); + return(result); +#endif +#else + return(0U); +#endif +} + + +/** + \brief Set FPSCR + \details Assigns the given value to the Floating Point Status/Control register. + \param [in] fpscr Floating Point Status/Control value to set + */ +__STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr) +{ +#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ + (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) +#if __has_builtin(__builtin_arm_set_fpscr) +// Re-enable using built-in when GCC has been fixed +// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2) + /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */ + __builtin_arm_set_fpscr(fpscr); +#else + __ASM volatile ("VMSR fpscr, %0" : : "r" (fpscr) : "vfpcc", "memory"); +#endif +#else + (void)fpscr; +#endif +} + + +/*@} end of CMSIS_Core_RegAccFunctions */ + + +/* ########################## Core Instruction Access ######################### */ +/** \defgroup CMSIS_Core_InstructionInterface CMSIS Core Instruction Interface + Access to dedicated instructions + @{ +*/ + +/* Define macros for porting to both thumb1 and thumb2. + * For thumb1, use low register (r0-r7), specified by constraint "l" + * Otherwise, use general registers, specified by constraint "r" */ +#if defined (__thumb__) && !defined (__thumb2__) +#define __CMSIS_GCC_OUT_REG(r) "=l" (r) +#define __CMSIS_GCC_RW_REG(r) "+l" (r) +#define __CMSIS_GCC_USE_REG(r) "l" (r) +#else +#define __CMSIS_GCC_OUT_REG(r) "=r" (r) +#define __CMSIS_GCC_RW_REG(r) "+r" (r) +#define __CMSIS_GCC_USE_REG(r) "r" (r) +#endif + +/** + \brief No Operation + \details No Operation does nothing. This instruction can be used for code alignment purposes. + */ +#define __NOP() __ASM volatile ("nop") + +/** + \brief Wait For Interrupt + \details Wait For Interrupt is a hint instruction that suspends execution until one of a number of events occurs. + */ +#define __WFI() __ASM volatile ("wfi") + + +/** + \brief Wait For Event + \details Wait For Event is a hint instruction that permits the processor to enter + a low-power state until one of a number of events occurs. + */ +#define __WFE() __ASM volatile ("wfe") + + +/** + \brief Send Event + \details Send Event is a hint instruction. It causes an event to be signaled to the CPU. + */ +#define __SEV() __ASM volatile ("sev") + + +/** + \brief Instruction Synchronization Barrier + \details Instruction Synchronization Barrier flushes the pipeline in the processor, + so that all instructions following the ISB are fetched from cache or memory, + after the instruction has been completed. + */ +__STATIC_FORCEINLINE void __ISB(void) +{ + __ASM volatile ("isb 0xF":::"memory"); +} + + +/** + \brief Data Synchronization Barrier + \details Acts as a special kind of Data Memory Barrier. + It completes when all explicit memory accesses before this instruction complete. + */ +__STATIC_FORCEINLINE void __DSB(void) +{ + __ASM volatile ("dsb 0xF":::"memory"); +} + + +/** + \brief Data Memory Barrier + \details Ensures the apparent order of the explicit memory operations before + and after the instruction, without ensuring their completion. + */ +__STATIC_FORCEINLINE void __DMB(void) +{ + __ASM volatile ("dmb 0xF":::"memory"); +} + + +/** + \brief Reverse byte order (32 bit) + \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412. + \param [in] value Value to reverse + \return Reversed value + */ +__STATIC_FORCEINLINE uint32_t __REV(uint32_t value) +{ +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) + return __builtin_bswap32(value); +#else + uint32_t result; + + __ASM volatile ("rev %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) ); + return result; +#endif +} + + +/** + \brief Reverse byte order (16 bit) + \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856. + \param [in] value Value to reverse + \return Reversed value + */ +__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value) +{ + uint32_t result; + + __ASM volatile ("rev16 %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) ); + return result; +} + + +/** + \brief Reverse byte order (16 bit) + \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000. + \param [in] value Value to reverse + \return Reversed value + */ +__STATIC_FORCEINLINE int16_t __REVSH(int16_t value) +{ +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + return (int16_t)__builtin_bswap16(value); +#else + int16_t result; + + __ASM volatile ("revsh %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) ); + return result; +#endif +} + + +/** + \brief Rotate Right in unsigned value (32 bit) + \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. + \param [in] op1 Value to rotate + \param [in] op2 Number of Bits to rotate + \return Rotated value + */ +__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) +{ + op2 %= 32U; + if (op2 == 0U) + { + return op1; + } + return (op1 >> op2) | (op1 << (32U - op2)); +} + + +/** + \brief Breakpoint + \details Causes the processor to enter Debug state. + Debug tools can use this to investigate system state when the instruction at a particular address is reached. + \param [in] value is ignored by the processor. + If required, a debugger can use it to store additional information about the breakpoint. + */ +#define __BKPT(value) __ASM volatile ("bkpt "#value) + + +/** + \brief Reverse bit order of value + \details Reverses the bit order of the given value. + \param [in] value Value to reverse + \return Reversed value + */ +__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value) +{ + uint32_t result; + +#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) + __ASM volatile ("rbit %0, %1" : "=r" (result) : "r" (value) ); +#else + uint32_t s = (4U /*sizeof(v)*/ * 8U) - 1U; /* extra shift needed at end */ + + result = value; /* r will be reversed bits of v; first get LSB of v */ + for (value >>= 1U; value != 0U; value >>= 1U) + { + result <<= 1U; + result |= value & 1U; + s--; + } + result <<= s; /* shift when v's highest bits are zero */ +#endif + return result; +} + + +/** + \brief Count leading zeros + \details Counts the number of leading zeros of a data value. + \param [in] value Value to count the leading zeros + \return number of leading zeros in value + */ +__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value) +{ + /* Even though __builtin_clz produces a CLZ instruction on ARM, formally + __builtin_clz(0) is undefined behaviour, so handle this case specially. + This guarantees ARM-compatible results if happening to compile on a non-ARM + target, and ensures the compiler doesn't decide to activate any + optimisations using the logic "value was passed to __builtin_clz, so it + is non-zero". + ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a + single CLZ instruction. + */ + if (value == 0U) + { + return 32U; + } + return __builtin_clz(value); +} + + +#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) +/** + \brief LDR Exclusive (8 bit) + \details Executes a exclusive LDR instruction for 8 bit value. + \param [in] ptr Pointer to data + \return value of type uint8_t at (*ptr) + */ +__STATIC_FORCEINLINE uint8_t __LDREXB(volatile uint8_t *addr) +{ + uint32_t result; + +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + __ASM volatile ("ldrexb %0, %1" : "=r" (result) : "Q" (*addr) ); +#else + /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not + accepted by assembler. So has to use following less efficient pattern. + */ + __ASM volatile ("ldrexb %0, [%1]" : "=r" (result) : "r" (addr) : "memory" ); +#endif + return ((uint8_t) result); /* Add explicit type cast here */ +} + + +/** + \brief LDR Exclusive (16 bit) + \details Executes a exclusive LDR instruction for 16 bit values. + \param [in] ptr Pointer to data + \return value of type uint16_t at (*ptr) + */ +__STATIC_FORCEINLINE uint16_t __LDREXH(volatile uint16_t *addr) +{ + uint32_t result; + +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + __ASM volatile ("ldrexh %0, %1" : "=r" (result) : "Q" (*addr) ); +#else + /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not + accepted by assembler. So has to use following less efficient pattern. + */ + __ASM volatile ("ldrexh %0, [%1]" : "=r" (result) : "r" (addr) : "memory" ); +#endif + return ((uint16_t) result); /* Add explicit type cast here */ +} + + +/** + \brief LDR Exclusive (32 bit) + \details Executes a exclusive LDR instruction for 32 bit values. + \param [in] ptr Pointer to data + \return value of type uint32_t at (*ptr) + */ +__STATIC_FORCEINLINE uint32_t __LDREXW(volatile uint32_t *addr) +{ + uint32_t result; + + __ASM volatile ("ldrex %0, %1" : "=r" (result) : "Q" (*addr) ); + return(result); +} + + +/** + \brief STR Exclusive (8 bit) + \details Executes a exclusive STR instruction for 8 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STREXB(uint8_t value, volatile uint8_t *addr) +{ + uint32_t result; + + __ASM volatile ("strexb %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) ); + return(result); +} + + +/** + \brief STR Exclusive (16 bit) + \details Executes a exclusive STR instruction for 16 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STREXH(uint16_t value, volatile uint16_t *addr) +{ + uint32_t result; + + __ASM volatile ("strexh %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) ); + return(result); +} + + +/** + \brief STR Exclusive (32 bit) + \details Executes a exclusive STR instruction for 32 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STREXW(uint32_t value, volatile uint32_t *addr) +{ + uint32_t result; + + __ASM volatile ("strex %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" (value) ); + return(result); +} + + +/** + \brief Remove the exclusive lock + \details Removes the exclusive lock which is created by LDREX. + */ +__STATIC_FORCEINLINE void __CLREX(void) +{ + __ASM volatile ("clrex" ::: "memory"); +} + +#endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) */ + + +#if ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) +/** + \brief Signed Saturate + \details Saturates a signed value. + \param [in] ARG1 Value to be saturated + \param [in] ARG2 Bit position to saturate to (1..32) + \return Saturated value + */ +#define __SSAT(ARG1,ARG2) \ +__extension__ \ +({ \ + int32_t __RES, __ARG1 = (ARG1); \ + __ASM ("ssat %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ + __RES; \ + }) + + +/** + \brief Unsigned Saturate + \details Saturates an unsigned value. + \param [in] ARG1 Value to be saturated + \param [in] ARG2 Bit position to saturate to (0..31) + \return Saturated value + */ +#define __USAT(ARG1,ARG2) \ + __extension__ \ +({ \ + uint32_t __RES, __ARG1 = (ARG1); \ + __ASM ("usat %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ + __RES; \ + }) + + +/** + \brief Rotate Right with Extend (32 bit) + \details Moves each bit of a bitstring right by one bit. + The carry input is shifted in at the left end of the bitstring. + \param [in] value Value to rotate + \return Rotated value + */ +__STATIC_FORCEINLINE uint32_t __RRX(uint32_t value) +{ + uint32_t result; + + __ASM volatile ("rrx %0, %1" : __CMSIS_GCC_OUT_REG (result) : __CMSIS_GCC_USE_REG (value) ); + return(result); +} + + +/** + \brief LDRT Unprivileged (8 bit) + \details Executes a Unprivileged LDRT instruction for 8 bit value. + \param [in] ptr Pointer to data + \return value of type uint8_t at (*ptr) + */ +__STATIC_FORCEINLINE uint8_t __LDRBT(volatile uint8_t *ptr) +{ + uint32_t result; + +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + __ASM volatile ("ldrbt %0, %1" : "=r" (result) : "Q" (*ptr) ); +#else + /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not + accepted by assembler. So has to use following less efficient pattern. + */ + __ASM volatile ("ldrbt %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" ); +#endif + return ((uint8_t) result); /* Add explicit type cast here */ +} + + +/** + \brief LDRT Unprivileged (16 bit) + \details Executes a Unprivileged LDRT instruction for 16 bit values. + \param [in] ptr Pointer to data + \return value of type uint16_t at (*ptr) + */ +__STATIC_FORCEINLINE uint16_t __LDRHT(volatile uint16_t *ptr) +{ + uint32_t result; + +#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) + __ASM volatile ("ldrht %0, %1" : "=r" (result) : "Q" (*ptr) ); +#else + /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not + accepted by assembler. So has to use following less efficient pattern. + */ + __ASM volatile ("ldrht %0, [%1]" : "=r" (result) : "r" (ptr) : "memory" ); +#endif + return ((uint16_t) result); /* Add explicit type cast here */ +} + + +/** + \brief LDRT Unprivileged (32 bit) + \details Executes a Unprivileged LDRT instruction for 32 bit values. + \param [in] ptr Pointer to data + \return value of type uint32_t at (*ptr) + */ +__STATIC_FORCEINLINE uint32_t __LDRT(volatile uint32_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldrt %0, %1" : "=r" (result) : "Q" (*ptr) ); + return(result); +} + + +/** + \brief STRT Unprivileged (8 bit) + \details Executes a Unprivileged STRT instruction for 8 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STRBT(uint8_t value, volatile uint8_t *ptr) +{ + __ASM volatile ("strbt %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) ); +} + + +/** + \brief STRT Unprivileged (16 bit) + \details Executes a Unprivileged STRT instruction for 16 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STRHT(uint16_t value, volatile uint16_t *ptr) +{ + __ASM volatile ("strht %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) ); +} + + +/** + \brief STRT Unprivileged (32 bit) + \details Executes a Unprivileged STRT instruction for 32 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STRT(uint32_t value, volatile uint32_t *ptr) +{ + __ASM volatile ("strt %1, %0" : "=Q" (*ptr) : "r" (value) ); +} + +#else /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) */ + +/** + \brief Signed Saturate + \details Saturates a signed value. + \param [in] value Value to be saturated + \param [in] sat Bit position to saturate to (1..32) + \return Saturated value + */ +__STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) +{ + if ((sat >= 1U) && (sat <= 32U)) + { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) + { + return max; + } + else if (val < min) + { + return min; + } + } + return val; +} + +/** + \brief Unsigned Saturate + \details Saturates an unsigned value. + \param [in] value Value to be saturated + \param [in] sat Bit position to saturate to (0..31) + \return Saturated value + */ +__STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) +{ + if (sat <= 31U) + { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) + { + return max; + } + else if (val < 0) + { + return 0U; + } + } + return (uint32_t)val; +} + +#endif /* ((defined (__ARM_ARCH_7M__ ) && (__ARM_ARCH_7M__ == 1)) || \ + (defined (__ARM_ARCH_7EM__ ) && (__ARM_ARCH_7EM__ == 1)) || \ + (defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) ) */ + + +#if ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) +/** + \brief Load-Acquire (8 bit) + \details Executes a LDAB instruction for 8 bit value. + \param [in] ptr Pointer to data + \return value of type uint8_t at (*ptr) + */ +__STATIC_FORCEINLINE uint8_t __LDAB(volatile uint8_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldab %0, %1" : "=r" (result) : "Q" (*ptr) ); + return ((uint8_t) result); +} + + +/** + \brief Load-Acquire (16 bit) + \details Executes a LDAH instruction for 16 bit values. + \param [in] ptr Pointer to data + \return value of type uint16_t at (*ptr) + */ +__STATIC_FORCEINLINE uint16_t __LDAH(volatile uint16_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldah %0, %1" : "=r" (result) : "Q" (*ptr) ); + return ((uint16_t) result); +} + + +/** + \brief Load-Acquire (32 bit) + \details Executes a LDA instruction for 32 bit values. + \param [in] ptr Pointer to data + \return value of type uint32_t at (*ptr) + */ +__STATIC_FORCEINLINE uint32_t __LDA(volatile uint32_t *ptr) +{ + uint32_t result; + + __ASM volatile ("lda %0, %1" : "=r" (result) : "Q" (*ptr) ); + return(result); +} + + +/** + \brief Store-Release (8 bit) + \details Executes a STLB instruction for 8 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STLB(uint8_t value, volatile uint8_t *ptr) +{ + __ASM volatile ("stlb %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) ); +} + + +/** + \brief Store-Release (16 bit) + \details Executes a STLH instruction for 16 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STLH(uint16_t value, volatile uint16_t *ptr) +{ + __ASM volatile ("stlh %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) ); +} + + +/** + \brief Store-Release (32 bit) + \details Executes a STL instruction for 32 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + */ +__STATIC_FORCEINLINE void __STL(uint32_t value, volatile uint32_t *ptr) +{ + __ASM volatile ("stl %1, %0" : "=Q" (*ptr) : "r" ((uint32_t)value) ); +} + + +/** + \brief Load-Acquire Exclusive (8 bit) + \details Executes a LDAB exclusive instruction for 8 bit value. + \param [in] ptr Pointer to data + \return value of type uint8_t at (*ptr) + */ +__STATIC_FORCEINLINE uint8_t __LDAEXB(volatile uint8_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldaexb %0, %1" : "=r" (result) : "Q" (*ptr) ); + return ((uint8_t) result); +} + + +/** + \brief Load-Acquire Exclusive (16 bit) + \details Executes a LDAH exclusive instruction for 16 bit values. + \param [in] ptr Pointer to data + \return value of type uint16_t at (*ptr) + */ +__STATIC_FORCEINLINE uint16_t __LDAEXH(volatile uint16_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldaexh %0, %1" : "=r" (result) : "Q" (*ptr) ); + return ((uint16_t) result); +} + + +/** + \brief Load-Acquire Exclusive (32 bit) + \details Executes a LDA exclusive instruction for 32 bit values. + \param [in] ptr Pointer to data + \return value of type uint32_t at (*ptr) + */ +__STATIC_FORCEINLINE uint32_t __LDAEX(volatile uint32_t *ptr) +{ + uint32_t result; + + __ASM volatile ("ldaex %0, %1" : "=r" (result) : "Q" (*ptr) ); + return(result); +} + + +/** + \brief Store-Release Exclusive (8 bit) + \details Executes a STLB exclusive instruction for 8 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STLEXB(uint8_t value, volatile uint8_t *ptr) +{ + uint32_t result; + + __ASM volatile ("stlexb %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) ); + return(result); +} + + +/** + \brief Store-Release Exclusive (16 bit) + \details Executes a STLH exclusive instruction for 16 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STLEXH(uint16_t value, volatile uint16_t *ptr) +{ + uint32_t result; + + __ASM volatile ("stlexh %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) ); + return(result); +} + + +/** + \brief Store-Release Exclusive (32 bit) + \details Executes a STL exclusive instruction for 32 bit values. + \param [in] value Value to store + \param [in] ptr Pointer to location + \return 0 Function succeeded + \return 1 Function failed + */ +__STATIC_FORCEINLINE uint32_t __STLEX(uint32_t value, volatile uint32_t *ptr) +{ + uint32_t result; + + __ASM volatile ("stlex %0, %2, %1" : "=&r" (result), "=Q" (*ptr) : "r" ((uint32_t)value) ); + return(result); +} + +#endif /* ((defined (__ARM_ARCH_8M_MAIN__ ) && (__ARM_ARCH_8M_MAIN__ == 1)) || \ + (defined (__ARM_ARCH_8M_BASE__ ) && (__ARM_ARCH_8M_BASE__ == 1)) ) */ + +/*@}*/ /* end of group CMSIS_Core_InstructionInterface */ + + +/* ################### Compiler specific Intrinsics ########################### */ +/** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics + Access to dedicated SIMD instructions + @{ +*/ + +#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) + +__STATIC_FORCEINLINE uint32_t __SADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("sadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHADD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + + +__STATIC_FORCEINLINE uint32_t __SSUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("ssub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHSUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __USUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("usub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQSUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHSUB8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + + +__STATIC_FORCEINLINE uint32_t __SADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("sadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHADD16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SSUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("ssub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __USUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("usub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQSUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHSUB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("sasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHASX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SSAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("ssax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __USAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("usax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UQSAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uqsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UHSAX(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uhsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __USAD8(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("usad8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __USADA8(uint32_t op1, uint32_t op2, uint32_t op3) +{ + uint32_t result; + + __ASM volatile ("usada8 %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +#define __SSAT16(ARG1,ARG2) \ +({ \ + int32_t __RES, __ARG1 = (ARG1); \ + __ASM ("ssat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ + __RES; \ + }) + +#define __USAT16(ARG1,ARG2) \ +({ \ + uint32_t __RES, __ARG1 = (ARG1); \ + __ASM ("usat16 %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) ); \ + __RES; \ + }) + +__STATIC_FORCEINLINE uint32_t __UXTB16(uint32_t op1) +{ + uint32_t result; + + __ASM volatile ("uxtb16 %0, %1" : "=r" (result) : "r" (op1)); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __UXTAB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("uxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t op1) +{ + uint32_t result; + + __ASM volatile ("sxtb16 %0, %1" : "=r" (result) : "r" (op1)); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SXTAB16(uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("sxtab16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3) +{ + uint32_t result; + + __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3) +{ + uint32_t result; + + __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +__STATIC_FORCEINLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc) +{ + union llreg_u{ + uint32_t w32[2]; + uint64_t w64; + } llr; + llr.w64 = acc; + +#ifndef __ARMEB__ /* Little endian */ + __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); +#else /* Big endian */ + __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); +#endif + + return(llr.w64); +} + +__STATIC_FORCEINLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc) +{ + union llreg_u{ + uint32_t w32[2]; + uint64_t w64; + } llr; + llr.w64 = acc; + +#ifndef __ARMEB__ /* Little endian */ + __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); +#else /* Big endian */ + __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); +#endif + + return(llr.w64); +} + +__STATIC_FORCEINLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMLSD (uint32_t op1, uint32_t op2, uint32_t op3) +{ + uint32_t result; + + __ASM volatile ("smlsd %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +__STATIC_FORCEINLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3) +{ + uint32_t result; + + __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +__STATIC_FORCEINLINE uint64_t __SMLSLD (uint32_t op1, uint32_t op2, uint64_t acc) +{ + union llreg_u{ + uint32_t w32[2]; + uint64_t w64; + } llr; + llr.w64 = acc; + +#ifndef __ARMEB__ /* Little endian */ + __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); +#else /* Big endian */ + __ASM volatile ("smlsld %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); +#endif + + return(llr.w64); +} + +__STATIC_FORCEINLINE uint64_t __SMLSLDX (uint32_t op1, uint32_t op2, uint64_t acc) +{ + union llreg_u{ + uint32_t w32[2]; + uint64_t w64; + } llr; + llr.w64 = acc; + +#ifndef __ARMEB__ /* Little endian */ + __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); +#else /* Big endian */ + __ASM volatile ("smlsldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); +#endif + + return(llr.w64); +} + +__STATIC_FORCEINLINE uint32_t __SEL (uint32_t op1, uint32_t op2) +{ + uint32_t result; + + __ASM volatile ("sel %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE int32_t __QADD( int32_t op1, int32_t op2) +{ + int32_t result; + + __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +__STATIC_FORCEINLINE int32_t __QSUB( int32_t op1, int32_t op2) +{ + int32_t result; + + __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); + return(result); +} + +#if 0 +#define __PKHBT(ARG1,ARG2,ARG3) \ +({ \ + uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ + __ASM ("pkhbt %0, %1, %2, lsl %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ + __RES; \ + }) + +#define __PKHTB(ARG1,ARG2,ARG3) \ +({ \ + uint32_t __RES, __ARG1 = (ARG1), __ARG2 = (ARG2); \ + if (ARG3 == 0) \ + __ASM ("pkhtb %0, %1, %2" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2) ); \ + else \ + __ASM ("pkhtb %0, %1, %2, asr %3" : "=r" (__RES) : "r" (__ARG1), "r" (__ARG2), "I" (ARG3) ); \ + __RES; \ + }) +#endif + +#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ + ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) + +#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ + ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) + +__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3) +{ + int32_t result; + + __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); + return(result); +} + +#endif /* (__ARM_FEATURE_DSP == 1) */ +/*@} end of group CMSIS_SIMD_intrinsics */ + + +#pragma GCC diagnostic pop + +#endif /* __CMSIS_GCC_H */ diff --git a/ARM_math/matrix_functions.h b/ARM_math/matrix_functions.h new file mode 100644 index 0000000..d3f79a0 --- /dev/null +++ b/ARM_math/matrix_functions.h @@ -0,0 +1,842 @@ +/****************************************************************************** + * @file matrix_functions.h + * @brief Public header file for CMSIS DSP Library + * @version V1.10.0 + * @date 08 July 2021 + * Target Processor: Cortex-M and Cortex-A cores + ******************************************************************************/ +/* + * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef _MATRIX_FUNCTIONS_H_ +#define _MATRIX_FUNCTIONS_H_ + +#include "arm_math_types.h" +#include "arm_math_memory.h" + +#include "none.h" +#include "utils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** + * @defgroup groupMatrix Matrix Functions + * + * This set of functions provides basic matrix math operations. + * The functions operate on matrix data structures. For example, + * the type + * definition for the floating-point matrix structure is shown + * below: + *
+ *     typedef struct
+ *     {
+ *       uint16_t numRows;     // number of rows of the matrix.
+ *       uint16_t numCols;     // number of columns of the matrix.
+ *       float32_t *pData;     // points to the data of the matrix.
+ *     } arm_matrix_instance_f32;
+ * 
+ * There are similar definitions for Q15 and Q31 data types. + * + * The structure specifies the size of the matrix and then points to + * an array of data. The array is of size numRows X numCols + * and the values are arranged in row order. That is, the + * matrix element (i, j) is stored at: + *
+ *     pData[i*numCols + j]
+ * 
+ * + * \par Init Functions + * There is an associated initialization function for each type of matrix + * data structure. + * The initialization function sets the values of the internal structure fields. + * Refer to \ref arm_mat_init_f32(), \ref arm_mat_init_q31() and \ref arm_mat_init_q15() + * for floating-point, Q31 and Q15 types, respectively. + * + * \par + * Use of the initialization function is optional. However, if initialization function is used + * then the instance structure cannot be placed into a const data section. + * To place the instance structure in a const data + * section, manually initialize the data structure. For example: + *
+ * arm_matrix_instance_f32 S = {nRows, nColumns, pData};
+ * arm_matrix_instance_q31 S = {nRows, nColumns, pData};
+ * arm_matrix_instance_q15 S = {nRows, nColumns, pData};
+ * 
+ * where nRows specifies the number of rows, nColumns + * specifies the number of columns, and pData points to the + * data array. + * + * \par Size Checking + * By default all of the matrix functions perform size checking on the input and + * output matrices. For example, the matrix addition function verifies that the + * two input matrices and the output matrix all have the same number of rows and + * columns. If the size check fails the functions return: + *
+ *     ARM_MATH_SIZE_MISMATCH
+ * 
+ * Otherwise the functions return + *
+ *     ARM_MATH_SUCCESS
+ * 
+ * There is some overhead associated with this matrix size checking. + * The matrix size checking is enabled via the \#define + *
+ *     ARM_MATH_MATRIX_CHECK
+ * 
+ * within the library project settings. By default this macro is defined + * and size checking is enabled. By changing the project settings and + * undefining this macro size checking is eliminated and the functions + * run a bit faster. With size checking disabled the functions always + * return ARM_MATH_SUCCESS. + */ + + #define DEFAULT_HOUSEHOLDER_THRESHOLD_F64 (1.0e-16) + #define DEFAULT_HOUSEHOLDER_THRESHOLD_F32 (1.0e-12f) + + /** + * @brief Instance structure for the floating-point matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + float32_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_f32; + + /** + * @brief Instance structure for the floating-point matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + float64_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_f64; + + /** + * @brief Instance structure for the Q7 matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + q7_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_q7; + + /** + * @brief Instance structure for the Q15 matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + q15_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_q15; + + /** + * @brief Instance structure for the Q31 matrix structure. + */ + typedef struct + { + uint16_t numRows; /**< number of rows of the matrix. */ + uint16_t numCols; /**< number of columns of the matrix. */ + q31_t *pData; /**< points to the data of the matrix. */ + } arm_matrix_instance_q31; + + /** + * @brief Floating-point matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix addition. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_add_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pScratch); + + /** + * @brief Q31, complex, matrix multiplication. + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_mult_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst); + +/** + * @brief Floating-point matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_f64( + const arm_matrix_instance_f64 * pSrc, + arm_matrix_instance_f64 * pDst); + + /** + * @brief Floating-point complex matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_trans_f32( + const arm_matrix_instance_f32 * pSrc, + arm_matrix_instance_f32 * pDst); + + + /** + * @brief Q15 matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_q15( + const arm_matrix_instance_q15 * pSrc, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q15 complex matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_trans_q15( + const arm_matrix_instance_q15 * pSrc, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q7 matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_q7( + const arm_matrix_instance_q7 * pSrc, + arm_matrix_instance_q7 * pDst); + + /** + * @brief Q31 matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_trans_q31( + const arm_matrix_instance_q31 * pSrc, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Q31 complex matrix transpose. + * @param[in] pSrc points to the input matrix + * @param[out] pDst points to the output matrix + * @return The function returns either ARM_MATH_SIZE_MISMATCH + * or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_cmplx_trans_q31( + const arm_matrix_instance_q31 * pSrc, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Floating-point matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_f64( + const arm_matrix_instance_f64 * pSrcA, + const arm_matrix_instance_f64 * pSrcB, + arm_matrix_instance_f64 * pDst); + + /** + * @brief Floating-point matrix and vector multiplication + * @param[in] pSrcMat points to the input matrix structure + * @param[in] pVec points to vector + * @param[out] pDst points to output vector + */ +void arm_mat_vec_mult_f32( + const arm_matrix_instance_f32 *pSrcMat, + const float32_t *pVec, + float32_t *pDst); + + /** + * @brief Q7 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_q7( + const arm_matrix_instance_q7 * pSrcA, + const arm_matrix_instance_q7 * pSrcB, + arm_matrix_instance_q7 * pDst, + q7_t * pState); + + /** + * @brief Q7 matrix and vector multiplication + * @param[in] pSrcMat points to the input matrix structure + * @param[in] pVec points to vector + * @param[out] pDst points to output vector + */ +void arm_mat_vec_mult_q7( + const arm_matrix_instance_q7 *pSrcMat, + const q7_t *pVec, + q7_t *pDst); + + /** + * @brief Q15 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pState); + + /** + * @brief Q15 matrix and vector multiplication + * @param[in] pSrcMat points to the input matrix structure + * @param[in] pVec points to vector + * @param[out] pDst points to output vector + */ +void arm_mat_vec_mult_q15( + const arm_matrix_instance_q15 *pSrcMat, + const q15_t *pVec, + q15_t *pDst); + + /** + * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_fast_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst, + q15_t * pState); + + /** + * @brief Q31 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Q31 matrix multiplication + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @param[in] pState points to the array for storing intermediate results + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_opt_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst, + q31_t *pState); + + /** + * @brief Q31 matrix and vector multiplication + * @param[in] pSrcMat points to the input matrix structure + * @param[in] pVec points to vector + * @param[out] pDst points to output vector + */ +void arm_mat_vec_mult_q31( + const arm_matrix_instance_q31 *pSrcMat, + const q31_t *pVec, + q31_t *pDst); + + /** + * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4 + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_mult_fast_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_f32( + const arm_matrix_instance_f32 * pSrcA, + const arm_matrix_instance_f32 * pSrcB, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Floating-point matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_f64( + const arm_matrix_instance_f64 * pSrcA, + const arm_matrix_instance_f64 * pSrcB, + arm_matrix_instance_f64 * pDst); + + /** + * @brief Q15 matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_q15( + const arm_matrix_instance_q15 * pSrcA, + const arm_matrix_instance_q15 * pSrcB, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix subtraction + * @param[in] pSrcA points to the first input matrix structure + * @param[in] pSrcB points to the second input matrix structure + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_sub_q31( + const arm_matrix_instance_q31 * pSrcA, + const arm_matrix_instance_q31 * pSrcB, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Floating-point matrix scaling. + * @param[in] pSrc points to the input matrix + * @param[in] scale scale factor + * @param[out] pDst points to the output matrix + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_f32( + const arm_matrix_instance_f32 * pSrc, + float32_t scale, + arm_matrix_instance_f32 * pDst); + + /** + * @brief Q15 matrix scaling. + * @param[in] pSrc points to input matrix + * @param[in] scaleFract fractional portion of the scale factor + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to output matrix + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_q15( + const arm_matrix_instance_q15 * pSrc, + q15_t scaleFract, + int32_t shift, + arm_matrix_instance_q15 * pDst); + + /** + * @brief Q31 matrix scaling. + * @param[in] pSrc points to input matrix + * @param[in] scaleFract fractional portion of the scale factor + * @param[in] shift number of bits to shift the result by + * @param[out] pDst points to output matrix structure + * @return The function returns either + * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. + */ +arm_status arm_mat_scale_q31( + const arm_matrix_instance_q31 * pSrc, + q31_t scaleFract, + int32_t shift, + arm_matrix_instance_q31 * pDst); + + /** + * @brief Q31 matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_q31( + arm_matrix_instance_q31 * S, + uint16_t nRows, + uint16_t nColumns, + q31_t * pData); + + /** + * @brief Q15 matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_q15( + arm_matrix_instance_q15 * S, + uint16_t nRows, + uint16_t nColumns, + q15_t * pData); + + /** + * @brief Floating-point matrix initialization. + * @param[in,out] S points to an instance of the floating-point matrix structure. + * @param[in] nRows number of rows in the matrix. + * @param[in] nColumns number of columns in the matrix. + * @param[in] pData points to the matrix data array. + */ +void arm_mat_init_f32( + arm_matrix_instance_f32 * S, + uint16_t nRows, + uint16_t nColumns, + float32_t * pData); + + + + /** + * @brief Floating-point matrix inverse. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. + */ + arm_status arm_mat_inverse_f32( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * dst); + + + /** + * @brief Floating-point matrix inverse. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix is singular (does not have an inverse), then the algorithm terminates and returns error status ARM_MATH_SINGULAR. + */ + arm_status arm_mat_inverse_f64( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * dst); + + /** + * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE. + * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition. + * The decomposition is returning a lower triangular matrix. + */ + arm_status arm_mat_cholesky_f64( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * dst); + + /** + * @brief Floating-point Cholesky decomposition of Symmetric Positive Definite Matrix. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] dst points to the instance of the output floating-point matrix structure. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE. + * If the matrix is ill conditioned or only semi-definite, then it is better using the LDL^t decomposition. + * The decomposition is returning a lower triangular matrix. + */ + arm_status arm_mat_cholesky_f32( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * dst); + + /** + * @brief Solve UT . X = A where UT is an upper triangular matrix + * @param[in] ut The upper triangular matrix + * @param[in] a The matrix a + * @param[out] dst The solution X of UT . X = A + * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. + */ + arm_status arm_mat_solve_upper_triangular_f32( + const arm_matrix_instance_f32 * ut, + const arm_matrix_instance_f32 * a, + arm_matrix_instance_f32 * dst); + + /** + * @brief Solve LT . X = A where LT is a lower triangular matrix + * @param[in] lt The lower triangular matrix + * @param[in] a The matrix a + * @param[out] dst The solution X of LT . X = A + * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. + */ + arm_status arm_mat_solve_lower_triangular_f32( + const arm_matrix_instance_f32 * lt, + const arm_matrix_instance_f32 * a, + arm_matrix_instance_f32 * dst); + + + /** + * @brief Solve UT . X = A where UT is an upper triangular matrix + * @param[in] ut The upper triangular matrix + * @param[in] a The matrix a + * @param[out] dst The solution X of UT . X = A + * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. + */ + arm_status arm_mat_solve_upper_triangular_f64( + const arm_matrix_instance_f64 * ut, + const arm_matrix_instance_f64 * a, + arm_matrix_instance_f64 * dst); + + /** + * @brief Solve LT . X = A where LT is a lower triangular matrix + * @param[in] lt The lower triangular matrix + * @param[in] a The matrix a + * @param[out] dst The solution X of LT . X = A + * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved. + */ + arm_status arm_mat_solve_lower_triangular_f64( + const arm_matrix_instance_f64 * lt, + const arm_matrix_instance_f64 * a, + arm_matrix_instance_f64 * dst); + + + /** + * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] l points to the instance of the output floating-point triangular matrix structure. + * @param[out] d points to the instance of the output floating-point diagonal matrix structure. + * @param[out] p points to the instance of the output floating-point permutation vector. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE. + * The decomposition is returning a lower triangular matrix. + */ + arm_status arm_mat_ldlt_f32( + const arm_matrix_instance_f32 * src, + arm_matrix_instance_f32 * l, + arm_matrix_instance_f32 * d, + uint16_t * pp); + + /** + * @brief Floating-point LDL decomposition of Symmetric Positive Semi-Definite Matrix. + * @param[in] src points to the instance of the input floating-point matrix structure. + * @param[out] l points to the instance of the output floating-point triangular matrix structure. + * @param[out] d points to the instance of the output floating-point diagonal matrix structure. + * @param[out] p points to the instance of the output floating-point permutation vector. + * @return The function returns ARM_MATH_SIZE_MISMATCH, if the dimensions do not match. + * If the input matrix does not have a decomposition, then the algorithm terminates and returns error status ARM_MATH_DECOMPOSITION_FAILURE. + * The decomposition is returning a lower triangular matrix. + */ + arm_status arm_mat_ldlt_f64( + const arm_matrix_instance_f64 * src, + arm_matrix_instance_f64 * l, + arm_matrix_instance_f64 * d, + uint16_t * pp); + +/** + @brief QR decomposition of a m x n floating point matrix with m >= n. + @param[in] pSrc points to input matrix structure. The source matrix is modified by the function. + @param[in] threshold norm2 threshold. + @param[out] pOutR points to output R matrix structure of dimension m x n + @param[out] pOutQ points to output Q matrix structure of dimension m x m + @param[out] pOutTau points to Householder scaling factors of dimension n + @param[inout] pTmpA points to a temporary vector of dimension m. + @param[inout] pTmpB points to a temporary vector of dimension n. + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) + */ + +arm_status arm_mat_qr_f32( + const arm_matrix_instance_f32 * pSrc, + const float32_t threshold, + arm_matrix_instance_f32 * pOutR, + arm_matrix_instance_f32 * pOutQ, + float32_t * pOutTau, + float32_t *pTmpA, + float32_t *pTmpB + ); + +/** + @brief QR decomposition of a m x n floating point matrix with m >= n. + @param[in] pSrc points to input matrix structure. The source matrix is modified by the function. + @param[in] threshold norm2 threshold. + @param[out] pOutR points to output R matrix structure of dimension m x n + @param[out] pOutQ points to output Q matrix structure of dimension m x m + @param[out] pOutTau points to Householder scaling factors of dimension n + @param[inout] pTmpA points to a temporary vector of dimension m. + @param[inout] pTmpB points to a temporary vector of dimension n. + @return execution status + - \ref ARM_MATH_SUCCESS : Operation successful + - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed + - \ref ARM_MATH_SINGULAR : Input matrix is found to be singular (non-invertible) + */ + +arm_status arm_mat_qr_f64( + const arm_matrix_instance_f64 * pSrc, + const float64_t threshold, + arm_matrix_instance_f64 * pOutR, + arm_matrix_instance_f64 * pOutQ, + float64_t * pOutTau, + float64_t *pTmpA, + float64_t *pTmpB + ); + +/** + @brief Householder transform of a floating point vector. + @param[in] pSrc points to the input vector. + @param[in] threshold norm2 threshold. + @param[in] blockSize dimension of the vector space. + @param[outQ] pOut points to the output vector. + @return beta return the scaling factor beta + */ + +float32_t arm_householder_f32( + const float32_t * pSrc, + const float32_t threshold, + uint32_t blockSize, + float32_t * pOut + ); + +/** + @brief Householder transform of a double floating point vector. + @param[in] pSrc points to the input vector. + @param[in] threshold norm2 threshold. + @param[in] blockSize dimension of the vector space. + @param[outQ] pOut points to the output vector. + @return beta return the scaling factor beta + */ + +float64_t arm_householder_f64( + const float64_t * pSrc, + const float64_t threshold, + uint32_t blockSize, + float64_t * pOut + ); + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _MATRIX_FUNCTIONS_H_ */ diff --git a/ARM_math/matrix_utils.h b/ARM_math/matrix_utils.h new file mode 100644 index 0000000..552406f --- /dev/null +++ b/ARM_math/matrix_utils.h @@ -0,0 +1,640 @@ +/****************************************************************************** + * @file matrix_utils.h + * @brief Public header file for CMSIS DSP Library + * @version V1.11.0 + * @date 30 May 2022 + * Target Processor: Cortex-M and Cortex-A cores + ******************************************************************************/ +/* + * Copyright (c) 2010-2022 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef _MATRIX_UTILS_H_ +#define _MATRIX_UTILS_H_ + +#include "arm_math_types.h" +#include "arm_math_memory.h" + +#include "none.h" +#include "utils.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +#define ELEM(A,ROW,COL) &((A)->pData[(A)->numCols* (ROW) + (COL)]) + +#define SCALE_COL_T(T,CAST,A,ROW,v,i) \ +{ \ + int32_t w; \ + T *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + const int32_t nb = (A)->numRows - ROW;\ + \ + data += i + numCols * (ROW); \ + \ + for(w=0;w < nb; w++) \ + { \ + *data *= CAST v; \ + data += numCols; \ + } \ +} + +#define COPY_COL_T(T,A,ROW,COL,DST) \ +{ \ + uint32_t row; \ + T *pb=DST; \ + T *pa = (A)->pData + ROW * (A)->numCols + COL;\ + for(row = ROW; row < (A)->numRows; row ++) \ + { \ + *pb++ = *pa; \ + pa += (A)->numCols; \ + } \ +} + +#if defined(ARM_FLOAT16_SUPPORTED) +#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) + +#define SWAP_ROWS_F16(A,COL,i,j) \ + { \ + int cnt = ((A)->numCols)-(COL); \ + int32_t w; \ + float16_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + \ + for(w=(COL);w < numCols; w+=8) \ + { \ + f16x8_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp16q(cnt); \ + \ + tmpa=vldrhq_z_f16(&data[i*numCols + w],p0);\ + tmpb=vldrhq_z_f16(&data[j*numCols + w],p0);\ + \ + vstrhq_p(&data[i*numCols + w], tmpb, p0); \ + vstrhq_p(&data[j*numCols + w], tmpa, p0); \ + \ + cnt -= 8; \ + } \ + } + +#define SCALE_ROW_F16(A,COL,v,i) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + int32_t w; \ + float16_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + \ + for(w=(COL);w < numCols; w+=8) \ + { \ + f16x8_t tmpa; \ + mve_pred16_t p0 = vctp16q(cnt); \ + tmpa = vldrhq_z_f16(&data[i*numCols + w],p0);\ + tmpa = vmulq_n_f16(tmpa,(_Float16)v); \ + vstrhq_p(&data[i*numCols + w], tmpa, p0); \ + cnt -= 8; \ + } \ + \ +} + +#define MAC_ROW_F16(COL,A,i,v,B,j) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + int32_t w; \ + float16_t *dataA = (A)->pData; \ + float16_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + \ + for(w=(COL);w < numCols; w+=8) \ + { \ + f16x8_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp16q(cnt); \ + tmpa = vldrhq_z_f16(&dataA[i*numCols + w],p0);\ + tmpb = vldrhq_z_f16(&dataB[j*numCols + w],p0);\ + tmpa = vfmaq_n_f16(tmpa,tmpb,v); \ + vstrhq_p(&dataA[i*numCols + w], tmpa, p0); \ + cnt -= 8; \ + } \ + \ +} + +#define MAS_ROW_F16(COL,A,i,v,B,j) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + int32_t w; \ + float16_t *dataA = (A)->pData; \ + float16_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + f16x8_t vec=vdupq_n_f16(v); \ + \ + for(w=(COL);w < numCols; w+=8) \ + { \ + f16x8_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp16q(cnt); \ + tmpa = vldrhq_z_f16(&dataA[i*numCols + w],p0);\ + tmpb = vldrhq_z_f16(&dataB[j*numCols + w],p0);\ + tmpa = vfmsq_f16(tmpa,tmpb,vec); \ + vstrhq_p(&dataA[i*numCols + w], tmpa, p0); \ + cnt -= 8; \ + } \ + \ +} + +#else + + +#define SWAP_ROWS_F16(A,COL,i,j) \ +{ \ + int32_t w; \ + float16_t *dataI = (A)->pData; \ + float16_t *dataJ = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataI += i*numCols + (COL); \ + dataJ += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + float16_t tmp; \ + tmp = *dataI; \ + *dataI++ = *dataJ; \ + *dataJ++ = tmp; \ + } \ +} + +#define SCALE_ROW_F16(A,COL,v,i) \ +{ \ + int32_t w; \ + float16_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + data += i*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *data++ *= (_Float16)v; \ + } \ +} + + +#define MAC_ROW_F16(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float16_t *dataA = (A)->pData; \ + float16_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + const int32_t nb = numCols-(COL); \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ += (_Float16)v * (_Float16)*dataB++;\ + } \ +} + +#define MAS_ROW_F16(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float16_t *dataA = (A)->pData; \ + float16_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + const int32_t nb = numCols-(COL); \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ -= (_Float16)v * (_Float16)*dataB++;\ + } \ +} + +#endif /*defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)*/ + +/* Functions with only a scalar version */ +#define COPY_COL_F16(A,ROW,COL,DST) \ + COPY_COL_T(float16_t,A,ROW,COL,DST) + +#define SCALE_COL_F16(A,ROW,v,i) \ + SCALE_COL_T(float16_t,(_Float16),A,ROW,v,i) + +#endif /* defined(ARM_FLOAT16_SUPPORTED)*/ + +#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) + +#define SWAP_ROWS_F32(A,COL,i,j) \ + { \ + int cnt = ((A)->numCols)-(COL); \ + float32_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + int32_t w; \ + \ + for(w=(COL);w < numCols; w+=4) \ + { \ + f32x4_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp32q(cnt); \ + \ + tmpa=vldrwq_z_f32(&data[i*numCols + w],p0);\ + tmpb=vldrwq_z_f32(&data[j*numCols + w],p0);\ + \ + vstrwq_p(&data[i*numCols + w], tmpb, p0); \ + vstrwq_p(&data[j*numCols + w], tmpa, p0); \ + \ + cnt -= 4; \ + } \ + } + +#define MAC_ROW_F32(COL,A,i,v,B,j) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + int32_t w; \ + \ + for(w=(COL);w < numCols; w+=4) \ + { \ + f32x4_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp32q(cnt); \ + tmpa = vldrwq_z_f32(&dataA[i*numCols + w],p0);\ + tmpb = vldrwq_z_f32(&dataB[j*numCols + w],p0);\ + tmpa = vfmaq_n_f32(tmpa,tmpb,v); \ + vstrwq_p(&dataA[i*numCols + w], tmpa, p0); \ + cnt -= 4; \ + } \ + \ +} + +#define MAS_ROW_F32(COL,A,i,v,B,j) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols; \ + int32_t w; \ + f32x4_t vec=vdupq_n_f32(v); \ + \ + for(w=(COL);w < numCols; w+=4) \ + { \ + f32x4_t tmpa,tmpb; \ + mve_pred16_t p0 = vctp32q(cnt); \ + tmpa = vldrwq_z_f32(&dataA[i*numCols + w],p0);\ + tmpb = vldrwq_z_f32(&dataB[j*numCols + w],p0);\ + tmpa = vfmsq_f32(tmpa,tmpb,vec); \ + vstrwq_p(&dataA[i*numCols + w], tmpa, p0); \ + cnt -= 4; \ + } \ + \ +} + +#define SCALE_ROW_F32(A,COL,v,i) \ +{ \ + int cnt = ((A)->numCols)-(COL); \ + float32_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + int32_t w; \ + \ + for(w=(COL);w < numCols; w+=4) \ + { \ + f32x4_t tmpa; \ + mve_pred16_t p0 = vctp32q(cnt); \ + tmpa = vldrwq_z_f32(&data[i*numCols + w],p0);\ + tmpa = vmulq_n_f32(tmpa,v); \ + vstrwq_p(&data[i*numCols + w], tmpa, p0); \ + cnt -= 4; \ + } \ + \ +} + +#elif defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE) + +#define SWAP_ROWS_F32(A,COL,i,j) \ +{ \ + int32_t w; \ + float32_t *dataI = (A)->pData; \ + float32_t *dataJ = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols - COL; \ + \ + dataI += i*numCols + (COL); \ + dataJ += j*numCols + (COL); \ + \ + float32_t tmp; \ + \ + for(w=0;w < nb; w++) \ + { \ + tmp = *dataI; \ + *dataI++ = *dataJ; \ + *dataJ++ = tmp; \ + } \ +} + +#define MAC_ROW_F32(COL,A,i,v,B,j) \ +{ \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols - (COL); \ + int32_t nbElems; \ + f32x4_t vec = vdupq_n_f32(v); \ + \ + nbElems = nb >> 2; \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + while(nbElems>0) \ + { \ + f32x4_t tmpa,tmpb; \ + tmpa = vld1q_f32(dataA,p0); \ + tmpb = vld1q_f32(dataB,p0); \ + tmpa = vmlaq_f32(tmpa,tmpb,vec);\ + vst1q_f32(dataA, tmpa, p0); \ + nbElems--; \ + dataA += 4; \ + dataB += 4; \ + } \ + \ + nbElems = nb & 3; \ + while(nbElems > 0) \ + { \ + *dataA++ += v* *dataB++; \ + nbElems--; \ + } \ +} + +#define MAS_ROW_F32(COL,A,i,v,B,j) \ +{ \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols - (COL); \ + int32_t nbElems; \ + f32x4_t vec = vdupq_n_f32(v); \ + \ + nbElems = nb >> 2; \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + while(nbElems>0) \ + { \ + f32x4_t tmpa,tmpb; \ + tmpa = vld1q_f32(dataA); \ + tmpb = vld1q_f32(dataB); \ + tmpa = vmlsq_f32(tmpa,tmpb,vec);\ + vst1q_f32(dataA, tmpa); \ + nbElems--; \ + dataA += 4; \ + dataB += 4; \ + } \ + \ + nbElems = nb & 3; \ + while(nbElems > 0) \ + { \ + *dataA++ -= v* *dataB++; \ + nbElems--; \ + } \ +} + +#define SCALE_ROW_F32(A,COL,v,i) \ +{ \ + float32_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + const int32_t nb = numCols - (COL); \ + int32_t nbElems; \ + f32x4_t vec = vdupq_n_f32(v); \ + \ + nbElems = nb >> 2; \ + \ + data += i*numCols + (COL); \ + while(nbElems>0) \ + { \ + f32x4_t tmpa; \ + tmpa = vld1q_f32(data); \ + tmpa = vmulq_f32(tmpa,vec); \ + vst1q_f32(data, tmpa); \ + data += 4; \ + nbElems --; \ + } \ + \ + nbElems = nb & 3; \ + while(nbElems > 0) \ + { \ + *data++ *= v; \ + nbElems--; \ + } \ + \ +} + +#else + +#define SWAP_ROWS_F32(A,COL,i,j) \ +{ \ + int32_t w; \ + float32_t tmp; \ + float32_t *dataI = (A)->pData; \ + float32_t *dataJ = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols - COL; \ + \ + dataI += i*numCols + (COL); \ + dataJ += j*numCols + (COL); \ + \ + \ + for(w=0;w < nb; w++) \ + { \ + tmp = *dataI; \ + *dataI++ = *dataJ; \ + *dataJ++ = tmp; \ + } \ +} + +#define SCALE_ROW_F32(A,COL,v,i) \ +{ \ + int32_t w; \ + float32_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols - COL; \ + \ + data += i*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *data++ *= v; \ + } \ +} + + +#define MAC_ROW_F32(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataA = dataA + i*numCols + (COL); \ + dataB = dataB + j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ += v* *dataB++; \ + } \ +} + +#define MAS_ROW_F32(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float32_t *dataA = (A)->pData; \ + float32_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataA = dataA + i*numCols + (COL); \ + dataB = dataB + j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ -= v* *dataB++; \ + } \ +} + +#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ + + +/* Functions with only a scalar version */ + +#define COPY_COL_F32(A,ROW,COL,DST) \ + COPY_COL_T(float32_t,A,ROW,COL,DST) + +#define COPY_COL_F64(A,ROW,COL,DST) \ + COPY_COL_T(float64_t,A,ROW,COL,DST) + +#define SWAP_COLS_F32(A,COL,i,j) \ +{ \ + int32_t w; \ + float32_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + for(w=(COL);w < numCols; w++) \ + { \ + float32_t tmp; \ + tmp = data[w*numCols + i]; \ + data[w*numCols + i] = data[w*numCols + j];\ + data[w*numCols + j] = tmp; \ + } \ +} + +#define SCALE_COL_F32(A,ROW,v,i) \ + SCALE_COL_T(float32_t,,A,ROW,v,i) + +#define SWAP_ROWS_F64(A,COL,i,j) \ +{ \ + int32_t w; \ + float64_t *dataI = (A)->pData; \ + float64_t *dataJ = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataI += i*numCols + (COL); \ + dataJ += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + float64_t tmp; \ + tmp = *dataI; \ + *dataI++ = *dataJ; \ + *dataJ++ = tmp; \ + } \ +} + +#define SWAP_COLS_F64(A,COL,i,j) \ +{ \ + int32_t w; \ + float64_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols; \ + for(w=(COL);w < numCols; w++) \ + { \ + float64_t tmp; \ + tmp = data[w*numCols + i]; \ + data[w*numCols + i] = data[w*numCols + j];\ + data[w*numCols + j] = tmp; \ + } \ +} + +#define SCALE_ROW_F64(A,COL,v,i) \ +{ \ + int32_t w; \ + float64_t *data = (A)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + data += i*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *data++ *= v; \ + } \ +} + +#define SCALE_COL_F64(A,ROW,v,i) \ + SCALE_COL_T(float64_t,,A,ROW,v,i) + +#define MAC_ROW_F64(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float64_t *dataA = (A)->pData; \ + float64_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ += v* *dataB++; \ + } \ +} + +#define MAS_ROW_F64(COL,A,i,v,B,j) \ +{ \ + int32_t w; \ + float64_t *dataA = (A)->pData; \ + float64_t *dataB = (B)->pData; \ + const int32_t numCols = (A)->numCols;\ + const int32_t nb = numCols-(COL); \ + \ + dataA += i*numCols + (COL); \ + dataB += j*numCols + (COL); \ + \ + for(w=0;w < nb; w++) \ + { \ + *dataA++ -= v* *dataB++; \ + } \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _MATRIX_UTILS_H_ */ diff --git a/ARM_math/none.h b/ARM_math/none.h new file mode 100644 index 0000000..130386e --- /dev/null +++ b/ARM_math/none.h @@ -0,0 +1,576 @@ +/****************************************************************************** + * @file none.h + * @brief Intrinsincs when no DSP extension available + * @version V1.9.0 + * @date 20. July 2020 + ******************************************************************************/ +/* + * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + +Definitions in this file are allowing to reuse some versions of the +CMSIS-DSP to build on a core (M0 for instance) or a host where +DSP extension are not available. + +Ideally a pure C version should have been used instead. +But those are not always available or use a restricted set +of intrinsics. + +*/ + +#ifndef _NONE_H_ +#define _NONE_H_ + +#include "arm_math_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + + + +/* + +Normally those kind of definitions are in a compiler file +in Core or Core_A. + +But for MSVC compiler it is a bit special. The goal is very specific +to CMSIS-DSP and only to allow the use of this library from other +systems like Python or Matlab. + +MSVC is not going to be used to cross-compile to ARM. So, having a MSVC +compiler file in Core or Core_A would not make sense. + +*/ +#if defined ( _MSC_VER ) || defined(__GNUC_PYTHON__) || defined(__APPLE_CC__) + __STATIC_FORCEINLINE uint8_t __CLZ(uint32_t data) + { + if (data == 0U) { return 32U; } + + uint32_t count = 0U; + uint32_t mask = 0x80000000U; + + while ((data & mask) == 0U) + { + count += 1U; + mask = mask >> 1U; + } + return count; + } + + __STATIC_FORCEINLINE int32_t __SSAT(int32_t val, uint32_t sat) + { + if ((sat >= 1U) && (sat <= 32U)) + { + const int32_t max = (int32_t)((1U << (sat - 1U)) - 1U); + const int32_t min = -1 - max ; + if (val > max) + { + return max; + } + else if (val < min) + { + return min; + } + } + return val; + } + + __STATIC_FORCEINLINE uint32_t __USAT(int32_t val, uint32_t sat) + { + if (sat <= 31U) + { + const uint32_t max = ((1U << sat) - 1U); + if (val > (int32_t)max) + { + return max; + } + else if (val < 0) + { + return 0U; + } + } + return (uint32_t)val; + } + + /** + \brief Rotate Right in unsigned value (32 bit) + \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. + \param [in] op1 Value to rotate + \param [in] op2 Number of Bits to rotate + \return Rotated value + */ +__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) +{ + op2 %= 32U; + if (op2 == 0U) + { + return op1; + } + return (op1 >> op2) | (op1 << (32U - op2)); +} + + +#endif + +/** + * @brief Clips Q63 to Q31 values. + */ + __STATIC_FORCEINLINE q31_t clip_q63_to_q31( + q63_t x) + { + return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? + ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x; + } + + /** + * @brief Clips Q63 to Q15 values. + */ + __STATIC_FORCEINLINE q15_t clip_q63_to_q15( + q63_t x) + { + return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ? + ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15); + } + + /** + * @brief Clips Q31 to Q7 values. + */ + __STATIC_FORCEINLINE q7_t clip_q31_to_q7( + q31_t x) + { + return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ? + ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x; + } + + /** + * @brief Clips Q31 to Q15 values. + */ + __STATIC_FORCEINLINE q15_t clip_q31_to_q15( + q31_t x) + { + return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ? + ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x; + } + + /** + * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format. + */ + __STATIC_FORCEINLINE q63_t mult32x64( + q63_t x, + q31_t y) + { + return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) + + (((q63_t) (x >> 32) * y) ) ); + } + +/* SMMLAR */ +#define multAcc_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((((q63_t) a) << 32) + ((q63_t) x * y) + 0x80000000LL ) >> 32) + +/* SMMLSR */ +#define multSub_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((((q63_t) a) << 32) - ((q63_t) x * y) + 0x80000000LL ) >> 32) + +/* SMMULR */ +#define mult_32x32_keep32_R(a, x, y) \ + a = (q31_t) (((q63_t) x * y + 0x80000000LL ) >> 32) + +/* SMMLA */ +#define multAcc_32x32_keep32(a, x, y) \ + a += (q31_t) (((q63_t) x * y) >> 32) + +/* SMMLS */ +#define multSub_32x32_keep32(a, x, y) \ + a -= (q31_t) (((q63_t) x * y) >> 32) + +/* SMMUL */ +#define mult_32x32_keep32(a, x, y) \ + a = (q31_t) (((q63_t) x * y ) >> 32) + +#ifndef ARM_MATH_DSP + /** + * @brief definition to pack two 16 bit values. + */ + #define __PKHBT(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0x0000FFFF) | \ + (((int32_t)(ARG2) << ARG3) & (int32_t)0xFFFF0000) ) + #define __PKHTB(ARG1, ARG2, ARG3) ( (((int32_t)(ARG1) << 0) & (int32_t)0xFFFF0000) | \ + (((int32_t)(ARG2) >> ARG3) & (int32_t)0x0000FFFF) ) +#endif + + /** + * @brief definition to pack four 8 bit values. + */ +#ifndef ARM_MATH_BIG_ENDIAN + #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v3) << 24) & (int32_t)0xFF000000) ) +#else + #define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) << 0) & (int32_t)0x000000FF) | \ + (((int32_t)(v2) << 8) & (int32_t)0x0000FF00) | \ + (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \ + (((int32_t)(v0) << 24) & (int32_t)0xFF000000) ) +#endif + + + + +/* + * @brief C custom defined intrinsic functions + */ +#if !defined (ARM_MATH_DSP) + + + /* + * @brief C custom defined QADD8 + */ + __STATIC_FORCEINLINE uint32_t __QADD8( + uint32_t x, + uint32_t y) + { + q31_t r, s, t, u; + + r = __SSAT(((((q31_t)x << 24) >> 24) + (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; + s = __SSAT(((((q31_t)x << 16) >> 24) + (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; + t = __SSAT(((((q31_t)x << 8) >> 24) + (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; + u = __SSAT(((((q31_t)x ) >> 24) + (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; + + return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); + } + + + /* + * @brief C custom defined QSUB8 + */ + __STATIC_FORCEINLINE uint32_t __QSUB8( + uint32_t x, + uint32_t y) + { + q31_t r, s, t, u; + + r = __SSAT(((((q31_t)x << 24) >> 24) - (((q31_t)y << 24) >> 24)), 8) & (int32_t)0x000000FF; + s = __SSAT(((((q31_t)x << 16) >> 24) - (((q31_t)y << 16) >> 24)), 8) & (int32_t)0x000000FF; + t = __SSAT(((((q31_t)x << 8) >> 24) - (((q31_t)y << 8) >> 24)), 8) & (int32_t)0x000000FF; + u = __SSAT(((((q31_t)x ) >> 24) - (((q31_t)y ) >> 24)), 8) & (int32_t)0x000000FF; + + return ((uint32_t)((u << 24) | (t << 16) | (s << 8) | (r ))); + } + + + /* + * @brief C custom defined QADD16 + */ + __STATIC_FORCEINLINE uint32_t __QADD16( + uint32_t x, + uint32_t y) + { +/* q31_t r, s; without initialisation 'arm_offset_q15 test' fails but 'intrinsic' tests pass! for armCC */ + q31_t r = 0, s = 0; + + r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHADD16 + */ + __STATIC_FORCEINLINE uint32_t __SHADD16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QSUB16 + */ + __STATIC_FORCEINLINE uint32_t __QSUB16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHSUB16 + */ + __STATIC_FORCEINLINE uint32_t __SHSUB16( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QASX + */ + __STATIC_FORCEINLINE uint32_t __QASX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHASX + */ + __STATIC_FORCEINLINE uint32_t __SHASX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) - (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) + (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined QSAX + */ + __STATIC_FORCEINLINE uint32_t __QSAX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = __SSAT(((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)), 16) & (int32_t)0x0000FFFF; + s = __SSAT(((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)), 16) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SHSAX + */ + __STATIC_FORCEINLINE uint32_t __SHSAX( + uint32_t x, + uint32_t y) + { + q31_t r, s; + + r = (((((q31_t)x << 16) >> 16) + (((q31_t)y ) >> 16)) >> 1) & (int32_t)0x0000FFFF; + s = (((((q31_t)x ) >> 16) - (((q31_t)y << 16) >> 16)) >> 1) & (int32_t)0x0000FFFF; + + return ((uint32_t)((s << 16) | (r ))); + } + + + /* + * @brief C custom defined SMUSDX + */ + __STATIC_FORCEINLINE uint32_t __SMUSDX( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); + } + + /* + * @brief C custom defined SMUADX + */ + __STATIC_FORCEINLINE uint32_t __SMUADX( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) )); + } + + + /* + * @brief C custom defined QADD + */ + __STATIC_FORCEINLINE int32_t __QADD( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_q63_to_q31((q63_t)x + (q31_t)y))); + } + + + /* + * @brief C custom defined QSUB + */ + __STATIC_FORCEINLINE int32_t __QSUB( + int32_t x, + int32_t y) + { + return ((int32_t)(clip_q63_to_q31((q63_t)x - (q31_t)y))); + } + + + /* + * @brief C custom defined SMLAD + */ + __STATIC_FORCEINLINE uint32_t __SMLAD( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLADX + */ + __STATIC_FORCEINLINE uint32_t __SMLADX( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLSDX + */ + __STATIC_FORCEINLINE uint32_t __SMLSDX( + uint32_t x, + uint32_t y, + uint32_t sum) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q31_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLALD + */ + __STATIC_FORCEINLINE uint64_t __SMLALD( + uint32_t x, + uint32_t y, + uint64_t sum) + { +/* return (sum + ((q15_t) (x >> 16) * (q15_t) (y >> 16)) + ((q15_t) x * (q15_t) y)); */ + return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) + + ( ((q63_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMLALDX + */ + __STATIC_FORCEINLINE uint64_t __SMLALDX( + uint32_t x, + uint32_t y, + uint64_t sum) + { +/* return (sum + ((q15_t) (x >> 16) * (q15_t) y)) + ((q15_t) x * (q15_t) (y >> 16)); */ + return ((uint64_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y ) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y << 16) >> 16)) + + ( ((q63_t)sum ) ) )); + } + + + /* + * @brief C custom defined SMUAD + */ + __STATIC_FORCEINLINE uint32_t __SMUAD( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) + + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); + } + + + /* + * @brief C custom defined SMUSD + */ + __STATIC_FORCEINLINE uint32_t __SMUSD( + uint32_t x, + uint32_t y) + { + return ((uint32_t)(((((q31_t)x << 16) >> 16) * (((q31_t)y << 16) >> 16)) - + ((((q31_t)x ) >> 16) * (((q31_t)y ) >> 16)) )); + } + + + /* + * @brief C custom defined SXTB16 + */ + __STATIC_FORCEINLINE uint32_t __SXTB16( + uint32_t x) + { + return ((uint32_t)(((((q31_t)x << 24) >> 24) & (q31_t)0x0000FFFF) | + ((((q31_t)x << 8) >> 8) & (q31_t)0xFFFF0000) )); + } + + /* + * @brief C custom defined SMMLA + */ + __STATIC_FORCEINLINE int32_t __SMMLA( + int32_t x, + int32_t y, + int32_t sum) + { + return (sum + (int32_t) (((int64_t) x * y) >> 32)); + } + +#endif /* !defined (ARM_MATH_DSP) */ + + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef _TRANSFORM_FUNCTIONS_H_ */ diff --git a/ARM_math/utils.h b/ARM_math/utils.h new file mode 100644 index 0000000..7b1da5b --- /dev/null +++ b/ARM_math/utils.h @@ -0,0 +1,250 @@ +/****************************************************************************** + * @file arm_math_utils.h + * @brief Public header file for CMSIS DSP Library + * @version V1.9.0 + * @date 20. July 2020 + ******************************************************************************/ +/* + * Copyright (c) 2010-2020 Arm Limited or its affiliates. All rights reserved. + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the License); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _ARM_MATH_UTILS_H_ + +#define _ARM_MATH_UTILS_H_ + +#include "arm_math_types.h" +#include + +#ifdef __cplusplus +extern "C" +{ +#endif + + /** + * @brief Macros required for reciprocal calculation in Normalized LMS + */ + +#define INDEX_MASK 0x0000003F + + +#define SQ(x) ((x) * (x)) + +#define ROUND_UP(N, S) ((((N) + (S) - 1) / (S)) * (S)) + + + /** + * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type. + It should not be used with negative values. + */ + __STATIC_FORCEINLINE uint32_t arm_recip_q31( + q31_t in, + q31_t * dst, + const q31_t * pRecipTable) + { + q31_t out; + uint32_t tempVal; + uint32_t index, i; + uint32_t signBits; + + if (in > 0) + { + signBits = ((uint32_t) (__CLZ( (uint32_t)in) - 1)); + } + else + { + signBits = ((uint32_t) (__CLZ((uint32_t)(-in)) - 1)); + } + + /* Convert input sample to 1.31 format */ + in = (in << signBits); + + /* calculation of index for initial approximated Val */ + index = (uint32_t)(in >> 24); + index = (index & INDEX_MASK); + + /* 1.31 with exp 1 */ + out = pRecipTable[index]; + + /* calculation of reciprocal value */ + /* running approximation for two iterations */ + for (i = 0U; i < 2U; i++) + { + tempVal = (uint32_t) (((q63_t) in * out) >> 31); + tempVal = 0x7FFFFFFFu - tempVal; + /* 1.31 with exp 1 */ + /* out = (q31_t) (((q63_t) out * tempVal) >> 30); */ + out = clip_q63_to_q31(((q63_t) out * tempVal) >> 30); + } + + /* write output */ + *dst = out; + + /* return num of signbits of out = 1/in value */ + return (signBits + 1U); + } + + + /** + * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type. + It should not be used with negative values. + */ + __STATIC_FORCEINLINE uint32_t arm_recip_q15( + q15_t in, + q15_t * dst, + const q15_t * pRecipTable) + { + q15_t out = 0; + int32_t tempVal = 0; + uint32_t index = 0, i = 0; + uint32_t signBits = 0; + + if (in > 0) + { + signBits = ((uint32_t)(__CLZ( (uint32_t)in) - 17)); + } + else + { + signBits = ((uint32_t)(__CLZ((uint32_t)(-in)) - 17)); + } + + /* Convert input sample to 1.15 format */ + in = (q15_t)(in << signBits); + + /* calculation of index for initial approximated Val */ + index = (uint32_t)(in >> 8); + index = (index & INDEX_MASK); + + /* 1.15 with exp 1 */ + out = pRecipTable[index]; + + /* calculation of reciprocal value */ + /* running approximation for two iterations */ + for (i = 0U; i < 2U; i++) + { + tempVal = (((q31_t) in * out) >> 15); + tempVal = 0x7FFF - tempVal; + /* 1.15 with exp 1 */ + out = (q15_t) (((q31_t) out * tempVal) >> 14); + /* out = clip_q31_to_q15(((q31_t) out * tempVal) >> 14); */ + } + + /* write output */ + *dst = out; + + /* return num of signbits of out = 1/in value */ + return (signBits + 1); + } + + +/** + * @brief 64-bit to 32-bit unsigned normalization + * @param[in] in is input unsigned long long value + * @param[out] normalized is the 32-bit normalized value + * @param[out] norm is norm scale + */ +__STATIC_INLINE void arm_norm_64_to_32u(uint64_t in, int32_t * normalized, int32_t *norm) +{ + int32_t n1; + int32_t hi = (int32_t) (in >> 32); + int32_t lo = (int32_t) ((in << 32) >> 32); + + n1 = __CLZ((uint32_t)hi) - 32; + if (!n1) + { + /* + * input fits in 32-bit + */ + n1 = __CLZ((uint32_t)lo); + if (!n1) + { + /* + * MSB set, need to scale down by 1 + */ + *norm = -1; + *normalized = (((uint32_t) lo) >> 1); + } else + { + if (n1 == 32) + { + /* + * input is zero + */ + *norm = 0; + *normalized = 0; + } else + { + /* + * 32-bit normalization + */ + *norm = n1 - 1; + *normalized = lo << *norm; + } + } + } else + { + /* + * input fits in 64-bit + */ + n1 = 1 - n1; + *norm = -n1; + /* + * 64 bit normalization + */ + *normalized = (int32_t)(((uint32_t)lo) >> n1) | (hi << (32 - n1)); + } +} + +__STATIC_INLINE int32_t arm_div_int64_to_int32(int64_t num, int32_t den) +{ + int32_t result; + uint64_t absNum; + int32_t normalized; + int32_t norm; + + /* + * if sum fits in 32bits + * avoid costly 64-bit division + */ + if (num == (int64_t)LONG_MIN) + { + absNum = LONG_MAX; + } + else + { + absNum = (uint64_t) (num > 0 ? num : -num); + } + arm_norm_64_to_32u(absNum, &normalized, &norm); + if (norm > 0) + /* + * 32-bit division + */ + result = (int32_t) num / den; + else + /* + * 64-bit division + */ + result = (int32_t) (num / den); + + return result; +} + + +#ifdef __cplusplus +} +#endif + +#endif /*ifndef _ARM_MATH_UTILS_H_ */ diff --git a/Filter_Design/Kalman_XVA_acc_offset.m b/Filter_Design/Kalman_XVA_acc_offset.m index 99ce527..82306ea 100644 --- a/Filter_Design/Kalman_XVA_acc_offset.m +++ b/Filter_Design/Kalman_XVA_acc_offset.m @@ -91,33 +91,33 @@ end -%subplot( 4, 1, 1); -figure(1); +subplot( 3, 1, 1); +%figure(1); plot(acc_est, 'r'); hold; plot(acc,'g'); -plot(acc_m, '+'); +plot(acc_m, '.'); plot(acc_est_x); plot(acc_offset); legend('Estimated Acc.','True Acc.','Acc.Measurement','Acc. Offset est.','Acc. Offset'); title('Kalmanfilter Movement Estimation'); ylabel('Accel. / m/s/s'); grid; -%subplot( 4, 1, 2); -figure(2); +subplot( 3, 1, 2); +%figure(2); plot(vel_est,'r'); hold; plot(vel,'g'); -plot(vel_m, '+'); +plot(vel_m, '.'); legend('Estimated Velocity','True Velocity','Velocity Measurement'); ylabel('Velocity / m/s'); grid; -%subplot( 4, 1, 3) -figure(3); +subplot( 3, 1, 3) +%figure(3); plot(pos_est, 'r'); hold; plot(pos,'g'); -plot(pos_m, '+'); +plot(pos_m, '.'); ylabel('Position / m'); legend('Estimated Position','True Position','Position Measurement'); xlabel('Time / 10ms'); diff --git a/Filter_Design/induction_sensor_matrix_plus_soft_iron_compensation.m b/Filter_Design/induction_sensor_matrix_plus_soft_iron_compensation.m new file mode 100644 index 0000000..45ca54d --- /dev/null +++ b/Filter_Design/induction_sensor_matrix_plus_soft_iron_compensation.m @@ -0,0 +1,144 @@ +if( 1) + start = 1 + stop=length(x) +else + start = 105 * 6000; % choose ideal flight section + stop = 300 * 6000; +end + +new_sample_rate = 10; % Hz +old_sample_rate = 100; + +if 1 + a_start = round( start * new_sample_rate / old_sample_rate); + a_stop = round( stop * new_sample_rate / old_sample_rate); +else + a_start=1000; + a_stop=length ( mag_err_predicted_x) -1000; +end + +off_x = 0; % artificial offset injection +off_y = 0; +off_z = 0; + +mag_x = resample( x(114,start:stop),new_sample_rate, old_sample_rate )- off_x; +mag_y = resample( x(115,start:stop),new_sample_rate, old_sample_rate )- off_y; +mag_z = resample( x(116,start:stop),new_sample_rate, old_sample_rate )- off_z; + +err_x = resample( x(117,start:stop), new_sample_rate, old_sample_rate)- off_x; +err_y = resample( x(118,start:stop), new_sample_rate, old_sample_rate)- off_y; +err_z = resample( x(119,start:stop), new_sample_rate, old_sample_rate)- off_z; + +ideal_mag_x = mag_x - err_x; +ideal_mag_y = mag_y - err_y; +ideal_mag_z = mag_z - err_z; + +q1 = resample( x(53,start:stop),new_sample_rate, old_sample_rate ); +q2 = resample( x(54,start:stop),new_sample_rate, old_sample_rate ); +q3 = resample( x(55,start:stop),new_sample_rate, old_sample_rate ); +q4 = resample( x(56,start:stop),new_sample_rate, old_sample_rate ); + +% calculate sensor mapping matrix + +input = [ mag_x' mag_y' mag_z']; + + modelterms = ... +[ + 0 0 0; ... + 1 0 0; ... + 0 1 0; ... + 0 0 1]; + +sensor_matrix_x = polyfitn( input, ideal_mag_x, modelterms) +induction_calibrated_x = polyvaln( sensor_matrix_x, input)'; + +sensor_matrix_y = polyfitn( input, ideal_mag_y, modelterms) +induction_calibrated_y = polyvaln( sensor_matrix_y, input)'; + +sensor_matrix_z = polyfitn( input, ideal_mag_z, modelterms) +induction_calibrated_z = polyvaln( sensor_matrix_z, input)'; + + +% calculate soft iron correction terms + +input = [ q1' q2' q3' q4']; + + modelterms = ... +[ + 0 0 0 0; ... + 2 0 0 0; ... + 1 1 0 0; ... + 1 0 1 0; ... + 1 0 0 1; ... + 0 2 0 0; ... + 0 1 1 0; ... + 0 1 0 1; ... + 0 0 2 0; ... + 0 0 1 1; ... + 0 0 0 2]; + +compensation_poly_x = polyfitn( input, induction_calibrated_x - ideal_mag_x, modelterms) +compensation_poly_y = polyfitn( input, induction_calibrated_y - ideal_mag_y, modelterms) +compensation_poly_z = polyfitn( input, induction_calibrated_z - ideal_mag_z, modelterms) + +soft_iron_err_predicted_x = polyvaln( compensation_poly_x, input)'; +soft_iron_err_predicted_y = polyvaln( compensation_poly_y, input)'; +soft_iron_err_predicted_z = polyvaln( compensation_poly_z, input)'; + +induction_corrected_x = induction_calibrated_x - soft_iron_err_predicted_x; +induction_corrected_y = induction_calibrated_y - soft_iron_err_predicted_y; +induction_corrected_z = induction_calibrated_z - soft_iron_err_predicted_z; + +residual_error_x = induction_corrected_x - ideal_mag_x; +residual_error_y = induction_corrected_y - ideal_mag_y; +residual_error_z = induction_corrected_z - ideal_mag_z; + +std_error_x = std( err_x) +std_error_x_corrected = std( residual_error_x) + +std_error_y = std( err_y) +std_error_y_corrected = std( residual_error_y) + +std_error_z = std( err_z) +std_error_z_corrected = std( residual_error_z) + +orientation_angle_error = sqrt( std_error_x * std_error_x + std_error_y * std_error_y +std_error_z * std_error_z)*180/pi + +time = linspace( start / 6000, stop / 6000, length( err_x)); + +a=subplot( 3,1,1); +plot( time, err_x,'Color','red','LineWidth',2.0); +grid +hold +plot( time, induction_calibrated_x - ideal_mag_x,'Color','blue','LineWidth',2.0); +plot( time, induction_corrected_x - ideal_mag_x,'Color','black','LineWidth',2.0); +legend('Observed','Calibrated','Soft-Iron-Corrected') +title('Magnetic Field Error Front') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +%axis([0 max(time) -0.1 0.1]); + +b=subplot( 3,1,2); +plot( time, err_y,'Color','red','LineWidth',2.0); +grid +hold +plot( time, induction_calibrated_y - ideal_mag_y,'Color','blue','LineWidth',2.0); +plot( time, induction_corrected_y - ideal_mag_y,'Color','black','LineWidth',2.0); +legend('Observed','Calibrated','Soft-Iron-Corrected') +title('Magnetic Field Error Right') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +%axis([0 max(time) -0.1 0.1]); + +c=subplot( 3,1,3); +plot( time, err_z,'Color','red','LineWidth',2.0); +grid +hold +plot( time, induction_calibrated_z - ideal_mag_z,'Color','blue','LineWidth',2.0); +plot( time, induction_corrected_z - ideal_mag_z,'Color','black','LineWidth',2.0); +legend('Observed','Calibrated','Soft-Iron-Corrected') +title('Magnetic Field Error Down') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); + +linkaxes([a b c],'x'); diff --git a/Filter_Design/magnetic_calibration_SVD.m b/Filter_Design/magnetic_calibration_SVD.m new file mode 100644 index 0000000..5d66f08 --- /dev/null +++ b/Filter_Design/magnetic_calibration_SVD.m @@ -0,0 +1,125 @@ +start =5*6000; +stop = length(x); + +format long; + +new_sample_rate = 10; % Hz +old_sample_rate = 100; + +mag_x = resample( x(114,start:stop),new_sample_rate, old_sample_rate ); +mag_y = resample( x(115,start:stop),new_sample_rate, old_sample_rate ); +mag_z = resample( x(116,start:stop),new_sample_rate, old_sample_rate ); + +err_x = resample( x(117,start:stop), new_sample_rate, old_sample_rate); +err_y = resample( x(118,start:stop), new_sample_rate, old_sample_rate); +err_z = resample( x(119,start:stop), new_sample_rate, old_sample_rate); + +q1 = resample( x(53,start:stop),new_sample_rate, old_sample_rate ); +q2 = resample( x(54,start:stop),new_sample_rate, old_sample_rate ); +q3 = resample( x(55,start:stop),new_sample_rate, old_sample_rate ); +q4 = resample( x(56,start:stop),new_sample_rate, old_sample_rate ); + +input = [mag_x' mag_y' mag_z' q1' q2' q3' q4']; + +modelterms = ... +[ + 0 0 0 0 0 0 0; + 1 0 0 0 0 0 0; ... + 0 0 0 1 1 0 0; ... + 0 0 0 1 0 1 0; ... + 0 0 0 1 0 0 1; ... + 0 0 0 0 2 0 0; ... + 0 0 0 0 1 1 0; ... + 0 0 0 0 1 0 1; ... + 0 0 0 0 0 2 0; ... + 0 0 0 0 0 1 1; ... + 0 0 0 0 0 0 2]; + +poly_x = polyfitn( input, err_x, modelterms) +modelterms = ... +[ + 0 0 0 0 0 0 0; + 0 1 0 0 0 0 0; ... + 0 0 0 1 1 0 0; ... + 0 0 0 1 0 1 0; ... + 0 0 0 1 0 0 1; ... + 0 0 0 0 2 0 0; ... + 0 0 0 0 1 1 0; ... + 0 0 0 0 1 0 1; ... + 0 0 0 0 0 2 0; ... + 0 0 0 0 0 1 1; ... + 0 0 0 0 0 0 2]; +poly_y = polyfitn( input, err_y, modelterms) +modelterms = ... +[ + 0 0 0 0 0 0 0; + 0 0 1 0 0 0 0; ... + 0 0 0 1 1 0 0; ... + 0 0 0 1 0 1 0; ... + 0 0 0 1 0 0 1; ... + 0 0 0 0 2 0 0; ... + 0 0 0 0 1 1 0; ... + 0 0 0 0 1 0 1; ... + 0 0 0 0 0 2 0; ... + 0 0 0 0 0 1 1; ... + 0 0 0 0 0 0 2]; +poly_z = polyfitn( input, err_z, modelterms) + +mag_err_predicted_x = polyvaln( poly_x, input)'; +mag_err_predicted_y = polyvaln( poly_y, input)'; +mag_err_predicted_z = polyvaln( poly_z, input)'; + +std_error_x = std( err_x) +std_error_x_corrected = std( err_x - mag_err_predicted_x) + +std_error_y = std( err_y) +std_error_y_corrected = std( err_y - mag_err_predicted_y) + +std_error_z = std( err_z) +std_error_z_corrected = std( err_z - mag_err_predicted_z) + +std_error_mag_induction_abs = std( sqrt( ... + (err_x - mag_err_predicted_x).*(err_x - mag_err_predicted_x)+ ... + (err_y - mag_err_predicted_y).*(err_y - mag_err_predicted_y)+ ... + (err_z - mag_err_predicted_z).*(err_z - mag_err_predicted_z))) + +orintation_angle_error = sqrt( std_error_x * std_error_x + std_error_y * std_error_y +std_error_z * std_error_z)*180/pi + +time = linspace( start / 6000, stop/6000, length( err_x)); + +a=subplot( 3,1,1); +plot( time, err_x,'Color','red'); +grid +hold +plot( time, err_x - mag_err_predicted_x,'Color','black'); +legend('Observed','Corrected') +title('Magnetic Field Error Front') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +b=subplot( 3,1,2); +plot( time, err_y,'Color','red'); +grid +hold +plot( time, err_y - mag_err_predicted_y,'Color','black'); +legend('Observed','Corrected') +title('Magnetic Field Error Right') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +c=subplot( 3,1,3); +plot( time, err_z,'Color','red'); +grid +hold +plot( time, err_z - mag_err_predicted_z,'Color','black'); +%plot( time, err_z - testz','Color','blue'); +legend('Observed','Corrected') +title('Magnetic Field Error Down') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +linkaxes([a b c],'x'); + diff --git a/Filter_Design/magnetic_compensation_SVD_work.m b/Filter_Design/magnetic_compensation_SVD_work.m new file mode 100644 index 0000000..019bb49 --- /dev/null +++ b/Filter_Design/magnetic_compensation_SVD_work.m @@ -0,0 +1,99 @@ +start = 80*6000; +stop = 368*6000; +%stop = length(x); + +format long; + +new_sample_rate = 10; % Hz +old_sample_rate = 100; + +mag_x = resample( x(114,start:stop),new_sample_rate, old_sample_rate ); +mag_y = resample( x(115,start:stop),new_sample_rate, old_sample_rate ); +mag_z = resample( x(116,start:stop),new_sample_rate, old_sample_rate ); + +err_x = resample( x(117,start:stop), new_sample_rate, old_sample_rate); +err_y = resample( x(118,start:stop), new_sample_rate, old_sample_rate); +err_z = resample( x(119,start:stop), new_sample_rate, old_sample_rate); + +q1 = resample( x(53,start:stop),new_sample_rate, old_sample_rate ); +q2 = resample( x(54,start:stop),new_sample_rate, old_sample_rate ); +q3 = resample( x(55,start:stop),new_sample_rate, old_sample_rate ); +q4 = resample( x(56,start:stop),new_sample_rate, old_sample_rate ); + +input = [q1' q2' q3' q4']; + +modelterms = ... +[ + 0 0 0 0; + 1 1 0 0; ... + 1 0 1 0; ... + 1 0 0 1; ... + 0 2 0 0; ... + 0 1 1 0; ... + 0 1 0 1; ... + 0 0 2 0; ... + 0 0 1 1; ... + 0 0 0 2]; + +poly_x = polyfitn( input, err_x, modelterms) +poly_y = polyfitn( input, err_y, modelterms) +poly_z = polyfitn( input, err_z, modelterms) + +mag_err_predicted_x = polyvaln( poly_x, input)'; +mag_err_predicted_y = polyvaln( poly_y, input)'; +mag_err_predicted_z = polyvaln( poly_z, input)'; + +std_error_x = std( err_x) +std_error_x_corrected = std( err_x - mag_err_predicted_x) + +std_error_y = std( err_y) +std_error_y_corrected = std( err_y - mag_err_predicted_y) + +std_error_z = std( err_z) +std_error_z_corrected = std( err_z - mag_err_predicted_z) + +std_error_mag_induction_abs = std( sqrt( ... + (err_x - mag_err_predicted_x).*(err_x - mag_err_predicted_x)+ ... + (err_y - mag_err_predicted_y).*(err_y - mag_err_predicted_y)+ ... + (err_z - mag_err_predicted_z).*(err_z - mag_err_predicted_z))) + +orintation_angle_error = sqrt( std_error_x * std_error_x + std_error_y * std_error_y +std_error_z * std_error_z)*180/pi + +time = linspace( start / 6000, stop/6000, length( err_x)); + +a=subplot( 3,1,1); +plot( time, err_x,'Color','red'); +grid +hold +plot( time, err_x - mag_err_predicted_x,'Color','black'); +legend('Observed','Corrected') +title('Magnetic Field Error Front') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +b=subplot( 3,1,2); +plot( time, err_y,'Color','red'); +grid +hold +plot( time, err_y - mag_err_predicted_y,'Color','black'); +legend('Observed','Corrected') +title('Magnetic Field Error Right') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +c=subplot( 3,1,3); +plot( time, err_z,'Color','red'); +grid +hold +plot( time, err_z - mag_err_predicted_z,'Color','black'); +%plot( time, err_z - testz','Color','blue'); +legend('Observed','Corrected') +title('Magnetic Field Error Down') +xlabel('Time / min.'); +ylabel('Magn. Ind. Normalized'); +axis([0 max(time) -0.1 0.1]); + +linkaxes([a b c],'x'); + diff --git a/Generic_Algorithms/accumulating_averager.h b/Generic_Algorithms/accumulating_averager.h index b0e964e..5630c55 100644 --- a/Generic_Algorithms/accumulating_averager.h +++ b/Generic_Algorithms/accumulating_averager.h @@ -1,6 +1,7 @@ #ifndef GENERIC_ALGORITHMS_ACCUMULATING_AVERAGER_H_ #define GENERIC_ALGORITHMS_ACCUMULATING_AVERAGER_H_ +//! simple average mechanism using the sum of input values template class accumulating_averager { public: diff --git a/Generic_Algorithms/boxcar_averager.h b/Generic_Algorithms/boxcar_averager.h index 8964283..00da3e7 100644 --- a/Generic_Algorithms/boxcar_averager.h +++ b/Generic_Algorithms/boxcar_averager.h @@ -8,6 +8,7 @@ #ifndef GENERIC_ALGORITHMS_BOXCAR_AVERAGER_H_ #define GENERIC_ALGORITHMS_BOXCAR_AVERAGER_H_ +//! FIR averager using a rectangle window template class boxcar_averager { public: diff --git a/Generic_Algorithms/differentiator.h b/Generic_Algorithms/differentiator.h index f6384eb..09a6962 100644 --- a/Generic_Algorithms/differentiator.h +++ b/Generic_Algorithms/differentiator.h @@ -62,7 +62,7 @@ template private: //! sampling time - const basetype time_constant; + basetype time_constant; //! maintains old output datatype old_value; datatype differentiation; diff --git a/Generic_Algorithms/euler.h b/Generic_Algorithms/euler.h index d0a297e..3ccd0ca 100644 --- a/Generic_Algorithms/euler.h +++ b/Generic_Algorithms/euler.h @@ -11,10 +11,10 @@ template class eulerangle { public: - eulerangle( datatype ir=0, datatype in=0, datatype iy=0) - : r(ir), p(in), y(iy) + eulerangle( datatype ir=0, datatype ip=0, datatype iy=0) + : roll(ir), pitch(ip), yaw(iy) {} - datatype r,p,y; + datatype roll,pitch,yaw; }; #endif diff --git a/Generic_Algorithms/mean_and_variance_finder.h b/Generic_Algorithms/mean_and_variance_finder.h index e4e5ac0..d87c609 100644 --- a/Generic_Algorithms/mean_and_variance_finder.h +++ b/Generic_Algorithms/mean_and_variance_finder.h @@ -3,6 +3,7 @@ #ifndef GENERIC_ALGORITHMS_MEAN_AND_VARIANCE_FINDER_H_ #define GENERIC_ALGORITHMS_MEAN_AND_VARIANCE_FINDER_H_ +//! generic statistics class for average and variance data template class mean_and_variance_finder_t { public: diff --git a/Generic_Algorithms/quaternion.h b/Generic_Algorithms/quaternion.h index 882a594..305ba81 100644 --- a/Generic_Algorithms/quaternion.h +++ b/Generic_Algorithms/quaternion.h @@ -35,7 +35,7 @@ template class quaternion: public vector { public: - //! constructor from eulerwinkel + //! constructor from euler angle quaternion( vector &init) { from_euler( init[0], init[1], init[2]); @@ -51,78 +51,72 @@ template class quaternion: public vector //! normalize quaternion absolute value to ONE inline void normalize(void) { - datatype tmp = vector::e[0] * vector::e[0] - + vector::e[1] * vector::e[1] - + vector::e[2] * vector::e[2] - + vector::e[3] * vector::e[3] ; + datatype tmp = this->e[0] * this->e[0] + + this->e[1] * this->e[1] + + this->e[2] * this->e[2] + + this->e[3] * this->e[3] ; tmp = ONE / tmp; tmp = SQRT( tmp); for( int i = 0; i < 4; ++i) - vector::e[i] *= tmp; + this->e[i] *= tmp; }; //! quaternion -> euler angle transformation operator eulerangle () const { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; + datatype e0 = this->e[0]; + datatype e1 = this->e[1]; + datatype e2 = this->e[2]; + datatype e3 = this->e[3]; eulerangle _euler; //! formula from roenbaeck p34 - _euler.r = ATAN2( TWO * (e0*e1 + e2*e3) , e0*e0 - e1*e1 - e2*e2 + e3*e3 ); - _euler.p = ASIN( TWO * (e0*e2 - e3*e1)); - _euler.y = ATAN2( TWO * (e0*e3 + e1*e2) , e0*e0 + e1*e1 - e2*e2 - e3*e3 ); + _euler.roll = ATAN2( TWO * (e0*e1 + e2*e3) , e0*e0 - e1*e1 - e2*e2 + e3*e3 ); + _euler.pitch = ASIN( TWO * (e0*e2 - e3*e1)); + _euler.yaw = ATAN2( TWO * (e0*e3 + e1*e2) , e0*e0 + e1*e1 - e2*e2 - e3*e3 ); return _euler; } - //! linearized euler component e0 (approximate nick angle / rad) - inline datatype lin_e0( void) const - { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; - return TWO * (e0*e1 + e2*e3) / ( e0*e0 - e1*e1 - e2*e2 + e3*e3); - } - //! linearized euler component e1 (approximate roll angle / rad) - inline datatype lin_e1( void) const + //! quaternion chaining, multiplication + quaternion operator *( const quaternion right) const { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; - return TWO * (e0*e2 - e3*e1); - } - //! euler component e2 - inline datatype get_e2( void) const - { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; - return ATAN2( TWO * (e0*e3 + e1*e2) , e0*e0 + e1*e1 - e2*e2 - e3*e3 ); + quaternion result; + datatype w1 = vector::e[0]; + datatype x1 = vector::e[1]; + datatype y1 = vector::e[2]; + datatype z1 = vector::e[3]; + + datatype w2 = right[0]; + datatype x2 = right[1]; + datatype y2 = right[2]; + datatype z2 = right[3]; + + result[0] = w1*w2 - x1*x2 - y1*y2 - z1*z2; + result[1] = w1*x2 + x1*w2 + y1*z2 - z1*y2; + result[2] = w1*y2 - x1*z2 + y1*w2 + z1*x2; + result[3] = w1*z2 + x1*y2 - y1*x2 + z1*w2; + + return result; } //! get north component of attitude inline datatype get_north( void) const { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; + datatype e0 = this->e[0]; + datatype e1 = this->e[1]; + datatype e2 = this->e[2]; + datatype e3 = this->e[3]; return e0*e0 + e1*e1 - e2*e2 - e3*e3; } //! get east component of attitude inline datatype get_east( void) const { - datatype e0 = vector::e[0]; - datatype e1 = vector::e[1]; - datatype e2 = vector::e[2]; - datatype e3 = vector::e[3]; + datatype e0 = this->e[0]; + datatype e1 = this->e[1]; + datatype e2 = this->e[2]; + datatype e3 = this->e[3]; return TWO * (e0*e3 + e1*e2); } @@ -136,19 +130,30 @@ template class quaternion: public vector return TWO * (e1*e3-e0*e2); } - //! quaternion update using rotation vector - void rotate( datatype p, datatype q, datatype r) + //! get heading component of attitude + datatype get_heading( void) const { datatype e0 = vector::e[0]; datatype e1 = vector::e[1]; datatype e2 = vector::e[2]; datatype e3 = vector::e[3]; + return ATAN2( TWO * (e0*e3 + e1*e2) , e0*e0 + e1*e1 - e2*e2 - e3*e3 ); + } + + //! quaternion update using rotation vector + void rotate( datatype p, datatype q, datatype r) + { + datatype e0 = this->e[0]; + datatype e1 = this->e[1]; + datatype e2 = this->e[2]; + datatype e3 = this->e[3]; + //! R.Rogers formula 2.92 - vector::e[0] += (-e1*p -e2*q -e3*r); - vector::e[1] += ( e0*p +e2*r -e3*q); - vector::e[2] += ( e0*q -e1*r +e3*p); - vector::e[3] += ( e0*r +e1*q -e2*p); + this->e[0] += (-e1*p -e2*q -e3*r); + this->e[1] += ( e0*p +e2*r -e3*q); + this->e[2] += ( e0*q -e1*r +e3*p); + this->e[3] += ( e0*r +e1*q -e2*p); normalize(); } @@ -156,6 +161,7 @@ template class quaternion: public vector //! euler angle -> quaternion transformation void from_euler( datatype p, datatype q, datatype r) { + // 3-2-1: yaw, then pitch, then roll p *= HALF; q *= HALF; r *= HALF; datatype sinphi = SIN( p); datatype cosphi = COS( p); @@ -164,20 +170,20 @@ template class quaternion: public vector datatype sinpsi = SIN( r); datatype cospsi = COS( r); - vector::e[0] = cosphi*costheta*cospsi + sinphi*sintheta*sinpsi; - vector::e[1] = sinphi*costheta*cospsi + cosphi*sintheta*sinpsi; - vector::e[2] = cosphi*sintheta*cospsi + sinphi*costheta*sinpsi; - vector::e[3] = cosphi*costheta*sinpsi - sinphi*sintheta*cospsi; + this->e[0] = cosphi*costheta*cospsi + sinphi*sintheta*sinpsi; + this->e[1] = sinphi*costheta*cospsi - cosphi*sintheta*sinpsi; + this->e[2] = cosphi*sintheta*cospsi + sinphi*costheta*sinpsi; + this->e[3] = cosphi*costheta*sinpsi - sinphi*sintheta*cospsi; } //! quaternion -> rotation matrix transformation void get_rotation_matrix (matrix &m) const { //! R.Rogers formula 2.90 - datatype e0=vector::e[0]; - datatype e1=vector::e[1]; - datatype e2=vector::e[2]; - datatype e3=vector::e[3]; + datatype e0=this->e[0]; + datatype e1=this->e[1]; + datatype e2=this->e[2]; + datatype e3=this->e[3]; m.e[0][0] = TWO * (e0*e0+e1*e1) - ONE; m.e[0][1] = TWO * (e1*e2-e0*e3); @@ -191,21 +197,21 @@ template class quaternion: public vector m.e[2][1] = TWO * (e2*e3+e0*e1); m.e[2][2] = TWO * (e0*e0+e3*e3) - ONE; } - quaternion operator * ( quaternion & right) //!< quaternion multiplication + quaternion operator * ( const quaternion & right) const //!< quaternion multiplication { quaternion result; - datatype e0=vector::e[0]; - datatype e1=vector::e[1]; - datatype e2=vector::e[2]; - datatype e3=vector::e[3]; - datatype re0=right.vector::e[0]; - datatype re1=right.vector::e[1]; - datatype re2=right.vector::e[2]; - datatype re3=right.vector::e[3]; - result.vector::e[0] = e0 * re0 - e1 * re1 - e2 * re2 + e3 * re3; - result.vector::e[1] = e1 * re0 + e2 * re3 - e3 * re2 + e0 * re1; - result.vector::e[2] = e3 * re1 + e0 * re2 - e1 * re3 + e2 * re0; - result.vector::e[3] = e1 * re2 - e2 * re1 + e0 * re3 + e3 * re0; + datatype e0=this->e[0]; + datatype e1=this->e[1]; + datatype e2=this->e[2]; + datatype e3=this->e[3]; + datatype re0=right.e[0]; + datatype re1=right.e[1]; + datatype re2=right.e[2]; + datatype re3=right.e[3]; + result.e[0] = e0 * re0 - e1 * re1 - e2 * re2 + e3 * re3; + result.e[1] = e1 * re0 + e2 * re3 - e3 * re2 + e0 * re1; + result.e[2] = e3 * re1 + e0 * re2 - e1 * re3 + e2 * re0; + result.e[3] = e1 * re2 - e2 * re1 + e0 * re3 + e3 * re0; return result; } @@ -216,13 +222,12 @@ template class quaternion: public vector //! formula from roenbaeck p35 tmp = SQRT( tmp); tmp *= HALF; - vector::e[0] = tmp; + this->e[0] = tmp; tmp = QUARTER / tmp; - vector::e[1] = tmp * (rotm.e[2][1] - rotm.e[1][2]); - vector::e[2] = tmp * (rotm.e[0][2] - rotm.e[2][0]); - vector::e[3] = tmp * (rotm.e[1][0] - rotm.e[0][1]); + this->e[1] = tmp * (rotm.e[2][1] - rotm.e[1][2]); + this->e[2] = tmp * (rotm.e[0][2] - rotm.e[2][0]); + this->e[3] = tmp * (rotm.e[1][0] - rotm.e[0][1]); normalize(); // compensate computational inaccuracies }; }; - #endif // QUATERNION_H diff --git a/Generic_Algorithms/trigger.h b/Generic_Algorithms/trigger.h index e68bf49..d509b5b 100644 --- a/Generic_Algorithms/trigger.h +++ b/Generic_Algorithms/trigger.h @@ -25,7 +25,7 @@ #ifndef TRIGGER_H_ #define TRIGGER_H_ -//! class for hysteresis-based triggering on input data +//! Hysteresis-based triggering on input data class trigger { public: diff --git a/NAV_Algorithms/AHRS.cpp b/NAV_Algorithms/AHRS.cpp index 26f3490..3a99a57 100644 --- a/NAV_Algorithms/AHRS.cpp +++ b/NAV_Algorithms/AHRS.cpp @@ -28,6 +28,10 @@ #include "magnetic_induction_report.h" #include "embedded_memory.h" #include "NAV_tuning_parameters.h" +#include "soft_iron_compensator.h" +#include "compass_calibrator_3D.h" + +#define TEST_CALIBRATION_AND_COMPENSATION 0 #if USE_HARDWARE_EEPROM == 0 #include "EEPROM_emulation.h" @@ -43,8 +47,8 @@ AHRS_type::attitude_setup (const float3vector &acceleration, float3vector north, east, down; float3vector induction; - if( compass_calibration.isCalibrationDone()) // use calibration if available - induction = compass_calibration.calibrate(mag); + if( compass_calibration.available()) + induction = compass_calibration.calibrate( mag); else induction = mag; @@ -104,21 +108,28 @@ AHRS_type::update_circling_state () #endif } -void AHRS_type::feed_magnetic_induction_observer(const float3vector &mag_sensor) +void AHRS_type::feed_magnetic_induction_observer( const float3vector &mag_sensor, const float3vector &mag_delta) { - float3vector expected_body_induction = body2nav.reverse_map(expected_nav_induction); + float error_margin = nav_correction.abs(); + if( error_margin > NAV_CORRECTION_LIMIT) + return; + bool turning_right = turn_rate_averager.get_output() > 0.0f; +#if USE_SOFT_IRON_COMPENSATION + if( mag_delta.abs() < 0.05) // only if the precision is already reasonably good ... + { + bool calibration_data_complete = soft_iron_compensator.learn( mag_delta, attitude, turning_right, error_margin); + if( calibration_data_complete) + trigger_soft_iron_compensator_calculation(); + } +#endif + for (unsigned i = 0; i < 3; ++i) if( turning_right) mag_calibration_data_collector_right_turn[i].add_value ( MAG_SCALE * expected_body_induction[i], MAG_SCALE * mag_sensor[i]); else mag_calibration_data_collector_left_turn[i].add_value ( MAG_SCALE * expected_body_induction[i], MAG_SCALE * mag_sensor[i]); - -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - // measurement of earth induction to find the local earth field parameters - earth_induction_data_collector.feed( induction_nav_frame, turning_right); -#endif } AHRS_type::AHRS_type (float sampling_time) @@ -141,17 +152,13 @@ AHRS_type::AHRS_type (float sampling_time) turn_rate_averager( ANGLE_F_BY_FS), G_load_averager( G_LOAD_F_BY_FS), compass_calibration(), -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - earth_induction_data_collector( MAG_SCALE), -#endif antenna_DOWN_correction( configuration( ANT_SLAVE_DOWN) / configuration( ANT_BASELENGTH)), antenna_RIGHT_correction( configuration( ANT_SLAVE_RIGHT) / configuration( ANT_BASELENGTH)), heading_difference_AHRS_DGNSS(0.0f), cross_acc_correction(0.0f), magnetic_disturbance(0.0f), magnetic_control_gain(1.0f), - automatic_magnetic_calibration(configuration(MAG_AUTO_CALIB)), - automatic_earth_field_parameters( false), + automatic_magnetic_calibration( (magnetic_calibration_type)(round)(configuration(MAG_AUTO_CALIB))), magnetic_calibration_updated( false) { update_magnetic_loop_gain(); // adapt to magnetic inclination @@ -221,22 +228,41 @@ AHRS_type::update_diff_GNSS (const float3vector &gyro, const float3vector &GNSS_acceleration, float GNSS_heading) { - circle_state_t old_circle_state = circling_state; - update_circling_state (); + expected_body_induction = body2nav.reverse_map( expected_nav_induction); - if( compass_calibration.isCalibrationDone()) // use calibration if available - body_induction = compass_calibration.calibrate(mag_sensor); - else - body_induction = mag_sensor; +#if TEST_CALIBRATION_AND_COMPENSATION + float fcoordinates[] = + { + -0.05, 0.95, 0.03, + 1, 0.1, -0.1, + -0.03, 0.1, 1.1 + }; + + float3matrix rotator (fcoordinates); + + float3vector mag_sensor_tilted = rotator * mag_sensor; + + body_induction = compass_calibrator_3D.calibrate( mag_sensor_tilted, attitude); +#else + body_induction = compass_calibration.calibrate(mag_sensor); +#endif + + float3vector mag_delta = body_induction - expected_body_induction; // for the training of the compensator + +#if USE_SOFT_IRON_COMPENSATION + if( automatic_magnetic_calibration == AUTO_SOFT_IRON_COMPENSATE) + body_induction = body_induction - soft_iron_compensator.compensate( expected_body_induction, attitude); +#endif + + body_induction_error = body_induction - expected_body_induction; - body_induction_error = body_induction - body2nav.reverse_map( expected_nav_induction); float3vector nav_acceleration = body2nav * acc; float heading_gnss_work = GNSS_heading // correct for antenna alignment - + antenna_DOWN_correction * SIN (euler.r) - - antenna_RIGHT_correction * COS (euler.r); + + antenna_DOWN_correction * SIN (euler.roll) + - antenna_RIGHT_correction * COS (euler.roll); - heading_gnss_work = heading_gnss_work - euler.y; // = heading difference D-GNSS - AHRS + heading_gnss_work = heading_gnss_work - euler.yaw; // = heading difference D-GNSS - AHRS if (heading_gnss_work > M_PI_F) // map into { -PI PI} heading_gnss_work -= 2.0f * M_PI_F; @@ -252,6 +278,9 @@ AHRS_type::update_diff_GNSS (const float3vector &gyro, + nav_acceleration[NORTH] * GNSS_acceleration[EAST] - nav_acceleration[EAST] * GNSS_acceleration[NORTH]; + circle_state_t old_circle_state = circling_state; + update_circling_state (); + if( circling_state == CIRCLING) // heading correction using acceleration cross product GNSS * INS { @@ -275,12 +304,18 @@ AHRS_type::update_diff_GNSS (const float3vector &gyro, gyro_integrator += gyro_correction; // update integrator gyro_correction = gyro_correction + gyro_integrator * I_GAIN; + gyro_correction_power = SQR( gyro_correction[0]) + SQR( gyro_correction[1]) +SQR( gyro_correction[2]); + update_attitude (acc, gyro + gyro_correction, body_induction); // only here we get fresh magnetic entropy // and: wait for low control loop error - if ( (circling_state == CIRCLING) && ( nav_correction.abs() < NAV_CORRECTION_LIMIT)) - feed_magnetic_induction_observer (mag_sensor); + if ( circling_state == CIRCLING) +#if TEST_CALIBRATION_AND_COMPENSATION + feed_magnetic_induction_observer (mag_sensor_tilted, mag_delta); +#else + feed_magnetic_induction_observer (mag_sensor, mag_delta); +#endif // when circling is finished eventually update the magnetic calibration if (automatic_magnetic_calibration && (old_circle_state == CIRCLING) && (circling_state == TRANSITION)) @@ -295,12 +330,36 @@ AHRS_type::update_compass (const float3vector &gyro, const float3vector &acc, const float3vector &mag_sensor, const float3vector &GNSS_acceleration) { - if( compass_calibration.isCalibrationDone()) // use calibration if available + expected_body_induction = body2nav.reverse_map( expected_nav_induction); + +#if TEST_CALIBRATION_AND_COMPENSATION + float fcoordinates[] = + { + -0.05, 0.95, 0.03, + 1, 0.1, -0.1, + -0.03, 0.1, 1.1 + }; + + float3matrix rotator (fcoordinates); + + float3vector mag_sensor_tilted = rotator * mag_sensor; + + body_induction = compass_calibrator_3D.calibrate( mag_sensor_tilted, attitude); +#else body_induction = compass_calibration.calibrate(mag_sensor); - else - body_induction = mag_sensor; +#if USE_SOFT_IRON_COMPENSATION + if( automatic_magnetic_calibration == AUTO_SOFT_IRON_COMPENSATE) + body_induction = body_induction - soft_iron_compensator.compensate( expected_body_induction, attitude); +#endif + +#endif + + float3vector mag_delta = body_induction - expected_body_induction; // for the training of the compensator - body_induction_error = body_induction - body2nav.reverse_map( expected_nav_induction); +// if( (automatic_magnetic_calibration == AUTO_3D) +// body_induction = body_induction - soft_iron_compensator.calibrate( expected_body_induction, attitude); + + body_induction_error = body_induction - expected_body_induction; float3vector nav_acceleration = body2nav * acc; float3vector nav_induction = body2nav * body_induction; @@ -313,9 +372,6 @@ AHRS_type::update_compass (const float3vector &gyro, const float3vector &acc, // calculate heading error depending on the present circling state // on state changes handle MAG auto calibration - circle_state_t old_circle_state = circling_state; - update_circling_state (); - float mag_correction = + nav_induction[NORTH] * expected_nav_induction[EAST] - nav_induction[EAST] * expected_nav_induction[NORTH]; @@ -324,6 +380,9 @@ AHRS_type::update_compass (const float3vector &gyro, const float3vector &acc, + nav_acceleration[NORTH] * GNSS_acceleration[EAST] - nav_acceleration[EAST] * GNSS_acceleration[NORTH]; + circle_state_t old_circle_state = circling_state; + update_circling_state (); + switch (circling_state) { case STRAIGHT_FLIGHT: @@ -352,23 +411,25 @@ AHRS_type::update_compass (const float3vector &gyro, const float3vector &acc, } gyro_correction = gyro_correction + gyro_integrator * I_GAIN; + gyro_correction_power = SQR( gyro_correction[0]) + SQR( gyro_correction[1]) +SQR( gyro_correction[2]); // feed quaternion update with corrected sensor readings update_attitude (acc, gyro + gyro_correction, body_induction); - body_induction_error = mag_sensor - body2nav.reverse_map( expected_nav_induction); - // only here we get fresh magnetic entropy // and: wait for low control loop error - if ( (circling_state == CIRCLING) && ( nav_correction.abs() < NAV_CORRECTION_LIMIT)) - feed_magnetic_induction_observer (mag_sensor); +// if ( (circling_state == CIRCLING) && ( nav_correction.abs() < NAV_CORRECTION_LIMIT)) +#if TEST_CALIBRATION_AND_COMPENSATION + feed_magnetic_induction_observer (mag_sensor_tilted, mag_delta); +#else + feed_magnetic_induction_observer (mag_sensor, mag_delta); +#endif - // when circling is finished eventually update the magnetic calibration - if (automatic_magnetic_calibration && (old_circle_state == CIRCLING) && (circling_state == TRANSITION)) - handle_magnetic_calibration('m'); + // when circling is finished eventually update the magnetic calibration + if (automatic_magnetic_calibration && (old_circle_state == CIRCLING) && (circling_state == TRANSITION)) + handle_magnetic_calibration('m'); } - /** * @brief update attitude from IMU data NOT using magnetometer of D-GNSS */ @@ -376,11 +437,15 @@ void AHRS_type::update_ACC_only (const float3vector &gyro, const float3vector &a const float3vector &mag_sensor, const float3vector &GNSS_acceleration) { - float3vector mag; - if (compass_calibration.isCalibrationDone ()) // use calibration if available - mag = compass_calibration.calibrate (mag_sensor); - else - mag = mag_sensor; + expected_body_induction = body2nav.reverse_map( expected_nav_induction); + body_induction = compass_calibration.calibrate(mag_sensor); + +#if USE_SOFT_IRON_COMPENSATION + if( automatic_magnetic_calibration == AUTO_SOFT_IRON_COMPENSATE) + body_induction = body_induction - soft_iron_compensator.compensate( expected_body_induction, attitude); +#endif + + body_induction_error = body_induction - expected_body_induction; float3vector nav_acceleration = body2nav * acc; @@ -407,7 +472,7 @@ void AHRS_type::update_ACC_only (const float3vector &gyro, const float3vector &a gyro_correction = gyro_correction + gyro_integrator * I_GAIN; // use integrator // feed quaternion update with corrected sensor readings - update_attitude(acc, gyro + gyro_correction, mag); + update_attitude(acc, gyro + gyro_correction, body_induction); } void AHRS_type::write_calibration_into_EEPROM( void) @@ -427,40 +492,14 @@ void AHRS_type::handle_magnetic_calibration ( char type) { bool calibration_changed = compass_calibration.set_calibration_if_changed ( mag_calibration_data_collector_right_turn, mag_calibration_data_collector_left_turn, MAG_SCALE); - float induction_error = 0.0f; - float3vector new_induction_estimate; -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - - if (earth_induction_data_collector.data_valid ()) - { - new_induction_estimate = earth_induction_data_collector.get_estimated_induction(); - induction_error = SQRT(earth_induction_data_collector.get_variance ()); - - if ( automatic_earth_field_parameters && ( induction_error < INDUCTION_STD_DEVIATION_LIMIT)) - { - expected_nav_induction = new_induction_estimate; - expected_nav_induction.normalize(); - update_magnetic_loop_gain(); // adapt to magnetic inclination - - calibration_changed = true; - } - earth_induction_data_collector.reset (); - } -#endif - if( calibration_changed) { magnetic_induction_report_t magnetic_induction_report; for( unsigned i=0; i<3; ++i) magnetic_induction_report.calibration[i] = (compass_calibration.get_calibration())[i]; -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - magnetic_induction_report.nav_induction=new_induction_estimate; - magnetic_induction_report.nav_induction_std_deviation = induction_error; -#endif - report_magnetic_calibration_has_changed( &magnetic_induction_report, type); magnetic_calibration_updated = true; } diff --git a/NAV_Algorithms/AHRS.h b/NAV_Algorithms/AHRS.h index 4ab7988..1797130 100644 --- a/NAV_Algorithms/AHRS.h +++ b/NAV_Algorithms/AHRS.h @@ -31,8 +31,8 @@ #include "float3matrix.h" #include "integrator.h" #include "compass_calibration.h" +#include "soft_iron_compensator.h" #include "HP_LP_fusion.h" -#include "induction_observer.h" #include "pt2.h" extern float3vector nav_induction; @@ -45,7 +45,7 @@ typedef enum { STRAIGHT_FLIGHT, TRANSITION, CIRCLING} circle_state_t; typedef integrator vector3integrator; -//! Attitude and heading reference system class +//! Attitude and heading reference system class AHRS_type { public: @@ -91,18 +91,6 @@ class AHRS_type { return induction_nav_frame; } - inline float get_lin_e0(void) const - { - return attitude.lin_e0(); - } - inline float get_lin_e1(void) const - { - return attitude.lin_e1(); - } - inline float get_e2(void) const - { - return attitude.get_e2(); - } inline float get_north(void) const { return attitude.get_north(); @@ -198,6 +186,11 @@ class AHRS_type return body_induction; } + float getGyro_correction_Power () const + { + return gyro_correction_power; + } + enum magnetic_calibration_type { NONE, AUTO_1D, AUTO_SOFT_IRON_COMPENSATE}; private: void handle_magnetic_calibration( char type); @@ -210,7 +203,7 @@ class AHRS_type magnetic_control_gain = M_H_GAIN / expected_horizontal_induction; } - void feed_magnetic_induction_observer(const float3vector &mag_sensor); + void feed_magnetic_induction_observer( const float3vector &mag_sensor, const float3vector &mag_delta); circle_state_t update_circling_state( void); void update_diff_GNSS( const float3vector &gyro, const float3vector &acc, const float3vector &mag, @@ -230,6 +223,7 @@ class AHRS_type float3vector acceleration_nav_frame; float3vector induction_nav_frame; //!< observed NAV induction float3vector expected_nav_induction; //!< expected NAV induction + float3vector expected_body_induction; //!< expected body frame induction float3matrix body2nav; eulerangle euler; pt2 slip_angle_averager; @@ -239,20 +233,17 @@ class AHRS_type linear_least_square_fit mag_calibration_data_collector_right_turn[3]; linear_least_square_fit mag_calibration_data_collector_left_turn[3]; compass_calibration_t compass_calibration; -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - induction_observer_t earth_induction_data_collector; -#endif float antenna_DOWN_correction; //!< slave antenna lower / DGNSS base length float antenna_RIGHT_correction; //!< slave antenna more right / DGNSS base length float heading_difference_AHRS_DGNSS; float cross_acc_correction; float magnetic_disturbance; //!< abs( observed_induction - expected_induction) float magnetic_control_gain; //!< declination-dependent magnetic control loop gain - bool automatic_magnetic_calibration; - bool automatic_earth_field_parameters; // todo unused, remove me some day + magnetic_calibration_type automatic_magnetic_calibration; bool magnetic_calibration_updated; float3vector body_induction; float3vector body_induction_error; + float gyro_correction_power; }; #endif /* AHRS_H_ */ diff --git a/NAV_Algorithms/GNSS.h b/NAV_Algorithms/GNSS.h index fdff995..07bf511 100644 --- a/NAV_Algorithms/GNSS.h +++ b/NAV_Algorithms/GNSS.h @@ -99,7 +99,7 @@ typedef enum { GNSS_HAVE_FIX, GNSS_NO_FIX, GNSS_ERROR} GNSS_Result; #define SAT_FIX 1 // bits within sat_fix #define SAT_HEADING 2 -//! this structure contains all important data from the GNSS +//! Contains all important data from the GNSS typedef struct { float3vector position; //!< NED / meters @@ -131,7 +131,7 @@ typedef struct uint16_t dummy; } coordinates_t; -//! this class is organizing the data transfer from uBlox-GNSS to Larus +//! Organizing the data transfer from a uBlox-GNSS receiver class GNSS_type { public: diff --git a/NAV_Algorithms/NAV_tuning_parameters.h b/NAV_Algorithms/NAV_tuning_parameters.h index 576dc91..c711aa2 100644 --- a/NAV_Algorithms/NAV_tuning_parameters.h +++ b/NAV_Algorithms/NAV_tuning_parameters.h @@ -25,11 +25,9 @@ #ifndef NAV_ALGORITHMS_NAV_TUNING_PARAMETERS_H_ #define NAV_ALGORITHMS_NAV_TUNING_PARAMETERS_H_ -#define USE_EARTH_INDUCTION_DATA_COLLECTOR 0 - #define MINIMUM_MAG_CALIBRATION_SAMPLES 6000 -#define MAG_OFFSET_CHANGE_LIMIT 0.01f -#define MAG_SCALE_CHANGE_LIMIT 0.01f +#define MAG_OFFSET_CHANGE_LIMIT 0.02f +#define MAG_SCALE_CHANGE_LIMIT 0.02f #define CIRCLE_LIMIT (10 * 100) //!< 10 * 1/100 s delay into / out of circling state @@ -51,18 +49,20 @@ #define I_GAIN 0.00006f //!< Attitude controller: integral gain #define H_GAIN 38.0f //!< Attitude controller: horizontal gain #define M_H_GAIN 6.0f //!< Attitude controller: horizontal gain magnetic -#define CROSS_GAIN 0.05f //!< Attitude controller: cross-product gain +#define CROSS_GAIN 0.05 //!< Attitude controller: cross-product gain #define INDUCTION_ERROR 0.015 //!< Maximum std deviation to update earth induction parameters #define NAV_CORRECTION_LIMIT 5.0f //!< limit for "low AHRS correcting variable" #define HIGH_TURN_RATE 8.0*M_PI/180.0f //!< turn rate high limit #define LOW_TURN_RATE 1.0*M_PI/180.0f //!< turn rate low limit #define SPEED_COMPENSATION_FUSIONER_FEEDBACK 0.998f // empirically tuned alpha +#define USE_OLD_FASHIONED_PRESSURE_VARIO 0 // for vario comparison tests (offline) -#define USE_ACCELERATION_CROSS_GAIN_ALONE_WHEN_CIRCLING 1 //!< if 1: do not use induction to control attitude while circling +#define USE_ACCELERATION_CROSS_GAIN_ALONE_WHEN_CIRCLING 0 //!< if 1: do not use induction to control attitude while circling #define DISABLE_CIRCLING_STATE 0 //!< for tests only: never use circling AHRS algorithm #define INDUCTION_STD_DEVIATION_LIMIT 0.03 //!< results outperforming this number will be used further on -#define AIRBORNE_TRIGGER_SPEED 0.5f //!< speed-compensator vario value m/s +#define AIRBORNE_TRIGGER_SPEED_COMP 0.5f //!< speed-compensator vario value m/s +#define AIRBORNE_TRIGGER_SPEED 15.0f //!< ground speed / m/s #define MAG_SCALE 10000.0f //!< scale factor for high-precision integer statistics #define FAST_SAMPLING_REQUENCY 100.0f @@ -71,5 +71,6 @@ #define SLOW_SAMPLING_TIME 0.1f #define GRAVITY 9.81f +#define VECTOR_AVERAGE_COUNT 100 //!< average count for sensor orientation setup #endif /* NAV_ALGORITHMS_NAV_TUNING_PARAMETERS_H_ */ diff --git a/NAV_Algorithms/air_density_observer.cpp b/NAV_Algorithms/air_density_observer.cpp index 7e4c1c5..45d3d21 100644 --- a/NAV_Algorithms/air_density_observer.cpp +++ b/NAV_Algorithms/air_density_observer.cpp @@ -25,60 +25,69 @@ #include "embedded_math.h" #include -air_data_result air_density_observer::feed_metering( float pressure, float MSL_altitude) +air_data_result air_density_observer_t::feed_metering( float pressure, float GNSS_altitude) { air_data_result air_data; -#if DENSITY_MEASURMENT_COLLECTS_INTEGER - density_QFF_calculator.add_value( MSL_altitude * 100.0f, pressure); -#else - // use values normalized around 1.0 - density_QFF_calculator.add_value( MSL_altitude * 1e-3, pressure * 1e-5); -#endif + pressure_decimation_filter.respond( pressure); + altitude_decimation_filter.respond( GNSS_altitude); + --decimation_counter; + if( decimation_counter > 0) + return air_data; + decimation_counter = AIR_DENSITY_DECIMATION; + + density_QFF_calculator.add_value( GNSS_altitude * 100.0f, pressure); - if( MSL_altitude > max_altitude) - max_altitude = MSL_altitude; + // update elevation range + if( GNSS_altitude > max_altitude) + max_altitude = GNSS_altitude; - if( MSL_altitude < min_altitude) - min_altitude = MSL_altitude; + if( GNSS_altitude < min_altitude) + min_altitude = GNSS_altitude; - if( false == altitude_trigger.process(MSL_altitude)) + // if range too low: continue + if( (max_altitude - min_altitude) < MINIMUM_ALTITUDE_RANGE) return air_data; - if( ((max_altitude - min_altitude) < MINIMUM_ALTITUDE_RANGE) // ... forget this measurement - || (density_QFF_calculator.get_count() < 3000)) - { - max_altitude = min_altitude = MSL_altitude; - density_QFF_calculator.reset(); - return air_data; - } + // elevation range triggering + if( (max_altitude - min_altitude < MAXIMUM_ALTITUDE_RANGE) && + false == altitude_trigger.process(GNSS_altitude)) + return air_data; + + // if data points too rare: continue + if (density_QFF_calculator.get_count() < 100) + return air_data; + // process last acquisition phase data linear_least_square_result result; density_QFF_calculator.evaluate(result); -// Due to numeric effects, when using float the -// variance has been observed to be negative in some cases ! -// Therefore using float this test can not be used. - assert( result.variance_slope > 0); - assert( result.variance_offset > 0); +// Due to numeric effects, the variance has been observed +// to be negative in some cases. +// If this is the case: Throw away this result. + if( ( result.variance_slope < 0) || ( result.variance_offset < 0)) + { + density_QFF_calculator.reset(); + air_data.valid=false; + return air_data; + } - if( result.variance_slope < MAX_ALLOWED_VARIANCE) + if( (result.variance_slope < MAX_ALLOWED_SLOPE_VARIANCE) && + (result.variance_offset < MAX_ALLOWED_OFFSET_VARIANCE) ) { air_data.QFF = (float)(result.y_offset); - float density = 100.0f * (float)(result.slope) * -0.10194f; // div by -9.81f; - -#if DENSITY_MEASURMENT_COLLECTS_INTEGER - float pressure = density_QFF_calculator.get_mean_y(); -#else - float pressure = 1e5 * density_QFF_calculator.get_mean_y(); -#endif + float density = 100.0f * (float)(result.slope) * -0.101936f; // div by -9.81f; - float std_density = 1.0496346613e-5f * pressure + 0.1671546011f; + float reference_altitude = density_QFF_calculator.get_mean_x() * 0.01f; + float std_density = + reference_altitude * reference_altitude * 0.000000003547494f + -0.000115412739613f * reference_altitude +1.224096628212817f; air_data.density_correction = density / std_density; + air_data.density_variance = result.variance_slope; air_data.valid = true; } - max_altitude = min_altitude = MSL_altitude; + max_altitude = min_altitude = GNSS_altitude; density_QFF_calculator.reset(); return air_data; diff --git a/NAV_Algorithms/air_density_observer.h b/NAV_Algorithms/air_density_observer.h index f5cbc0e..5f126f5 100644 --- a/NAV_Algorithms/air_density_observer.h +++ b/NAV_Algorithms/air_density_observer.h @@ -25,39 +25,49 @@ #ifndef AIR_DENSITY_OBSERVER_H_ #define AIR_DENSITY_OBSERVER_H_ +#include "pt2.h" #include "Linear_Least_Square_Fit.h" #include "trigger.h" -#define DENSITY_MEASURMENT_COLLECTS_INTEGER 1 typedef double evaluation_type; typedef uint64_t measurement_type; -#define MAX_ALLOWED_VARIANCE 1e-9 -#define MINIMUM_ALTITUDE_RANGE 300.0f -#define ALTITUDE_TRIGGER_HYSTERESIS 50.0f +#define ALTITUDE_TRIGGER_HYSTERESIS 50.0f +#define MAX_ALLOWED_SLOPE_VARIANCE 3e-9 +#define MAX_ALLOWED_OFFSET_VARIANCE 200 +#define MINIMUM_ALTITUDE_RANGE 300.0f +#define MAXIMUM_ALTITUDE_RANGE 800.0f +#define USE_AIR_DENSITY_LETHARGY 1 +#define AIR_DENSITY_LETHARGY 0.7 +#define AIR_DENSITY_DECIMATION 20 -//! this class maintains offset and slope of the air density measurement +//! Maintains offset and slope of the air density measurement class air_data_result { public: air_data_result( void) : density_correction(1.0f), + density_variance(1.0f), QFF(101325.0f), valid( false) {} float density_correction; + float density_variance; float QFF; bool valid; }; -//! this class measures air density and QFF -class air_density_observer +//! Measures air density and reference pressure +class air_density_observer_t { public: - air_density_observer (void) + air_density_observer_t (void) : min_altitude(10000.0f), max_altitude(0.0f), - altitude_trigger( ALTITUDE_TRIGGER_HYSTERESIS) + altitude_trigger( ALTITUDE_TRIGGER_HYSTERESIS), + decimation_counter( 20), + altitude_decimation_filter( 1.0f / AIR_DENSITY_DECIMATION * 0.25f), + pressure_decimation_filter( 1.0f / AIR_DENSITY_DECIMATION * 0.25f) { } air_data_result feed_metering( float pressure, float MSL_altitude); @@ -75,6 +85,9 @@ class air_density_observer float min_altitude; float max_altitude; trigger altitude_trigger; + unsigned decimation_counter; + pt2 altitude_decimation_filter; + pt2 pressure_decimation_filter; }; #endif /* AIR_DENSITY_OBSERVER_H_ */ diff --git a/NAV_Algorithms/airborne_detector.h b/NAV_Algorithms/airborne_detector.h index d3fb97e..2ccd63a 100644 --- a/NAV_Algorithms/airborne_detector.h +++ b/NAV_Algorithms/airborne_detector.h @@ -26,6 +26,7 @@ #ifndef NAV_ALGORITHMS_AIRBORNE_DETECTOR_H_ #define NAV_ALGORITHMS_AIRBORNE_DETECTOR_H_ +//! observation of the aircraft state (ground / flying) class airborne_detector_t { public: @@ -37,12 +38,14 @@ class airborne_detector_t { if( yes) { - if( airborne_counter < LEVEL) + if( airborne_counter == 0) + airborne_counter = 100; // create hysteresis + else if( airborne_counter < LEVEL) ++ airborne_counter; } else { - if( airborne_counter > 0) + if( airborne_counter > 0) { --airborne_counter; if( airborne_counter == 0) @@ -50,7 +53,13 @@ class airborne_detector_t } } } - bool detect_just_landed( void) + + bool is_airborne( void) + { + return airborne_counter > 0; + } + + bool detect_just_landed( void) { if( just_landed) { diff --git a/NAV_Algorithms/atmosphere.h b/NAV_Algorithms/atmosphere.h index 851ab12..cc72cb2 100644 --- a/NAV_Algorithms/atmosphere.h +++ b/NAV_Algorithms/atmosphere.h @@ -28,6 +28,7 @@ #include "embedded_math.h" #include +#include "NAV_tuning_parameters.h" #include #define RECIP_STD_DENSITY_TIMES_2 1.632f @@ -41,7 +42,7 @@ /*! The offest for the conversion from degree celsius to kelvin */ #define CELSIUS_TO_KELVIN_OFFSET 273.15f -//! this class maintains instant atmosphere data like pressure, density etc +//! Maintenance of atmosphere data like pressure, density etc. class atmosphere_t { public: @@ -52,18 +53,27 @@ class atmosphere_t temperature(20.0f), humidity( 0.0f), density_correction(1.0f), - density_correction_averager(0.001f), - QFF(101325) + extrapolated_sea_level_pressure(101325), + GNSS_altitude_based_density_available(false), + GNSS_altitude_based_density(1.2255f), + old_density_correction(1.0f), + weight_sum(0.0f), + density_factor_weighed_sum(0.0f) { - density_correction_averager.settle(1.0f); } - void update_density_correction( void) + void update_density( float GNSS_altitude, bool valid) { - density_correction_averager.respond(density_correction); + if( valid) + { + GNSS_altitude_based_density = get_std_density( GNSS_altitude) * density_correction; + GNSS_altitude_based_density_available = true; + } + else + GNSS_altitude_based_density_available = false; } void initialize( float altitude) { - density_QFF_calculator.initialize(altitude); + air_density_observer.initialize(altitude); } void set_pressure( float p_abs) { @@ -73,11 +83,25 @@ class atmosphere_t { return pressure; } + float get_std_density( float GNSS_altitude) + { + float std_density = + 0.000000003547494f * GNSS_altitude * GNSS_altitude + -0.000115412739613f * GNSS_altitude + 1.224096628212817f; + return std_density; + } + float get_pressure_density( float static_pressure) + { + return 1.0496346613e-5f * static_pressure + 0.1671546011f; + } float get_density( void) const { - return (1.0496346613e-5f * pressure + 0.1671546011f) * density_correction_averager.get_output(); + if( GNSS_altitude_based_density_available) + return GNSS_altitude_based_density; + else + return (1.0496346613e-5f * pressure + 0.1671546011f) * density_correction; } - float get_negative_altitude( void) const + float get_negative_pressure_altitude( void) const { float tmp = 8.104381531e-4f * pressure; return - tmp * tmp + 0.20867299170f * pressure - 14421.43945f; @@ -101,18 +125,28 @@ class atmosphere_t have_ambient_air_data = false; } - float get_QFF () const + float get_extrapolated_sea_level_pressure () const { - return QFF; + return extrapolated_sea_level_pressure; } - void feed_QFF_density_metering( float pressure, float MSL_altitude) + void air_density_metering( float pressure, float MSL_altitude) { - air_data_result result = density_QFF_calculator.feed_metering( pressure, MSL_altitude); + air_data_result result = air_density_observer.feed_metering( pressure, MSL_altitude); if( result.valid) { - QFF = result.QFF; +#if USE_AIR_DENSITY_LETHARGY + bool first_measurement = (weight_sum == 0.0f); + + weight_sum = weight_sum * AIR_DENSITY_LETHARGY + (AIR_DENSITY_LETHARGY -1) / result.density_variance; + density_factor_weighed_sum = density_factor_weighed_sum * AIR_DENSITY_LETHARGY + (AIR_DENSITY_LETHARGY -1) * result.density_correction / result.density_variance; + + // postpone update unless we have two measurements + if( ! first_measurement) + density_correction = density_factor_weighed_sum / weight_sum; +#else density_correction = result.density_correction; +#endif } } @@ -127,9 +161,13 @@ class atmosphere_t float temperature; float humidity; float density_correction; - pt2 density_correction_averager; - float QFF; - air_density_observer density_QFF_calculator; + float extrapolated_sea_level_pressure; + air_density_observer_t air_density_observer; + bool GNSS_altitude_based_density_available; + float GNSS_altitude_based_density; + float old_density_correction; + float weight_sum; + float density_factor_weighed_sum; }; #endif /* APPLICATION_ATMOSPHERE_H_ */ diff --git a/NAV_Algorithms/compass_calibration.h b/NAV_Algorithms/compass_calibration.h index 8e1b3cf..919b8b9 100644 --- a/NAV_Algorithms/compass_calibration.h +++ b/NAV_Algorithms/compass_calibration.h @@ -68,7 +68,7 @@ class single_axis_calibration_t float variance; //!< measure of precision: sensor calibration parameter variance }; -//! this class maintains 3d magnetic calibration data +//! Maintains 3 axes magnetic calibration data template class compass_calibration_t { public: @@ -91,7 +91,6 @@ template class compass_calibration_t bool set_default (void) { - float variance; calibration_done = true; for( unsigned i=0; i<3; ++i) { @@ -145,7 +144,7 @@ template class compass_calibration_t return true; } - bool isCalibrationDone () const + bool available () const { return calibration_done; } diff --git a/NAV_Algorithms/compass_calibrator_3D.cpp b/NAV_Algorithms/compass_calibrator_3D.cpp new file mode 100644 index 0000000..3178d74 --- /dev/null +++ b/NAV_Algorithms/compass_calibrator_3D.cpp @@ -0,0 +1,204 @@ +/***********************************************************************//** + * @file compass_calibrator_3D.cpp + * @brief induction sensor calibration and magnetic disturbance compensation + * @author Dr. Klaus Schaefer + * @copyright Copyright 20.8.2024 Dr. Klaus Schaefer. All rights reserved. + * @license This project is released under the GNU Public License GPL-3.0 + + + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + **************************************************************************/ + +#define PRINT_PARAMETERS 1 +#if PRINT_PARAMETERS +#include "stdio.h" +#endif + +#include "embedded_math.h" +#include +#include "compass_calibrator_3D.h" + +ROM float RECIP_SECTOR_SIZE = compass_calibrator_3D_t::OBSERVATIONS / M_PI_F / TWO / TWO; + +bool compass_calibrator_3D_t::learn (const float3vector &observed_induction,const float3vector &expected_induction, const quaternion &q, bool turning_right, float error_margin) +{ + float present_heading = q.get_heading(); + if( present_heading <0.0f) + present_heading += M_PI_F * TWO; + + unsigned sector_index = (turning_right ? OBSERVATIONS / TWO : 0) + (unsigned)(present_heading * RECIP_SECTOR_SIZE); + + // if we have just left the last sector to be collected: report ready for computation + if( ( last_sector_collected != -1) && ( sector_index != last_sector_collected) && (++measurement_counter > 10000)) + return true; + + if( heading_sector_error[sector_index] > 1e19) // this sector has not been written before + ++populated_sectors; + + if ( heading_sector_error[sector_index] < error_margin) + return false; // we have collected more precise data for this observation earlier + + heading_sector_error[sector_index] = error_margin; + + for( unsigned axis = 0; axis < AXES; ++axis) + { + target_vector[axis][sector_index] = expected_induction[axis]; + + observation_matrix[axis][sector_index][0] = 1.0f; + observation_matrix[axis][sector_index][1] = observed_induction[0]; + observation_matrix[axis][sector_index][2] = observed_induction[1]; + observation_matrix[axis][sector_index][3] = observed_induction[2]; + } + + if( (last_sector_collected == INVALID) && populated_sectors >= OBSERVATIONS) + // now we are collecting data within the last sector to be collected + last_sector_collected = sector_index; + + return false; +} + +bool compass_calibrator_3D_t::calculate( void) +{ +// if( buffer_used_for_calibration != INVALID) +// return false; + + computation_float_type temporary_solution_matrix[PARAMETERS][PARAMETERS]; + ARM_MATRIX_INSTANCE solution; + solution.numCols=PARAMETERS; + solution.numRows=PARAMETERS; + solution.pData = (computation_float_type *)temporary_solution_matrix; + + ARM_MATRIX_INSTANCE observations; + observations.numCols=PARAMETERS; + observations.numRows=OBSERVATIONS; + + computation_float_type transposed_matrix[PARAMETERS][OBSERVATIONS]; + ARM_MATRIX_INSTANCE observations_transposed; + observations_transposed.numCols=OBSERVATIONS; + observations_transposed.numRows=PARAMETERS; + observations_transposed.pData = (computation_float_type *)transposed_matrix; + + computation_float_type matrix_to_be_inverted_data[PARAMETERS][PARAMETERS]; + ARM_MATRIX_INSTANCE matrix_to_be_inverted; + matrix_to_be_inverted.numCols=PARAMETERS; + matrix_to_be_inverted.numRows=PARAMETERS; + matrix_to_be_inverted.pData = (computation_float_type *)matrix_to_be_inverted_data; + + computation_float_type solution_mapping_data[PARAMETERS][OBSERVATIONS]; + ARM_MATRIX_INSTANCE solution_mapping; + solution_mapping.numCols=OBSERVATIONS; + solution_mapping.numRows=PARAMETERS; + solution_mapping.pData = (computation_float_type *)solution_mapping_data; + + int next_buffer; + if( buffer_used_for_calibration == 0) + next_buffer = 1; + else + next_buffer = 0; + + for( unsigned axis = 0; axis < AXES; ++axis) + { + observations.pData = &(observation_matrix[axis][0][0]); + + // calculation, once per axis (FRONT, RIGHT, DOWN): + // target vector: T = ideal induction values for all observations + // single measurement: < 1 induction q0q1 q0q2 q0q3 q1q1 q1q2 q1q3 q2q2 q2q3 q3q3 > (single row) + // measurement matrix: M = single measurement * # OBSERVATIONS + // solution matrix: S = inverse( M_transposed * M) * M_transposed + // axis parameter set: P = S * T + + arm_status result = ARM_MAT_TRANS( &observations, &observations_transposed); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_MULT( &observations_transposed, &observations, &matrix_to_be_inverted); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_INVERSE( &matrix_to_be_inverted, &solution); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_MULT( &solution, &observations_transposed, &solution_mapping); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + ARM_MATRIX_INSTANCE target_vector_inst; + target_vector_inst.numCols=1; + target_vector_inst.numRows=OBSERVATIONS; + target_vector_inst.pData=&(target_vector[axis][0]); + + ARM_MATRIX_INSTANCE axis_parameter_set; + axis_parameter_set.numCols=1; + axis_parameter_set.numRows=PARAMETERS; + axis_parameter_set.pData=&(c[next_buffer][axis][0]); + result = ARM_MAT_MULT( &solution_mapping, &target_vector_inst, &axis_parameter_set); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + } + + buffer_used_for_calibration = next_buffer; // switch now in a thread-save manner + +#if PRINT_PARAMETERS + + for( unsigned k=0; k < AXES; ++k) + { + for( unsigned i=0; i &q) + { + if( buffer_used_for_calibration == INVALID) // we do not have a valid calibration + return induction; + + unsigned b=buffer_used_for_calibration; // just to save expensive characters ... + + float3vector retv; + for( int i = 0; i < AXES; ++i) + { + retv[i] = + c[b][i][0] + + c[b][i][1] * induction[0] + + c[b][i][2] * induction[1] + + c[b][i][3] * induction[2]; + } + + return retv; + } diff --git a/NAV_Algorithms/compass_calibrator_3D.h b/NAV_Algorithms/compass_calibrator_3D.h new file mode 100644 index 0000000..100cf9f --- /dev/null +++ b/NAV_Algorithms/compass_calibrator_3D.h @@ -0,0 +1,116 @@ +/***********************************************************************//** + * @file compass_calibrator_3D.h + * @brief induction sensor calibration and magnetic disturbance compensation + * @author Dr. Klaus Schaefer + * @copyright Copyright 20.8.2024 Dr. Klaus Schaefer. All rights reserved. + * @license This project is released under the GNU Public License GPL-3.0 + + + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + **************************************************************************/ + +#ifndef NAV_ALGORITHMS_COMPASS_CALIBRATOR_3D_H_ +#define NAV_ALGORITHMS_COMPASS_CALIBRATOR_3D_H_ + +#include "embedded_memory.h" +#include "embedded_math.h" +#include "float3vector.h" +#include "quaternion.h" + +#include "matrix_functions.h" + +#if 0 +typedef double computation_float_type; +#define ARM_MATRIX_INSTANCE arm_matrix_instance_f64 +#define ARM_MAT_TRANS arm_mat_trans_f64 +#define ARM_MAT_MULT arm_mat_mult_f64 +#define ARM_MAT_INVERSE arm_mat_inverse_f64 +#else +typedef float computation_float_type; +#define ARM_MATRIX_INSTANCE arm_matrix_instance_f32 +#define ARM_MAT_TRANS arm_mat_trans_f32 +#define ARM_MAT_MULT arm_mat_mult_f32 +#define ARM_MAT_INVERSE arm_mat_inverse_f32 +#endif + +//! 3 dimensional magnetic calibration and error compensation mechanism +class compass_calibrator_3D_t +{ +public: + enum { AXES=3, PARAMETERS=4, OBSERVATIONS=22, INVALID=-1}; + + compass_calibrator_3D_t( void) + : buffer_used_for_calibration(INVALID), + measurement_counter(0) +{ + start_learning(); + } + + void start_learning( void) + { + populated_sectors = 0; + last_sector_collected = INVALID; + measurement_counter=0; + for( unsigned i=0; i < OBSERVATIONS; ++i) + heading_sector_error[i]=1e20f; + } + + bool learn (const float3vector &observed_induction,const float3vector &expected_induction, const quaternion &q, bool turning_right, float error_margin); + float3vector calibrate( const float3vector &induction, const quaternion &q); + bool calculate( void); + bool available( void) const + { + return buffer_used_for_calibration != INVALID; + } + + const computation_float_type * get_current_parameters( void) const + { + if( buffer_used_for_calibration == INVALID) + return 0; + + return &(c[buffer_used_for_calibration][0][0]); + } + + void set_current_parameters( const float * source) + { + int next_buffer; + if( buffer_used_for_calibration != 0) + next_buffer = 0; + else + next_buffer = 1; + + computation_float_type * destination = &(c[next_buffer][0][0]); + for( unsigned i=0; i < AXES * PARAMETERS; ++i) + *destination++ = *source++; + + buffer_used_for_calibration = next_buffer; + } + +private: + int buffer_used_for_calibration; + unsigned populated_sectors; + unsigned last_sector_collected; + unsigned measurement_counter; + computation_float_type c[2][AXES][PARAMETERS]; // double buffering for multi-thrading support + computation_float_type target_vector[AXES][OBSERVATIONS]; + computation_float_type observation_matrix[AXES][OBSERVATIONS][PARAMETERS]; + computation_float_type heading_sector_error[OBSERVATIONS]; +}; + +extern compass_calibrator_3D_t compass_calibrator_3D; +void trigger_compass_calibrator_3D_calculation(void); + +#endif /* NAV_ALGORITHMS_COMPASS_CALIBRATOR_3D_H_ */ diff --git a/NAV_Algorithms/compass_ground_calibration.h b/NAV_Algorithms/compass_ground_calibration.h index 69ab926..1d070c8 100644 --- a/NAV_Algorithms/compass_ground_calibration.h +++ b/NAV_Algorithms/compass_ground_calibration.h @@ -8,6 +8,7 @@ #define CUTOFF_DIV_BY_SAMPLING_FREQ 0.01f +//! helper class to support manual compass calibration class compass_ground_calibration_t { public: diff --git a/NAV_Algorithms/data_structures.h b/NAV_Algorithms/data_structures.h index fb8f0c7..b10aa43 100644 --- a/NAV_Algorithms/data_structures.h +++ b/NAV_Algorithms/data_structures.h @@ -31,8 +31,7 @@ #pragma pack(push, 1) - -//! contains all calibrated data from the sensors +//! contains all input data from the sensors typedef struct { float3vector acc; //XSENSE MTi1 IMU @@ -48,6 +47,15 @@ typedef struct #endif } measurement_data_t; +//! this structure contains all the observations from all sensors and the GNSS-receiver +typedef struct +{ + float3vector acc; + float3vector gyro; + float3vector mag; + float temperature; +} extra_sensor_data_t; + //! this structure contains all the observations from sensors and GNSS typedef struct { @@ -57,6 +65,9 @@ typedef struct float dummy2; #endif coordinates_t c; +#if WITH_EXTERNAL_IMU + extra_sensor_data_t extra; +#endif #if RUN_MICROPHONE float sound_intensity; #endif @@ -67,6 +78,9 @@ typedef struct { measurement_data_t m; coordinates_t c; +#if WITH_EXTERNAL_IMU + extra_sensor_data_t extra; +#endif #if RUN_MICROPHONE float sound_intensity; #endif @@ -120,6 +134,7 @@ typedef struct float vario_wind_E; float3vector body_induction; float3vector body_induction_error; + float gyro_correction_power; #endif } output_data_t; diff --git a/NAV_Algorithms/earth_induction_model.h b/NAV_Algorithms/earth_induction_model.h index 6ec8b98..0f8a285 100644 --- a/NAV_Algorithms/earth_induction_model.h +++ b/NAV_Algorithms/earth_induction_model.h @@ -30,6 +30,7 @@ enum { N_AREAS=8, N_COEFFICIENTS=10}; +//! struct containing magnetic induction data for a point typedef struct { float declination; //!< positive to the east @@ -37,6 +38,7 @@ typedef struct bool valid; } induction_values; +//! struct containing magnetic induction data for a regional region typedef struct { double longitude_limit_west; @@ -47,6 +49,7 @@ typedef struct double coefficients_inclination[N_COEFFICIENTS]; } induction_model_area_t; +//! Providing worldwide magnetic induction data class earth_induction_model_t { private: diff --git a/NAV_Algorithms/induction_observer.h b/NAV_Algorithms/induction_observer.h deleted file mode 100644 index f1ac2c9..0000000 --- a/NAV_Algorithms/induction_observer.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * induction_observer.h - * - * Created on: Feb 6, 2023 - * Author: schaefer - */ - -#ifndef NAV_ALGORITHMS_INDUCTION_OBSERVER_H_ -#define NAV_ALGORITHMS_INDUCTION_OBSERVER_H_ -#include "mean_and_variance_finder.h" -#include "float3vector.h" - -template class induction_observer_t -{ -public: - induction_observer_t( float _scale_factor) - : scale_factor( _scale_factor) - {} - void feed (float3vector induction, bool right_turn) - { - if (right_turn) - for( unsigned i=0; i<3; ++i) - induction_observer_right[i].feed (induction[i] * scale_factor); - else - for( unsigned i=0; i<3; ++i) - induction_observer_left[i].feed (induction[i] * scale_factor); - } - void - reset (void) - { - for( unsigned i=0; i<3; ++i) - { - induction_observer_right[i].reset (); - induction_observer_left[i].reset (); - } -} - bool data_valid( void) const - { - return ( - (induction_observer_right[0].get_samples() > MINIMUM_MAG_CALIBRATION_SAMPLES) && - (induction_observer_left[0].get_samples() > MINIMUM_MAG_CALIBRATION_SAMPLES) ); - } - float3vector get_estimated_induction( void) const - { - float3vector retv; - for( unsigned i=0; i<3; ++i) - retv[i] = (induction_observer_right[i].get_mean() + induction_observer_left[i].get_mean()) * 0.5f / scale_factor; - return retv; - } - float get_variance( void ) const - { - float sum = 0.0f; - for( unsigned i=0; i<3; ++i) - { - sum += induction_observer_right[i].get_variance(); - sum += induction_observer_left[i].get_variance(); - } - return sum * 0.1666666666f / scale_factor / scale_factor; - } -private: - enum{ MINIMUM_SAMPLES = 10000}; - float scale_factor; - mean_and_variance_finder_t induction_observer_right[3]; - mean_and_variance_finder_t induction_observer_left[3]; -}; - -#endif /* NAV_ALGORITHMS_INDUCTION_OBSERVER_H_ */ diff --git a/NAV_Algorithms/magnetic_induction_report.h b/NAV_Algorithms/magnetic_induction_report.h index fda876d..12dce15 100644 --- a/NAV_Algorithms/magnetic_induction_report.h +++ b/NAV_Algorithms/magnetic_induction_report.h @@ -12,13 +12,11 @@ #include "float3vector.h" #include "NAV_tuning_parameters.h" +//! helper struct containing magnetic calibration data struct magnetic_induction_report_t { single_axis_calibration_t calibration[3]; -#if USE_EARTH_INDUCTION_DATA_COLLECTOR - float3vector nav_induction; - float nav_induction_std_deviation; -#endif + bool valid; }; void report_magnetic_calibration_has_changed( magnetic_induction_report_t *p_magnetic_induction_report, char type); diff --git a/NAV_Algorithms/navigator.cpp b/NAV_Algorithms/navigator.cpp index 5949775..d71d630 100644 --- a/NAV_Algorithms/navigator.cpp +++ b/NAV_Algorithms/navigator.cpp @@ -52,7 +52,7 @@ void navigator_t::update_at_100Hz ( ahrs.get_nav_acceleration (), heading_vector, GNSS_negative_altitude, - atmosphere.get_negative_altitude(), + atmosphere.get_negative_pressure_altitude(), IAS, wind_observer.get_speed_compensator_wind(), (GNSS_fix_type != 0) @@ -85,24 +85,39 @@ void navigator_t::update_GNSS_data( const coordinates_t &coordinates) bool navigator_t::update_at_10Hz () { bool landing_detected=false; - atmosphere.feed_QFF_density_metering( - air_pressure_resampler_100Hz_10Hz.get_output(), - flight_observer.get_filtered_GNSS_altitude()); - atmosphere.update_density_correction(); // here because of the 10 Hz call frequency + atmosphere.update_density( -GNSS_negative_altitude, GNSS_fix_type > 0); wind_observer.process_at_10_Hz( ahrs); vario_integrator.update (flight_observer.get_vario_GNSS(), // here because of the update rate 10Hz - ahrs.get_euler ().y, + ahrs.get_euler ().yaw, ahrs.get_circling_state ()); - airborne_detector.report_to_be_airborne( abs( flight_observer.get_speed_compensation_GNSS()) > AIRBORNE_TRIGGER_SPEED); + unsigned airborne_criteria_fulfilled = 0; + if( abs( flight_observer.get_speed_compensation_GNSS()) > AIRBORNE_TRIGGER_SPEED_COMP) + ++ airborne_criteria_fulfilled; + + if( GNSS_velocity.abs() > AIRBORNE_TRIGGER_SPEED) + ++ airborne_criteria_fulfilled; + + if( IAS > AIRBORNE_TRIGGER_SPEED) + ++ airborne_criteria_fulfilled; + + // if at least 2 of 3 criteria apply, we believe to be airborne + airborne_detector.report_to_be_airborne( airborne_criteria_fulfilled > 1); if( airborne_detector.detect_just_landed()) { ahrs.write_calibration_into_EEPROM(); landing_detected = true; } + + if( airborne_detector.is_airborne()) + atmosphere.air_density_metering( + air_pressure_resampler_100Hz_10Hz.get_output(), + flight_observer.get_filtered_GNSS_altitude()); + + return landing_detected; } @@ -133,14 +148,14 @@ void navigator_t::report_data( output_data_t &d) d.slip_angle = ahrs.getSlipAngle(); d.pitch_angle = ahrs.getPitchAngle(); d.G_load = ahrs.get_G_load(); - d.pressure_altitude = - atmosphere.get_negative_altitude(); + d.pressure_altitude = - atmosphere.get_negative_pressure_altitude(); d.magnetic_disturbance = ahrs.getMagneticDisturbance(); d.air_density = atmosphere.get_density(); d.nav_induction_gnss = ahrs.get_nav_induction(); #if DEVELOPMENT_ADDITIONS - d.QFF = atmosphere.get_QFF(); + d.QFF = atmosphere.get_extrapolated_sea_level_pressure(); d.nav_correction = ahrs.get_nav_correction(); d.gyro_correction = ahrs.get_gyro_correction(); d.nav_acceleration_gnss = ahrs.get_nav_acceleration(); @@ -165,5 +180,7 @@ void navigator_t::report_data( output_data_t &d) d.vario_wind_E = wind_observer.get_speed_compensator_wind()[EAST]; d.body_induction = ahrs.getBodyInduction(); d.body_induction_error = ahrs.getBodyInductionError(); +// d.body_induction_error = ahrs_magnetic.getBodyInductionError(); + d.gyro_correction_power = ahrs.getGyro_correction_Power(); #endif } diff --git a/NAV_Algorithms/navigator.h b/NAV_Algorithms/navigator.h index 1fe285f..815993f 100644 --- a/NAV_Algorithms/navigator.h +++ b/NAV_Algorithms/navigator.h @@ -50,7 +50,7 @@ class navigator_t flight_observer(), wind_observer(), airborne_detector(), - air_pressure_resampler_100Hz_10Hz(0.04f), // f / fcutoff = 80% * 0.5 * 0.1 + air_pressure_resampler_100Hz_10Hz(0.025f), // = 2.5 Hz @ 100Hz pitot_pressure(0.0f), TAS( 0.0f), IAS( 0.0f), @@ -74,7 +74,6 @@ class navigator_t ahrs_magnetic.update_magnetic_induction_data( declination, inclination); #endif } - void set_density_data( float temperature, float humidity) { if( ! isnan( temperature) && ! isnan( humidity) ) @@ -89,7 +88,7 @@ class navigator_t void feed_QFF_density_metering( float pressure, float MSL_altitude) { - atmosphere.feed_QFF_density_metering( pressure, MSL_altitude); + atmosphere.air_density_metering( pressure, MSL_altitude); } void disregard_density_data( void) @@ -119,7 +118,7 @@ class navigator_t void reset_altitude( void) { - flight_observer.reset( atmosphere.get_negative_altitude(), GNSS_negative_altitude); + flight_observer.reset( atmosphere.get_negative_pressure_altitude(), GNSS_negative_altitude); } /** * @brief update pitot pressure diff --git a/NAV_Algorithms/old_data_structures.h b/NAV_Algorithms/old_data_structures.h deleted file mode 100644 index aed6212..0000000 --- a/NAV_Algorithms/old_data_structures.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * old_data_structures.h - * - * Created on: Nov 5, 2022 - * Author: schaefer - */ - -#ifndef OLD_DATA_STRUCTURES_H_ -#define OLD_DATA_STRUCTURES_H_ - -#include "system_configuration.h" -#include "quaternion.h" -#include "GNSS.h" -#include "data_structures.h" -#include "cmath" - -#pragma pack(push, 1) - -typedef struct -{ - float3vector acc; - float3vector gyro; - float3vector mag; - float3vector lowcost_acc; - float3vector lowcost_gyro; - float3vector lowcost_mag; - float pitot_pressure; - float static_pressure; - float absolute_pressure; //this is the second ms5611 on the PCB. - float static_sensor_temperature; //log temperature to monitor temperature in enclosure - float absolute_sensor_temperature; - float supply_voltage; //Measuring the supply voltage. Might be related to sensor noise. -#if WITH_DENSITY_DATA - float outside_air_temperature; //!< OAT from external sensor if installed - float outside_air_humidity; //!< 0.0 -> 1.0 NOT percent -#endif -} old_measurement_data_t; - -typedef struct -{ - float3vector position; //!< NED / meters - float3vector velocity; //!< NED / m/s - float3vector acceleration; //!< NED / m/s^2 (from velocity delta) - float heading_motion; // degrees - float speed_motion; // m/s - float3vector relPosNED; // - float relPosHeading; - float speed_acc; // speed accuracy m/s - double latitude; //!< degrees - double longitude; //!< degrees - - uint8_t year; - uint8_t month; - uint8_t day; - uint8_t hour; - - uint8_t minute; - uint8_t second; - uint8_t SATS_number; //!< number of tracked satellites - uint8_t sat_fix_type; //!< bit 0: SAT FIX, bit 1: SAT HEADING availale - -#if INCLUDING_NANO - int32_t nano; // nanoseconds from time stamp -#endif - - int16_t geo_sep_dm; //!< (WGS ellipsoid height - elevation MSL) in 0.1m units - uint16_t dummy; -} old_coordinates_t; - -typedef struct -{ - old_measurement_data_t m; - old_coordinates_t c; -} old_input_data_t; - -#pragma pack(pop) - -inline void new_format_from_old( measurement_data_t & out_m, coordinates_t & out_c, const old_input_data_t & in) -{ - out_m.acc = in.m.acc; - out_m.mag = in.m.mag; - out_m.gyro = in.m.gyro; - - out_m.static_pressure = in.m.static_pressure; - out_m.static_sensor_temperature = in.m.static_sensor_temperature; - out_m.pitot_pressure = in.m.pitot_pressure; - out_m.supply_voltage = in.m.supply_voltage; - - out_c.position = in.c.position; - out_c.velocity = in.c.velocity; - out_c.acceleration = in.c.acceleration; //!< NED / m/s^2 (from velocity delta) - out_c.heading_motion = in.c.heading_motion; // degrees - out_c.speed_motion = in.c.speed_motion; - out_c.relPosNED = in.c.relPosNED; // - out_c.relPosHeading = in.c.relPosHeading; - out_c.speed_acc = in.c.speed_acc; - out_c.latitude = in.c.latitude; - out_c.longitude = in.c.longitude; - - out_c.year = in.c.year; - out_c.month = in.c.month; - out_c.day = in.c.day; - out_c.hour = in.c.hour; - - out_c.minute = in.c.minute; - out_c.second = in.c.second; - out_c.geo_sep_dm = in.c.geo_sep_dm; - - out_c.nano = 0xefffffff; // not used in old format - - // patches - if( isnormal(out_c.relPosHeading)) - out_c.sat_fix_type = 3; // D-GNSS available - else - out_c.sat_fix_type = 1; - - out_c.SATS_number = 55; // just a joke ... - -#if GNSS_VERTICAL_SPEED_INVERTED // for simulation with some old data - out_c.velocity[DOWN] *= -1.0f; // earlier we recorded the wrong sign -#endif -} - -#endif /* OLD_DATA_STRUCTURES_H_ */ diff --git a/NAV_Algorithms/organizer.h b/NAV_Algorithms/organizer.h index 73efd95..a53ea20 100644 --- a/NAV_Algorithms/organizer.h +++ b/NAV_Algorithms/organizer.h @@ -30,6 +30,13 @@ #include "navigator.h" #include "earth_induction_model.h" +typedef struct +{ + float3vector acc_observed_left; + float3vector acc_observed_right; + float3vector acc_observed_level; +} vector_average_collection_t; + //! set of algorithms and data to be used by Larus flight sensor class organizer_t { @@ -43,20 +50,55 @@ class organizer_t } + void update_sensor_orientation_data( const vector_average_collection_t & values) + { + // resolve sensor orientation using measurements + float3vector front_down_sensor_helper = values.acc_observed_right.vector_multiply( values.acc_observed_left); + float3vector u_right_sensor = front_down_sensor_helper.vector_multiply( values.acc_observed_level); + u_right_sensor.normalize(); + + float3vector u_down_sensor = values.acc_observed_level * -1.0f; + u_down_sensor.normalize(); + + float3vector u_front_sensor=u_right_sensor.vector_multiply(u_down_sensor); + u_front_sensor.normalize(); + + // calculate the new rotation matrix using our calibration data + sensor_mapping.e[0][0]=u_front_sensor[0]; + sensor_mapping.e[0][1]=u_front_sensor[1]; + sensor_mapping.e[0][2]=u_front_sensor[2]; + + sensor_mapping.e[1][0]=u_right_sensor[0]; + sensor_mapping.e[1][1]=u_right_sensor[1]; + sensor_mapping.e[1][2]=u_right_sensor[2]; + + sensor_mapping.e[2][0]=u_down_sensor[0]; + sensor_mapping.e[2][1]=u_down_sensor[1]; + sensor_mapping.e[2][2]=u_down_sensor[2]; + + quaternion q; + q.from_rotation_matrix( sensor_mapping); + eulerangle euler = q; + + // make the change permanent + lock_EEPROM( false); + write_EEPROM_value( SENS_TILT_ROLL, euler.roll); + write_EEPROM_value( SENS_TILT_PITCH, euler.pitch); + write_EEPROM_value( SENS_TILT_YAW, euler.yaw); + lock_EEPROM( true); + } + void initialize_before_measurement( void) { pitot_offset= configuration (PITOT_OFFSET); pitot_span = configuration (PITOT_SPAN); QNH_offset = configuration (QNH_OFFSET); - { - quaternion q; - q.from_euler (configuration (SENS_TILT_ROLL), - configuration (SENS_TILT_PITCH), - configuration (SENS_TILT_YAW)); - q.get_rotation_matrix (sensor_mapping); - } - + quaternion q; + q.from_euler (configuration (SENS_TILT_ROLL), + configuration (SENS_TILT_PITCH), + configuration (SENS_TILT_YAW)); + q.get_rotation_matrix (sensor_mapping); } void update_magnetic_induction_data( double latitude, double longitude) @@ -104,10 +146,11 @@ class organizer_t return landing_detected; } - void set_attitude ( float roll, float nick, float present_heading) + void set_attitude ( float roll, float pitch, float present_heading) { - navigator.set_attitude ( roll, nick, present_heading); + navigator.set_attitude ( roll, pitch, present_heading); } + void update_GNSS_data( const coordinates_t &coordinates) { navigator.update_GNSS_data(coordinates); @@ -116,15 +159,9 @@ class organizer_t void update_every_10ms( output_data_t & output_data) { // rotate sensor coordinates into airframe coordinates -#if USE_LOWCOST_IMU == 1 - acc = sensor_mapping * output_data.m.lowcost_acc; - mag = sensor_mapping * output_data.m.lowcost_mag; - gyro = sensor_mapping * output_data.m.lowcost_gyro; -#else acc = sensor_mapping * output_data.m.acc; mag = sensor_mapping * output_data.m.mag; gyro = sensor_mapping * output_data.m.gyro; -#endif #if DEVELOPMENT_ADDITIONS output_data.body_acc = acc; diff --git a/NAV_Algorithms/persistent_data.cpp b/NAV_Algorithms/persistent_data.cpp index 9bc7c34..ea40781 100644 --- a/NAV_Algorithms/persistent_data.cpp +++ b/NAV_Algorithms/persistent_data.cpp @@ -45,7 +45,7 @@ ROM persistent_data_t PERSISTENT_DATA[]= {MAG_Z_OFF, "Mag_Z_Off", false, 0.0f, 0}, //! Induction sensor x offset signed / ( 10.0f / 32768 ) {MAG_Z_SCALE, "Mag_Z_Scale", false, 1.0f, 0}, //! Induction sensor x gain signed ( scale-factor = 1.0f + value / 32768 ) {MAG_STD_DEVIATION, "Mag_Calib_Err", false, 1e-2f, 0}, //! Magnetic calibration STD deviation / ( 1 % / 65536 ) - {MAG_AUTO_CALIB, "Mag_Auto_Calib", false, 1.0f, 0}, //! Magnetic calibration adjusted automatically + {MAG_AUTO_CALIB, "Mag_Auto_Calib", false, 2.0f, 0}, //! Magnetic calibration automatic { OFF=0, 2D=1, 3D=2 } {VARIO_TC, "Vario_TC", false, 2.0f, 0}, //! Vario time constant unsigned s / ( 100.0f / 65536 ) {VARIO_INT_TC, "Vario_Int_TC", false, 30.0f, 0}, //! Vario integrator time constant unsigned s / ( 100.0f / 65536 ) diff --git a/NAV_Algorithms/setup_tester.cpp b/NAV_Algorithms/setup_tester.cpp new file mode 100644 index 0000000..3f94c9c --- /dev/null +++ b/NAV_Algorithms/setup_tester.cpp @@ -0,0 +1,90 @@ +#include "float3vector.h" +#include "float3matrix.h" +#include "quaternion.h" + +const float ldi[]={0.1f, +0.25f, -10.0f}; // left wing down +const float rdi[]={0.1f, -0.15f, -10.0f}; // right wing down +const float hi[] ={0.0f, 0.0f, -10.0f}; // horizontal + +const float test1[] ={ 1.0f, 0.0f, 0.0f}; +const float test2[] ={ 0.57735f, 0.57735f, -0.57735f}; +const float test3[] ={ 0.57735f, -0.57735f, 0.57735f}; + +void setup_tester(void) +{ + // create measurements in aircraft body frame + float3vector left_down_body( ldi); + float3vector right_down_body( rdi); + float3vector horiz_body( hi); + + // setup the sensor orientation to test + quaternionsensor_q; +// sensor_q.from_euler( 45.0 * M_PI/180.0, 135.0 * M_PI/180.0, -30.0 * M_PI/180.0); +// sensor_q.from_euler( -135.0 * M_PI/180.0, 45.0 * M_PI/180.0, 150.0 * M_PI/180.0); +// sensor_q.from_euler( -10.0 * M_PI/180.0, 75.0 * M_PI/180.0, -45.0 * M_PI/180.0); + sensor_q.from_euler( 10.0 * M_PI/180.0, -75.0 * M_PI/180.0, +125.0 * M_PI/180.0); + + float3matrix sensor_2_body_assumption; + sensor_q.get_rotation_matrix(sensor_2_body_assumption); + + //check if we get the rotation angles using the matrix given + quaternion qin; + qin.from_rotation_matrix(sensor_2_body_assumption); + eulerangle euler_in = qin; + euler_in.roll *= 180/M_PI; + euler_in.pitch *= 180/M_PI; + euler_in.yaw *= 180/M_PI; + + // map measurements into the sensor frame + float3vector left_down_sensor = sensor_2_body_assumption.reverse_map( left_down_body); + float3vector right_down_sensor= sensor_2_body_assumption.reverse_map( right_down_body); + float3vector horiz_sensor = sensor_2_body_assumption.reverse_map( horiz_body); + + // resolve sensor orientation using measurements + float3vector front_down_sensor_helper = right_down_sensor.vector_multiply(left_down_sensor); + float3vector u_right_sensor = front_down_sensor_helper.vector_multiply(horiz_sensor); + u_right_sensor.normalize(); + + float3vector u_down_sensor = horiz_sensor * -1.0f; + u_down_sensor.normalize(); + + float3vector u_front_sensor=u_right_sensor.vector_multiply(u_down_sensor); + u_front_sensor.normalize(); + + // calculate the rotation matrix using our calibration data + float3matrix sensor_2_body; + sensor_2_body.e[0][0]=u_front_sensor[0]; + sensor_2_body.e[0][1]=u_front_sensor[1]; + sensor_2_body.e[0][2]=u_front_sensor[2]; + + sensor_2_body.e[1][0]=u_right_sensor[0]; + sensor_2_body.e[1][1]=u_right_sensor[1]; + sensor_2_body.e[1][2]=u_right_sensor[2]; + + sensor_2_body.e[2][0]=u_down_sensor[0]; + sensor_2_body.e[2][1]=u_down_sensor[1]; + sensor_2_body.e[2][2]=u_down_sensor[2]; + + // test if the rotation matrix recovers the body frame data + float3vector test_left_down_body = sensor_2_body * left_down_sensor; + float3vector test_right_down_body = sensor_2_body * right_down_sensor; + float3vector test_horiz_body = sensor_2_body * horiz_sensor; + + // test if the matrix provides pure rotation + float3vector vtest1 = sensor_2_body * float3vector( test1); + float test1_abs=vtest1.abs(); + float3vector vtest2 = sensor_2_body * float3vector( test2); + float test2_abs=vtest2.abs(); + float3vector vtest3 = sensor_2_body * float3vector( test3); + float test3_abs=vtest3.abs(); + + //check if we get the rotation angles using the matrix we have calculated + quaternion q; + q.from_rotation_matrix(sensor_2_body); + eulerangle euler = q; + euler.roll *= 180/M_PI; + euler.pitch *= 180/M_PI; + euler.yaw *= 180/M_PI; +} + + diff --git a/NAV_Algorithms/soaring_flight_averager.h b/NAV_Algorithms/soaring_flight_averager.h index f912401..9aeddd4 100644 --- a/NAV_Algorithms/soaring_flight_averager.h +++ b/NAV_Algorithms/soaring_flight_averager.h @@ -88,7 +88,8 @@ template + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + **************************************************************************/ + +#define PRINT_PARAMETERS 1 +#if PRINT_PARAMETERS +#include "stdio.h" +#endif + +#include "soft_iron_compensator.h" +#include "embedded_math.h" + +#include + +ROM float RECIP_SECTOR_SIZE = soft_iron_compensator_t::OBSERVATIONS / M_PI_F / TWO / TWO; + +bool soft_iron_compensator_t::learn ( const float3vector &induction_error, const quaternion &q, bool turning_right, float error_margin) +{ + float present_heading = q.get_heading(); + if( present_heading <0.0f) + present_heading += M_PI_F * TWO; + + int sector_index = (turning_right ? OBSERVATIONS / TWO : 0) + (unsigned)(present_heading * RECIP_SECTOR_SIZE); + + // if we have just left the last sector to be collected: report ready for computation + if( ( last_sector_collected != -1) && ( sector_index != last_sector_collected) && (++measurement_counter > 10000)) + return true; + + if( heading_sector_error[sector_index] > 1e19) // this sector has not been written before + ++populated_sectors; + + if ( heading_sector_error[sector_index] < error_margin) + return false; // we have collected more precise data for this observation earlier + + heading_sector_error[sector_index] = error_margin; + + for( unsigned axis = 0; axis < AXES; ++axis) + { + target_vector[axis][sector_index] = induction_error[axis]; + + observation_matrix[axis][sector_index][0] = 1.0f; + observation_matrix[axis][sector_index][1] = q[0] * q[1]; + observation_matrix[axis][sector_index][2] = q[0] * q[2]; + observation_matrix[axis][sector_index][3] = q[0] * q[3]; + observation_matrix[axis][sector_index][4] = q[1] * q[1]; + observation_matrix[axis][sector_index][5] = q[1] * q[2]; + observation_matrix[axis][sector_index][6] = q[1] * q[3]; + observation_matrix[axis][sector_index][7] = q[2] * q[2]; + observation_matrix[axis][sector_index][8] = q[2] * q[3]; + observation_matrix[axis][sector_index][9] = q[3] * q[3]; + } + + if( (last_sector_collected == -1) && populated_sectors >= OBSERVATIONS) + last_sector_collected = sector_index; + + return false; +} + +bool soft_iron_compensator_t::calculate( void) +{ +// if( buffer_used_for_calibration != INVALID) +// return false; + + computation_float_type temporary_solution_matrix[PARAMETERS][PARAMETERS]; + ARM_MATRIX_INSTANCE solution; + solution.numCols=PARAMETERS; + solution.numRows=PARAMETERS; + solution.pData = (computation_float_type *)temporary_solution_matrix; + + ARM_MATRIX_INSTANCE observations; + observations.numCols=PARAMETERS; + observations.numRows=OBSERVATIONS; + + computation_float_type transposed_matrix[PARAMETERS][OBSERVATIONS]; + ARM_MATRIX_INSTANCE observations_transposed; + observations_transposed.numCols=OBSERVATIONS; + observations_transposed.numRows=PARAMETERS; + observations_transposed.pData = (computation_float_type *)transposed_matrix; + + computation_float_type matrix_to_be_inverted_data[PARAMETERS][PARAMETERS]; + ARM_MATRIX_INSTANCE matrix_to_be_inverted; + matrix_to_be_inverted.numCols=PARAMETERS; + matrix_to_be_inverted.numRows=PARAMETERS; + matrix_to_be_inverted.pData = (computation_float_type *)matrix_to_be_inverted_data; + + computation_float_type solution_mapping_data[PARAMETERS][OBSERVATIONS]; + ARM_MATRIX_INSTANCE solution_mapping; + solution_mapping.numCols=OBSERVATIONS; + solution_mapping.numRows=PARAMETERS; + solution_mapping.pData = (computation_float_type *)solution_mapping_data; + + int next_buffer; + if( buffer_used_for_calibration == 0) + next_buffer = 1; + else + next_buffer = 0; + + for( unsigned axis = 0; axis < AXES; ++axis) + { + observations.pData = &(observation_matrix[axis][0][0]); + + // calculation, once per axis (FRONT, RIGHT, DOWN): + // target vector: T = ideal induction values for all observations + // single measurement: < 1 induction q0q1 q0q2 q0q3 q1q1 q1q2 q1q3 q2q2 q2q3 q3q3 > (single row) + // measurement matrix: M = single measurement * # OBSERVATIONS + // solution matrix: S = inverse( M_transposed * M) * M_transposed + // axis parameter set: P = S * T + + arm_status result = ARM_MAT_TRANS( &observations, &observations_transposed); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_MULT( &observations_transposed, &observations, &matrix_to_be_inverted); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_INVERSE( &matrix_to_be_inverted, &solution); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + result = ARM_MAT_MULT( &solution, &observations_transposed, &solution_mapping); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + + ARM_MATRIX_INSTANCE target_vector_inst; + target_vector_inst.numCols=1; + target_vector_inst.numRows=OBSERVATIONS; + target_vector_inst.pData=&(target_vector[axis][0]); + + ARM_MATRIX_INSTANCE axis_parameter_set; + axis_parameter_set.numCols=1; + axis_parameter_set.numRows=PARAMETERS; + axis_parameter_set.pData=&(c[next_buffer][axis][0]); + result = ARM_MAT_MULT( &solution_mapping, &target_vector_inst, &axis_parameter_set); + if( result != ARM_MATH_SUCCESS) + { + start_learning(); // discard data + return false; + } + if( buffer_used_for_calibration != INVALID) // if we already had a valid parameter set + { + int other_buffer = next_buffer == 1 ? 0 : 1; + for( unsigned i=0; i &q) + { + if( buffer_used_for_calibration == INVALID) // we do not have a valid calibration + return float3vector(); + + unsigned b=buffer_used_for_calibration; // just to save expensive characters ... + + float3vector retv; + for( int i = 0; i < 3; ++i) + { + retv[i] = + c[b][i][0] + + c[b][i][1] * q[0] * q[1] + c[b][i][2] * q[0] * q[2] + c[b][i][3] * q[0] * q[3] + + c[b][i][4] * q[1] * q[1] + c[b][i][5] * q[1] * q[2] + c[b][i][6] * q[1] * q[3] + + c[b][i][7] * q[2] * q[2] + c[b][i][8] * q[2] * q[3] + c[b][i][9] * q[3] * q[3] ; + } + return retv; + } diff --git a/NAV_Algorithms/soft_iron_compensator.h b/NAV_Algorithms/soft_iron_compensator.h new file mode 100644 index 0000000..328ebbf --- /dev/null +++ b/NAV_Algorithms/soft_iron_compensator.h @@ -0,0 +1,116 @@ +/***********************************************************************//** + * @file compass_calibrator_3D.h + * @brief induction sensor calibration and magnetic disturbance compensation + * @author Dr. Klaus Schaefer + * @copyright Copyright 20.8.2024 Dr. Klaus Schaefer. All rights reserved. + * @license This project is released under the GNU Public License GPL-3.0 + + + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + + **************************************************************************/ + +#ifndef NAV_ALGORITHMS_SOFT_IRON_COMPENSATOR_H_ +#define NAV_ALGORITHMS_SOFT_IRON_COMPENSATOR_H_ + +#include "embedded_memory.h" +#include "embedded_math.h" +#include "float3vector.h" +#include "quaternion.h" + +#include "matrix_functions.h" + +#if 0 +typedef double computation_float_type; +#define ARM_MATRIX_INSTANCE arm_matrix_instance_f64 +#define ARM_MAT_TRANS arm_mat_trans_f64 +#define ARM_MAT_MULT arm_mat_mult_f64 +#define ARM_MAT_INVERSE arm_mat_inverse_f64 +#else +typedef float computation_float_type; +#define ARM_MATRIX_INSTANCE arm_matrix_instance_f32 +#define ARM_MAT_TRANS arm_mat_trans_f32 +#define ARM_MAT_MULT arm_mat_mult_f32 +#define ARM_MAT_INVERSE arm_mat_inverse_f32 +#endif + +//! 3 dimensional magnetic calibration and error compensation mechanism +class soft_iron_compensator_t +{ +public: + enum { AXES=3, PARAMETERS=10, OBSERVATIONS=24, INVALID=-1}; + + soft_iron_compensator_t( void) + : buffer_used_for_calibration(INVALID), + measurement_counter(0) + { + start_learning(); + } + + void start_learning( void) + { + populated_sectors = 0; + last_sector_collected=-1; + measurement_counter=0; + for( unsigned i=0; i &q, bool turning_right, float error_margin); +float3vector compensate( const float3vector &induction, const quaternion &q); + bool calculate( void); + bool available( void) const + { + return buffer_used_for_calibration != INVALID; + } + + const computation_float_type * get_current_parameters( void) const + { + if( buffer_used_for_calibration == INVALID) + return 0; + + return &(c[buffer_used_for_calibration][0][0]); + } + + void set_current_parameters( const float * source) + { + int next_buffer; + if( buffer_used_for_calibration != 0) + next_buffer = 0; + else + next_buffer = 1; + + computation_float_type * destination = &(c[next_buffer][0][0]); + for( unsigned i=0; i < AXES * PARAMETERS; ++i) + *destination++ = *source++; + + buffer_used_for_calibration = next_buffer; + } + +private: + int buffer_used_for_calibration; + unsigned populated_sectors; + int last_sector_collected; + unsigned measurement_counter; + computation_float_type c[2][AXES][PARAMETERS]; // double buffering for multi-thrading support + computation_float_type target_vector[AXES][OBSERVATIONS]; + computation_float_type observation_matrix[AXES][OBSERVATIONS][PARAMETERS]; + computation_float_type heading_sector_error[OBSERVATIONS]; +}; + +extern soft_iron_compensator_t soft_iron_compensator; +void trigger_soft_iron_compensator_calculation(void); + +#endif /* NAV_ALGORITHMS_SOFT_IRON_COMPENSATOR_H_ */ diff --git a/NAV_Algorithms/variometer.cpp b/NAV_Algorithms/variometer.cpp index bec1e47..a10caee 100644 --- a/NAV_Algorithms/variometer.cpp +++ b/NAV_Algorithms/variometer.cpp @@ -41,8 +41,12 @@ void variometer_t::update_at_100Hz ( bool GNSS_fix_avaliable ) { +#if USE_OLD_FASHIONED_PRESSURE_VARIO + vario_uncompensated_pressure = pressure_vario_differentiator.respond( pressure_altitude); +#else vario_uncompensated_pressure = KalmanVario_pressure.update ( pressure_altitude, ahrs_acceleration[DOWN]); +#endif speed_compensation_IAS = kinetic_energy_differentiator.respond ( IAS * IAS * ONE_DIV_BY_GRAVITY_TIMES_2); vario_averager_pressure.respond ( @@ -90,8 +94,23 @@ void variometer_t::update_at_100Hz ( // speed compensation type 3 comes from the derivative of the specific energy speed_compensation_energy_3 = specific_energy_differentiator.respond ( specific_energy); + // speed-compensation type 4 is the product of acceleration and velocity, both calculated along the heading axis + float3vector kalman_air_velocity; + kalman_air_velocity[NORTH] = Kalman_v_a_observer_N.get_x ( Kalman_V_A_Aoff_observer_t::VELOCITY); + kalman_air_velocity[EAST] = Kalman_v_a_observer_E.get_x ( Kalman_V_A_Aoff_observer_t::VELOCITY); + kalman_air_velocity[DOWN] = KalmanVario_GNSS.get_x ( KalmanVario_PVA_t::VARIO); + float air_velocity_projected = kalman_air_velocity * heading_vector; + + acceleration[NORTH] = Kalman_v_a_observer_N.get_x ( Kalman_V_A_Aoff_observer_t::ACCELERATION); + acceleration[EAST] = Kalman_v_a_observer_E.get_x ( Kalman_V_A_Aoff_observer_t::ACCELERATION); + acceleration[DOWN] = KalmanVario_GNSS.get_x ( KalmanVario_PVA_t::ACCELERATION_OBSERVED); + float acceleration_projected = acceleration * heading_vector; + + speed_compensation_projected_4 = air_velocity_projected * acceleration_projected * RECIP_GRAVITY; + // blending of three mechanisms for speed-compensation - speed_compensation_GNSS = GNSS_INS_speedcomp_fusioner.respond( 0.5f * (speed_compensation_INS_GNSS_1 + speed_compensation_kalman_2), speed_compensation_energy_3); +// speed_compensation_GNSS = GNSS_INS_speedcomp_fusioner.respond( 0.3333333f * (speed_compensation_INS_GNSS_1 + speed_compensation_kalman_2 + speed_compensation_projected_4), speed_compensation_energy_3); + speed_compensation_GNSS = GNSS_INS_speedcomp_fusioner.respond( 0.5 * (speed_compensation_INS_GNSS_1 + speed_compensation_kalman_2), speed_compensation_energy_3); vario_averager_GNSS.respond ( vario_uncompensated_GNSS + speed_compensation_GNSS); } } diff --git a/NAV_Algorithms/variometer.h b/NAV_Algorithms/variometer.h index f1ffcfd..6938a20 100644 --- a/NAV_Algorithms/variometer.h +++ b/NAV_Algorithms/variometer.h @@ -56,6 +56,9 @@ class variometer_t KalmanVario_GNSS( 0.0f, 0.0f, 0.0f, - GRAVITY), KalmanVario_pressure( 0.0f, 0.0f, 0.0f, - GRAVITY), specific_energy_differentiator( 1.0f, FAST_SAMPLING_TIME), +#if USE_OLD_FASHIONED_PRESSURE_VARIO + pressure_vario_differentiator( 1.0f, FAST_SAMPLING_TIME), +#endif Kalman_v_a_observer_N(), Kalman_v_a_observer_E(), GNSS_INS_speedcomp_fusioner(SPEED_COMPENSATION_FUSIONER_FEEDBACK), @@ -66,7 +69,8 @@ class variometer_t specific_energy(0.0f), speed_compensation_INS_GNSS_1(0.0f), speed_compensation_kalman_2(0.0f), - speed_compensation_energy_3(0.0f) + speed_compensation_energy_3(0.0f), + speed_compensation_projected_4(0.0f) { }; void update_at_100Hz @@ -97,9 +101,12 @@ class variometer_t return speed_compensation_kalman_2; break; - default: + case 2: return speed_compensation_energy_3; break; + default: + return speed_compensation_projected_4; + break; } } @@ -147,6 +154,9 @@ class variometer_t KalmanVario_PVA_t KalmanVario_GNSS; KalmanVario_t KalmanVario_pressure; differentiatorspecific_energy_differentiator; +#if USE_OLD_FASHIONED_PRESSURE_VARIO + differentiatorpressure_vario_differentiator; +#endif Kalman_V_A_Aoff_observer_t Kalman_v_a_observer_N; Kalman_V_A_Aoff_observer_t Kalman_v_a_observer_E; HP_LP_fusion GNSS_INS_speedcomp_fusioner; @@ -161,6 +171,7 @@ class variometer_t float speed_compensation_INS_GNSS_1; float speed_compensation_kalman_2; float speed_compensation_energy_3; + float speed_compensation_projected_4; }; #endif /* VARIOMETER_H_ */ diff --git a/NAV_Algorithms/wind_observer.h b/NAV_Algorithms/wind_observer.h index 308cb94..0ebe74e 100644 --- a/NAV_Algorithms/wind_observer.h +++ b/NAV_Algorithms/wind_observer.h @@ -31,6 +31,7 @@ #include "accumulating_averager.h" #include "persistent_data.h" +//! mechanisms to filter wind data class wind_oberserver_t { public: @@ -95,7 +96,7 @@ class wind_oberserver_t else wind_average_observer.update( instant_wind, - ahrs.get_euler ().y, + ahrs.get_euler ().yaw, circling_state); float3vector relative_wind_NAV = wind_resampler_100_10Hz.get_output() - wind_average_observer.get_output(); @@ -109,11 +110,12 @@ class wind_oberserver_t if( old_circling_state == TRANSITION) // when starting to circle { circling_wind_averager.reset( wind_average_observer.get_output(), 100); + corrected_wind_averager.settle(wind_average_observer.get_output()); relative_wind_observer.reset({0}); } else { - relative_wind_observer.update( relative_wind_BODY, ahrs.get_euler ().y, ahrs.get_circling_state ()); + relative_wind_observer.update( relative_wind_BODY, ahrs.get_euler ().yaw, ahrs.get_circling_state ()); wind_correction_nav = ahrs.get_body2nav() * relative_wind_observer.get_output(); wind_correction_nav[DOWN]=0.0f; @@ -155,7 +157,7 @@ class wind_oberserver_t pt2 wind_resampler_100_10Hz; pt2 instant_wind_averager; soaring_flight_averager< float3vector, true> wind_average_observer; // configure wind average clamping on first circle - soaring_flight_averager< float3vector, false, false> relative_wind_observer; + soaring_flight_averager< float3vector, true, true> relative_wind_observer; pt2 corrected_wind_averager; accumulating_averager < float3vector> circling_wind_averager; circle_state_t circling_state; diff --git a/Output_Formatter/CAN_output.cpp b/Output_Formatter/CAN_output.cpp index 747d21b..2fbdab8 100644 --- a/Output_Formatter/CAN_output.cpp +++ b/Output_Formatter/CAN_output.cpp @@ -29,13 +29,15 @@ #include "system_state.h" #include "CAN_gateway.h" +extern uint32_t UNIQUE_ID[4]; // MPU silicon ID + #define DEGREE_2_RAD 1.7453292e-2f -#ifdef CAN_FORMAT_2021 +#if CAN_FORMAT_2021 enum CAN_ID_SENSOR { - c_CAN_Id_EulerAngles = 0x101, //!< int16_t roll nick yaw / 1/1000 rad + c_CAN_Id_EulerAngles = 0x101, //!< int16_t roll pitch yaw / 1/1000 rad c_CAN_Id_Airspeed = 0x102, //!< int16_t TAS, IAS / km/h c_CAN_Id_Vario = 0x103, //!< int16_t vario, vario-integrator / mm/s c_CAN_Id_GPS_Date_Time= 0x104, //!< uint8_t year-2000, month, day, hour, mins, secs @@ -46,7 +48,7 @@ enum CAN_ID_SENSOR c_CAN_Id_Atmosphere = 0x109, //!< uint32_t pressure / Pa uint32_t density / g/m^3 c_CAN_Id_GPS_Sats = 0x10a, //!< uin8_t No of Sats, uint8_t Fix-Type NO=0 2D=1 3D=2 RTK=3 c_CAN_Id_Acceleration = 0x10b, //!< int16_t G-force mm / s^2, int16_t vertical acc mm / m^2, vario uncomp mm / s, u_int8_t circle mode - c_CAN_Id_TurnCoord = 0x10c, //!< slip angle int16_t 1/1000 rad, turn rate int16_t 1/1000 rad/s, nick angle 1/1000 rad + c_CAN_Id_TurnCoord = 0x10c, //!< slip angle int16_t 1/1000 rad, turn rate int16_t 1/1000 rad/s, pitch angle 1/1000 rad c_CAN_Id_SystemState = 0x10d, //!< u32 system_state, u32 git_tag dec c_CID_KSB_Vdd = 0x112, //!< unit16_t as voltage * 10 }; @@ -59,9 +61,9 @@ void CAN_output ( const output_data_t &x, bool horizon_activated) { p.id=c_CAN_Id_EulerAngles; // 0x101 p.dlc=6; - p.data_sh[0] = (int16_t)(round(x.euler.r * 1000.0f)); // unit = 1/1000 RAD - p.data_sh[1] = (int16_t)(round(x.euler.p * 1000.0f)); - p.data_sh[2] = (int16_t)(round(x.euler.y * 1000.0f)); + p.data_sh[0] = (int16_t)(round(x.euler.roll * 1000.0f)); // unit = 1/1000 RAD + p.data_sh[1] = (int16_t)(round(x.euler.pitch * 1000.0f)); + p.data_sh[2] = (int16_t)(round(x.euler.yaw * 1000.0f)); CAN_send(p, 1); } else @@ -70,7 +72,7 @@ void CAN_output ( const output_data_t &x, bool horizon_activated) p.dlc=6; p.data_sh[0] = ZERO; p.data_sh[1] = ZERO; - p.data_sh[2] = (int16_t)(round(x.euler.y * 1000.0f)); + p.data_sh[2] = (int16_t)(round(x.euler.yaw * 1000.0f)); CAN_send(p, 1); } @@ -160,7 +162,7 @@ void CAN_output ( const output_data_t &x, bool horizon_activated) p.dlc=6; p.data_sh[0] = (int16_t)(round(x.slip_angle * 1000.0f)); // slip angle in radiant from body acceleration p.data_sh[1] = (int16_t)(round(x.turn_rate * 1000.0f)); // turn rate rad/s - p.data_sh[2] = (int16_t)(round(x.pitch_angle * 1000.0f)); // nick angle in radiant from body acceleration + p.data_sh[2] = (int16_t)(round(x.pitch_angle * 1000.0f)); // pitch angle in radiant from body acceleration if( CAN_send(p, 1)) // check CAN for timeout this time system_state |= CAN_OUTPUT_ACTIVE; else @@ -186,98 +188,106 @@ enum CAN_ID_SENSOR { // all values in SI-STD- (metric) units, angles in radians // format IEEE float32 little-endian - CAN_Id_Roll_Nick = 0x400, //!< float roll-angle, float nick-angle (FRONT-RIGHT-DOWN-system) - CAN_Id_Heading = 0x401, //!< float true heading, [OPTIONAL float magnetic declination] - CAN_Id_Airspeed = 0x402, //!< float TAS, float IAS / m/s - CAN_Id_Vario = 0x403, //!< float vario, float vario-average / m/s - - CAN_Id_GPS_Date_Time = 0x404, //!< uint8_t year-2000, month, day, hour, mins, secs - CAN_Id_GPS_LatLon = 0x405, //!< float latitude, float longitude / degrees(!) - CAN_Id_GPS_Alt = 0x406, //!< float MSL altitude,float geo separation - CAN_Id_GPS_Trk_Spd = 0x407, //!< float ground track, float groundspeed / m/s - CAN_Id_GPS_Sats = 0x408, //!< uin8_t No of Sats, (uint8_t)bool SAT FIX valid, (uint8_t)bool SAT HEADING available - - CAN_Id_Wind = 0x409, //!< float wind direction (where from), float wind speed - CAN_Id_Wind_Average = 0x40a, //!< float average wind direction, float average wind speed m/s - CAN_Id_Atmosphere = 0x40b, //!< float ambient pressure / Pa, float air density / kg/m^3 - CAN_Id_Acceleration = 0x40c, //!< float body-frame G-load m/s^2, body acceleration angle roll-axis - CAN_Id_TurnRate = 0x40d, //!< float turn rate to the right, (uint8_t) (enum { STRAIGHT_FLIGHT, TRANSITION, CIRCLING} ) - CAN_Id_SystemState = 0x40e, //!< u32 system_state, u32 git_tag_dec - CAN_Id_Voltage = 0x40f, //!< float supply voltage + CAN_Id_Roll_Pitch = 0x120, //!< float roll-angle, float pitch-angle (FRONT-RIGHT-DOWN-system) + CAN_Id_Heading = 0x121, //!< float true heading, turn-rate + CAN_Id_Airspeed = 0x122, //!< float TAS, float IAS / m/s + CAN_Id_Vario = 0x123, //!< float vario, float vario-average / m/s + CAN_Id_Wind = 0x124, //!< float wind direction (where from), float wind speed + CAN_Id_Wind_Average = 0x125, //!< float average wind direction, float average wind speed m/s + CAN_Id_Atmosphere = 0x126, //!< float ambient pressure / Pa, float air density / kg/m^3 + CAN_Id_Acceleration = 0x127, //!< float body-frame G-load m/s^2, vertical acceleration world frame + CAN_Id_SlipPitch = 0x128, //!< float slip angle from body-acc, float pitch angle from body-acc + CAN_Id_Voltage_Circle = 0x129, //!< float supply voltage, uint8_t circle-mode + CAN_Id_SystemState = 0x12a, //!< u32 system_state, u32 git_tag dec + + CAN_Id_GPS_Date_Time = 0x140, //!< uint8_t year-2000, month, day, hour, mins, secs + CAN_Id_GPS_Lat = 0x141, //!< double latitude + CAN_Id_GPS_Lon = 0x142, //!< double longitude + CAN_Id_GPS_Alt = 0x143, //!< float MSL altitude, float geo separation + CAN_Id_GPS_Trk_Spd = 0x144, //!< float ground track, float groundspeed / m/s + CAN_Id_GPS_Sats = 0x145, //!< uin8_t No of Sats, (uint8_t)bool SAT FIX type + + CAN_Id_Heartbeat_Sens = 0x520, + CAN_Id_Heartbeat_GNSS = 0x540, + CAN_Id_Heartbeat_IMU = 0x560 + }; -void CAN_output ( const output_data_t &x) +void CAN_output ( const output_data_t &x, bool horizon_activated) { - CANpacket p; + CANpacket p( 0x7ff, 8); + if( horizon_activated) + { + p.id=CAN_Id_Roll_Pitch; + p.dlc=8; + p.data_f[0] = x.euler.roll; + p.data_f[1] = x.euler.pitch; + CAN_send(p, 1); + } -#if HORIZON_DATA_SECRET == 0 - p.id=CAN_Id_Roll_Nick; - p.dlc=8; - p.data_f[0] = x.euler.r; - p.data_f[1] = x.euler.n; - CAN_send(p, 1); -#endif p.id=CAN_Id_Heading; - p.dlc=4; - p.data_f[0] = x.euler.y; + p.data_f[0] = x.euler.yaw; + p.data_f[1] = x.turn_rate; CAN_send(p, 1); p.id=CAN_Id_Airspeed; - p.dlc=8; p.data_f[0] = x.TAS; p.data_f[1] = x.IAS; CAN_send(p, 1); p.id=CAN_Id_Vario; - p.dlc=8; p.data_f[0] = x.vario; p.data_f[1] = x.integrator_vario; CAN_send(p, 1); p.id=CAN_Id_Wind; - p.dlc=8; p.data_f[0] = ATAN2( - x.wind[EAST], - x.wind[NORTH]); p.data_f[1] = x.wind.abs(); CAN_send(p, 1); p.id=CAN_Id_Wind_Average; - p.dlc=8; p.data_f[0] = ATAN2( - x.wind_average[EAST], - x.wind_average[NORTH]); - p.data_f[1] = x.wind.abs(); + p.data_f[1] = x.wind_average.abs(); CAN_send(p, 1); p.id=CAN_Id_Atmosphere; - p.dlc=8; p.data_f[0] = x.m.static_pressure; p.data_f[1] = x.air_density; CAN_send(p, 1); p.id=CAN_Id_Acceleration; - p.dlc=7; p.data_f[0] = x.G_load; - p.data_f[1] = x.slip_angle; + p.data_f[1] = x.effective_vertical_acceleration; CAN_send(p, 1); - p.id=CAN_Id_TurnRate; + p.id=CAN_Id_SlipPitch; + p.data_f[0] = x.slip_angle; // pendulum readings + p.data_f[1] = x.pitch_angle; + CAN_send(p, 1); + + p.id=CAN_Id_Voltage_Circle; p.dlc=5; - p.data_f[0] = x.turn_rate; + p.data_f[0] = x.m.supply_voltage; p.data_b[4] = (uint8_t)(x.circle_mode); - + CAN_send(p, 1); p.id=CAN_Id_GPS_Date_Time; - p.dlc=6; - p.data_b[0] = x.c.year; - p.data_b[1] = x.c.month; - p.data_b[2] = x.c.day; - p.data_b[3] = x.c.hour; - p.data_b[4] = x.c.minute; - p.data_b[5] = x.c.second; + p.dlc=7; + p.data_h[0] = x.c.year + 2000; // GNSS reports only 2000 + x + p.data_b[2] = x.c.month; + p.data_b[3] = x.c.day; + p.data_b[4] = x.c.hour; + p.data_b[5] = x.c.minute; + p.data_b[6] = x.c.second; CAN_send(p, 1); - p.id=CAN_Id_GPS_LatLon; + p.id=CAN_Id_GPS_Lat; p.dlc=8; - p.data_f[0] = (float)(x.c.latitude); // -> 4m of accuracy - p.data_f[1] = (float)(x.c.longitude); + p.data_d = x.c.latitude; + CAN_send(p, 1); + + p.id=CAN_Id_GPS_Lon; + p.data_d = x.c.longitude; CAN_send(p, 1); p.id=CAN_Id_GPS_Alt; // 0x106 @@ -287,8 +297,7 @@ void CAN_output ( const output_data_t &x) CAN_send(p, 1); p.id=CAN_Id_GPS_Trk_Spd; - p.dlc=8; - p.data_f[0] = x.c.heading_motion * DEGREE_2_RAD; + p.data_f[0] = x.c.heading_motion; p.data_f[1] = x.c.speed_motion; CAN_send(p, 1); @@ -296,21 +305,12 @@ void CAN_output ( const output_data_t &x) p.dlc=2; p.data_b[0] = x.c.SATS_number; p.data_b[1] = x.c.sat_fix_type; - CAN_send(p, 1); - - p.id=CAN_Id_Voltage; - p.dlc=4; - p.data_f[0] = x.m.supply_voltage; if( CAN_send(p, 1)) // check CAN for timeout this time system_state |= CAN_OUTPUT_ACTIVE; else system_state &= ~CAN_OUTPUT_ACTIVE; -#ifndef GIT_TAG_DEC -#define GIT_TAG_DEC 0xffffffff -#endif - p.id=CAN_Id_SystemState; p.dlc=8; p.data_w[0] = system_state; @@ -318,5 +318,20 @@ void CAN_output ( const output_data_t &x) CAN_send(p, 1); } +void CAN_heartbeat( void) +{ + CANpacket p( CAN_Id_Heartbeat_Sens, 8); + + p.data_h[0] = 2; + p.data_h[1] = 0; + p.data_w[1] = UNIQUE_ID[0]; + CAN_send(p, 1); + + p.id=CAN_Id_Heartbeat_GNSS; + p.data_h[0] = 3; + p.data_h[1] = 0; + p.data_w[1] = UNIQUE_ID[0]; + CAN_send(p, 1); +} #endif diff --git a/Output_Formatter/CAN_output.h b/Output_Formatter/CAN_output.h index 84b5587..a024c0f 100644 --- a/Output_Formatter/CAN_output.h +++ b/Output_Formatter/CAN_output.h @@ -36,7 +36,29 @@ #define SYSWIDECONFIG_ITEM_ID_PILOT_KG 5 #define SYSWIDECONFIG_ITEM_ID_VARIO_MODE 6 +#define SENS_CONFIG_ITEM_ROLL 0x2000 +#define SENS_CONFIG_ITEM_PITCH 0x2001 +#define SENS_CONFIG_ITEM_YAW 0x2002 +#define SENS_CONFIG_ITEM_PIT_OFF 0x2003 +#define SENS_CONFIG_ITEM_PIT_SPAN 0x2004 +#define SENS_CONFIG_ITEM_QNH_OFF 0x2005 +#define SENS_CONFIG_MAG_CALIB 0x2006 +#define SENS_CONFIG_VARIO_TC 0x2007 +#define SENS_CONFIG_VARIO_INT_TC 0x2008 +#define SENS_CONFIG_WIND_TC 0x2009 +#define SENS_CONFIG_WIND_MEAN_TC 0x200a +#define SENS_CONFIG_GNSS_CONFIG 0x200b +#define SENS_CONFIG_ANT_BASELEN 0x200c +#define SENS_CONFIG_ANT_SLAVE_DOWN 0x200d +#define SENS_CONFIG_ANT_SLAVE_RIGHT 0x200e + +#define CMD_MEASURE_LEFT 0x3000 +#define CMD_MEASURE_RIGHT 0x3001 +#define CMD_MEASURE_LEVEL 0x3002 +#define CMD_CALCULATE 0x3003 +#define CMD_TUNE 0x3004 void CAN_output ( const output_data_t &x, bool horizon_activated); +void CAN_heartbeat( void); #endif /* SRC_CAN_OUTPUT_H_ */ diff --git a/Output_Formatter/NMEA_format.cpp b/Output_Formatter/NMEA_format.cpp index 32c5d70..824d58b 100644 --- a/Output_Formatter/NMEA_format.cpp +++ b/Output_Formatter/NMEA_format.cpp @@ -439,9 +439,9 @@ void format_NMEA_string_fast( const output_data_t &output_data, string_buffer_t // aircraft attitude if( horizon_available) - format_PLARA(output_data.euler.r, output_data.euler.p, output_data.euler.y, next); + format_PLARA(output_data.euler.roll, output_data.euler.pitch, output_data.euler.yaw, next); else - format_PLARA( ZERO, ZERO, output_data.euler.y, next); + format_PLARA( ZERO, ZERO, output_data.euler.yaw, next); // report instant and average total-energy-compensated variometer, pressure altitude, TAS format_PLARV ( output_data.vario, diff --git a/Output_Formatter/generic_CAN_driver.h b/Output_Formatter/generic_CAN_driver.h index 3eb4385..65bce50 100644 --- a/Output_Formatter/generic_CAN_driver.h +++ b/Output_Formatter/generic_CAN_driver.h @@ -53,6 +53,7 @@ class CANpacket uint32_t data_w[2]; //!< data seen as 2 times uint32_t int32_t data_sw[2]; //!< data seen as 2 times int32_t float data_f[2]; //!< data seen as 2 times 32-bit floats + double data_d; //!< data seen as 64-bit float uint64_t data_l; //!< data seen as 64-bit integer }; } ; diff --git a/README.md b/README.md index cbb1951..5a0e10e 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ The algorithms include: The sensor output as a set of NMEA sentences ist defined in our subproject [larus-NMEA-protocol](https://github.com/larus-breeze/doc_larus/blob/master/documentation/Larus_NMEA_Protocol.md) +To get an idea about the software structure and the algorithms used you may want to browse the [Doxygen-generated documentation of the library.](https://schaefer.eit.h-da.de/Larus_SIL) + # This library is designed to be imported into another project via a .gitmodules file. Add as submodule to repository: