arm_mat_trans_f16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_mat_trans_f16.c 4 * Description: Floating-point matrix transpose 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/matrix_functions_f16.h" 30 31 #if defined(ARM_FLOAT16_SUPPORTED) 32 33 34 /** 35 @ingroup groupMatrix 36 */ 37 38 /** 39 @addtogroup MatrixTrans 40 @{ 41 */ 42 43 /** 44 @brief Floating-point matrix transpose. 45 @param[in] pSrc points to input matrix 46 @param[out] pDst points to output matrix 47 @return execution status 48 - \ref ARM_MATH_SUCCESS : Operation successful 49 - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed 50 */ 51 #if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) 52 53 #include "arm_helium_utils.h" 54 55 arm_status arm_mat_trans_f16( 56 const arm_matrix_instance_f16 * pSrc, 57 arm_matrix_instance_f16 * pDst) 58 { 59 arm_status status; /* status of matrix transpose */ 60 61 #ifdef ARM_MATH_MATRIX_CHECK 62 63 /* Check for matrix mismatch condition */ 64 if ((pSrc->numRows != pDst->numCols) || 65 (pSrc->numCols != pDst->numRows) ) 66 { 67 /* Set status as ARM_MATH_SIZE_MISMATCH */ 68 status = ARM_MATH_SIZE_MISMATCH; 69 } 70 else 71 72 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 73 74 { 75 if (pDst->numRows == pDst->numCols) 76 { 77 if (pDst->numCols == 1) 78 { 79 pDst->pData[0] = pSrc->pData[0]; 80 return(ARM_MATH_SUCCESS); 81 } 82 if (pDst->numCols == 2) 83 return arm_mat_trans_16bit_2x2((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData); 84 if (pDst->numCols == 3) 85 return arm_mat_trans_16bit_3x3_mve((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData); 86 if (pDst->numCols == 4) 87 return arm_mat_trans_16bit_4x4_mve((uint16_t *)pSrc->pData, (uint16_t *)pDst->pData); 88 } 89 90 arm_mat_trans_16bit_generic(pSrc->numRows, pSrc->numCols, (uint16_t *)pSrc->pData, (uint16_t *)pDst->pData); 91 /* Set status as ARM_MATH_SUCCESS */ 92 status = ARM_MATH_SUCCESS; 93 } 94 95 /* Return to application */ 96 return (status); 97 } 98 99 #else 100 101 arm_status arm_mat_trans_f16( 102 const arm_matrix_instance_f16 * pSrc, 103 arm_matrix_instance_f16 * pDst) 104 { 105 float16_t *pIn = pSrc->pData; /* input data matrix pointer */ 106 float16_t *pOut = pDst->pData; /* output data matrix pointer */ 107 float16_t *px; /* Temporary output data matrix pointer */ 108 uint16_t nRows = pSrc->numRows; /* number of rows */ 109 uint16_t nCols = pSrc->numCols; /* number of columns */ 110 uint32_t col, row = nRows, i = 0U; /* Loop counters */ 111 arm_status status; /* status of matrix transpose */ 112 113 #ifdef ARM_MATH_MATRIX_CHECK 114 115 /* Check for matrix mismatch condition */ 116 if ((pSrc->numRows != pDst->numCols) || 117 (pSrc->numCols != pDst->numRows) ) 118 { 119 /* Set status as ARM_MATH_SIZE_MISMATCH */ 120 status = ARM_MATH_SIZE_MISMATCH; 121 } 122 else 123 124 #endif /* #ifdef ARM_MATH_MATRIX_CHECK */ 125 126 { 127 /* Matrix transpose by exchanging the rows with columns */ 128 /* row loop */ 129 do 130 { 131 /* Pointer px is set to starting address of column being processed */ 132 px = pOut + i; 133 134 #if defined (ARM_MATH_LOOPUNROLL) 135 136 /* Loop unrolling: Compute 4 outputs at a time */ 137 col = nCols >> 2U; 138 139 while (col > 0U) /* column loop */ 140 { 141 /* Read and store input element in destination */ 142 *px = *pIn++; 143 /* Update pointer px to point to next row of transposed matrix */ 144 px += nRows; 145 146 *px = *pIn++; 147 px += nRows; 148 149 *px = *pIn++; 150 px += nRows; 151 152 *px = *pIn++; 153 px += nRows; 154 155 /* Decrement column loop counter */ 156 col--; 157 } 158 159 /* Loop unrolling: Compute remaining outputs */ 160 col = nCols % 0x4U; 161 162 #else 163 164 /* Initialize col with number of samples */ 165 col = nCols; 166 167 #endif /* #if defined (ARM_MATH_LOOPUNROLL) */ 168 169 while (col > 0U) 170 { 171 /* Read and store input element in destination */ 172 *px = *pIn++; 173 174 /* Update pointer px to point to next row of transposed matrix */ 175 px += nRows; 176 177 /* Decrement column loop counter */ 178 col--; 179 } 180 181 i++; 182 183 /* Decrement row loop counter */ 184 row--; 185 186 } while (row > 0U); /* row loop end */ 187 188 /* Set status as ARM_MATH_SUCCESS */ 189 status = ARM_MATH_SUCCESS; 190 } 191 192 /* Return to application */ 193 return (status); 194 } 195 #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */ 196 197 /** 198 * @} end of MatrixTrans group 199 */ 200 201 #endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 202