/ Drivers / CMSIS / DSP / Source / MatrixFunctions / arm_mat_solve_upper_triangular_f32.c
arm_mat_solve_upper_triangular_f32.c
  1  /* ----------------------------------------------------------------------
  2   * Project:      CMSIS DSP Library
  3   * Title:        arm_mat_solve_upper_triangular_f32.c
  4   * Description:  Solve linear system UT X = A with UT upper triangular matrix
  5   *
  6   * $Date:        23 April 2021
  7   * $Revision:    V1.9.0
  8   *
  9   * Target Processor: Cortex-M and Cortex-A cores
 10   * -------------------------------------------------------------------- */
 11  /*
 12   * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
 13   *
 14   * SPDX-License-Identifier: Apache-2.0
 15   *
 16   * Licensed under the Apache License, Version 2.0 (the License); you may
 17   * not use this file except in compliance with the License.
 18   * You may obtain a copy of the License at
 19   *
 20   * www.apache.org/licenses/LICENSE-2.0
 21   *
 22   * Unless required by applicable law or agreed to in writing, software
 23   * distributed under the License is distributed on an AS IS BASIS, WITHOUT
 24   * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 25   * See the License for the specific language governing permissions and
 26   * limitations under the License.
 27   */
 28  
 29  #include "dsp/matrix_functions.h"
 30  
 31  
 32  /**
 33    @ingroup groupMatrix
 34   */
 35  
 36  
 37  /**
 38    @addtogroup MatrixInv
 39    @{
 40   */
 41  
 42  /**
 43     * @brief Solve UT . X = A where UT is an upper triangular matrix
 44     * @param[in]  ut  The upper triangular matrix
 45     * @param[in]  a  The matrix a
 46     * @param[out] dst The solution X of UT . X = A
 47     * @return The function returns ARM_MATH_SINGULAR, if the system can't be solved.
 48    */
 49  
 50  #if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
 51  
 52  #include "arm_helium_utils.h"
 53  
 54    arm_status arm_mat_solve_upper_triangular_f32(
 55    const arm_matrix_instance_f32 * ut,
 56    const arm_matrix_instance_f32 * a,
 57    arm_matrix_instance_f32 * dst)
 58    {
 59  arm_status status;                             /* status of matrix inverse */
 60  
 61  
 62  #ifdef ARM_MATH_MATRIX_CHECK
 63  
 64    /* Check for matrix mismatch condition */
 65    if ((ut->numRows != ut->numCols) ||
 66        (ut->numRows != a->numRows)   )
 67    {
 68      /* Set status as ARM_MATH_SIZE_MISMATCH */
 69      status = ARM_MATH_SIZE_MISMATCH;
 70    }
 71    else
 72  
 73  #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
 74  
 75    {
 76  
 77      int i,j,k,n,cols;
 78  
 79      n = dst->numRows;
 80      cols = dst->numCols;
 81  
 82      float32_t *pX = dst->pData;
 83      float32_t *pUT = ut->pData;
 84      float32_t *pA = a->pData;
 85  
 86      float32_t *ut_row;
 87      float32_t *a_col;
 88  
 89      float32_t invUT;
 90  
 91      f32x4_t vecA;
 92      f32x4_t vecX;
 93      
 94      for(i=n-1; i >= 0 ; i--)
 95      {
 96        for(j=0; j+3 < cols; j +=4)
 97        {
 98              vecA = vld1q_f32(&pA[i * cols + j]);
 99              
100              for(k=n-1; k > i; k--)
101              {
102                  vecX = vld1q_f32(&pX[cols*k+j]);          
103                  vecA = vfmsq(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
104              }
105  
106              if (pUT[n*i + i]==0.0f)
107              {
108                return(ARM_MATH_SINGULAR);
109              }
110  
111              invUT = 1.0f / pUT[n*i + i];
112              vecA = vmulq(vecA,vdupq_n_f32(invUT));
113             
114  
115              vst1q(&pX[i*cols+j],vecA);
116        }
117  
118        for(; j < cols; j ++)
119        {
120              a_col = &pA[j];
121  
122              ut_row = &pUT[n*i];
123  
124              float32_t tmp=a_col[i * cols];
125              
126              for(k=n-1; k > i; k--)
127              {
128                  tmp -= ut_row[k] * pX[cols*k+j];
129              }
130  
131              if (ut_row[i]==0.0f)
132              {
133                return(ARM_MATH_SINGULAR);
134              }
135              tmp = tmp / ut_row[i];
136              pX[i*cols+j] = tmp;
137         }
138  
139      }
140      status = ARM_MATH_SUCCESS;
141  
142    }
143  
144    
145    /* Return to application */
146    return (status);
147  }
148  
149  #else
150  #if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
151    arm_status arm_mat_solve_upper_triangular_f32(
152    const arm_matrix_instance_f32 * ut,
153    const arm_matrix_instance_f32 * a,
154    arm_matrix_instance_f32 * dst)
155    {
156  arm_status status;                             /* status of matrix inverse */
157  
158  
159  #ifdef ARM_MATH_MATRIX_CHECK
160  
161    /* Check for matrix mismatch condition */
162    if ((ut->numRows != ut->numCols) ||
163        (ut->numRows != a->numRows)   )
164    {
165      /* Set status as ARM_MATH_SIZE_MISMATCH */
166      status = ARM_MATH_SIZE_MISMATCH;
167    }
168    else
169  
170  #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
171  
172    {
173  
174      int i,j,k,n,cols;
175  
176      n = dst->numRows;
177      cols = dst->numCols;
178  
179      float32_t *pX = dst->pData;
180      float32_t *pUT = ut->pData;
181      float32_t *pA = a->pData;
182  
183      float32_t *ut_row;
184      float32_t *a_col;
185  
186      float32_t invUT;
187  
188      f32x4_t vecA;
189      f32x4_t vecX;
190      
191      for(i=n-1; i >= 0 ; i--)
192      {
193        for(j=0; j+3 < cols; j +=4)
194        {
195              vecA = vld1q_f32(&pA[i * cols + j]);
196              
197              for(k=n-1; k > i; k--)
198              {
199                  vecX = vld1q_f32(&pX[cols*k+j]);          
200                  vecA = vfmsq_f32(vecA,vdupq_n_f32(pUT[n*i + k]),vecX);
201              }
202  
203              if (pUT[n*i + i]==0.0f)
204              {
205                return(ARM_MATH_SINGULAR);
206              }
207  
208              invUT = 1.0f / pUT[n*i + i];
209              vecA = vmulq_f32(vecA,vdupq_n_f32(invUT));
210             
211  
212              vst1q_f32(&pX[i*cols+j],vecA);
213        }
214  
215        for(; j < cols; j ++)
216        {
217              a_col = &pA[j];
218  
219              ut_row = &pUT[n*i];
220  
221              float32_t tmp=a_col[i * cols];
222              
223              for(k=n-1; k > i; k--)
224              {
225                  tmp -= ut_row[k] * pX[cols*k+j];
226              }
227  
228              if (ut_row[i]==0.0f)
229              {
230                return(ARM_MATH_SINGULAR);
231              }
232              tmp = tmp / ut_row[i];
233              pX[i*cols+j] = tmp;
234         }
235  
236      }
237      status = ARM_MATH_SUCCESS;
238  
239    }
240  
241    
242    /* Return to application */
243    return (status);
244  }
245  
246  #else
247    arm_status arm_mat_solve_upper_triangular_f32(
248    const arm_matrix_instance_f32 * ut,
249    const arm_matrix_instance_f32 * a,
250    arm_matrix_instance_f32 * dst)
251    {
252  arm_status status;                             /* status of matrix inverse */
253  
254  
255  #ifdef ARM_MATH_MATRIX_CHECK
256  
257    /* Check for matrix mismatch condition */
258    if ((ut->numRows != ut->numCols) ||
259        (ut->numRows != a->numRows)   )
260    {
261      /* Set status as ARM_MATH_SIZE_MISMATCH */
262      status = ARM_MATH_SIZE_MISMATCH;
263    }
264    else
265  
266  #endif /* #ifdef ARM_MATH_MATRIX_CHECK */
267  
268    {
269  
270      int i,j,k,n,cols;
271  
272      float32_t *pX = dst->pData;
273      float32_t *pUT = ut->pData;
274      float32_t *pA = a->pData;
275  
276      float32_t *ut_row;
277      float32_t *a_col;
278  
279      n = dst->numRows;
280      cols = dst->numCols;
281  
282      for(j=0; j < cols; j ++)
283      {
284         a_col = &pA[j];
285  
286         for(i=n-1; i >= 0 ; i--)
287         {
288              float32_t tmp=a_col[i * cols];
289  
290              ut_row = &pUT[n*i];
291  
292              for(k=n-1; k > i; k--)
293              {
294                  tmp -= ut_row[k] * pX[cols*k+j];
295              }
296  
297              if (ut_row[i]==0.0f)
298              {
299                return(ARM_MATH_SINGULAR);
300              }
301              tmp = tmp / ut_row[i];
302              pX[i*cols+j] = tmp;
303         }
304  
305      }
306      status = ARM_MATH_SUCCESS;
307  
308    }
309  
310    
311    /* Return to application */
312    return (status);
313  }
314  #endif /* #if defined(ARM_MATH_NEON) */
315  #endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
316  
317  /**
318    @} end of MatrixInv group
319   */