arm_or_u16.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_or_u16.c 4 * Description: uint16_t bitwise inclusive OR 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 /** 36 @defgroup Or Vector bitwise inclusive OR 37 38 Compute the logical bitwise OR. 39 40 There are separate functions for uint32_t, uint16_t, and uint8_t data types. 41 */ 42 43 /** 44 @addtogroup Or 45 @{ 46 */ 47 48 /** 49 @brief Compute the logical bitwise OR of two fixed-point vectors. 
/**
  @brief         Compute the logical bitwise OR of two fixed-point vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   For every n in [0, blockSize): pDst[n] = pSrcA[n] | pSrcB[n].
                   When blockSize is 0 nothing is read or written.
                   Exactly blockSize samples are read from each input and
                   exactly blockSize samples are written to the output.
 */

void arm_or_u16(
    const uint16_t * pSrcA,
    const uint16_t * pSrcB,
          uint16_t * pDst,
          uint32_t   blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecSrcA, vecSrcB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vorrq_u16(vecSrcA, vecSrcB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1..7 samples left */
    blkCnt = blockSize & 7U;

    if (blkCnt > 0U)
    {
        /* Predicate enabling only the first blkCnt 16-bit lanes */
        mve_pred16_t p0 = vctp16q(blkCnt);

        /*
         * Loads are predicated as well as the store: an unpredicated
         * 8-lane load here would read past the end of the input vectors.
         * Disabled lanes are zero-filled and never reach memory.
         */
        vecSrcA = vldrhq_z_u16(pSrcA, p0);
        vecSrcB = vldrhq_z_u16(pSrcB, p0);
        vstrhq_p(pDst, vorrq_u16(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint16x8_t vecA, vecB;

    /* Compute 8 outputs at a time */
    blkCnt = blockSize >> 3U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u16(pSrcA);
        vecB = vld1q_u16(pSrcB);

        vst1q_u16(pDst, vorrq_u16(vecA, vecB) );

        pSrcA += 8;
        pSrcB += 8;
        pDst  += 8;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: remaining samples are handled by the scalar loop below */
    blkCnt = blockSize & 7U;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    /* Scalar loop: whole vector (generic build) or the NEON tail */
    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++)|(*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}

/**
  @} end of Or group
 */