arm_and_u8.c
1 /* ---------------------------------------------------------------------- 2 * Project: CMSIS DSP Library 3 * Title: arm_and_u8.c 4 * Description: uint8_t bitwise AND 5 * 6 * $Date: 23 April 2021 7 * $Revision: V1.9.0 8 * 9 * Target Processor: Cortex-M and Cortex-A cores 10 * -------------------------------------------------------------------- */ 11 /* 12 * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved. 13 * 14 * SPDX-License-Identifier: Apache-2.0 15 * 16 * Licensed under the Apache License, Version 2.0 (the License); you may 17 * not use this file except in compliance with the License. 18 * You may obtain a copy of the License at 19 * 20 * www.apache.org/licenses/LICENSE-2.0 21 * 22 * Unless required by applicable law or agreed to in writing, software 23 * distributed under the License is distributed on an AS IS BASIS, WITHOUT 24 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 * See the License for the specific language governing permissions and 26 * limitations under the License. 27 */ 28 29 #include "dsp/basic_math_functions.h" 30 31 /** 32 @ingroup groupMath 33 */ 34 35 36 /** 37 @addtogroup And 38 @{ 39 */ 40 41 /** 42 @brief Compute the logical bitwise AND of two fixed-point vectors. 
/**
  @brief         Compute the bitwise AND of two uint8_t vectors.
  @param[in]     pSrcA      points to input vector A
  @param[in]     pSrcB      points to input vector B
  @param[out]    pDst       points to output vector
  @param[in]     blockSize  number of samples in each vector
  @return        none

  @par           Details
                   For every index i in [0, blockSize) the function stores
                   pSrcA[i] & pSrcB[i] into pDst[i]. When blockSize is 0
                   nothing is read or written.
                   The implementation is selected at compile time:
                   ARM_MATH_MVEI picks the Helium path, ARM_MATH_NEON picks
                   the Neon path, and otherwise (or when
                   ARM_MATH_AUTOVECTORIZE is defined) a plain scalar loop
                   is used.
 */

void arm_and_u8(
    const uint8_t * pSrcA,
    const uint8_t * pSrcB,
          uint8_t * pDst,
          uint32_t blockSize)
{
    uint32_t blkCnt;      /* Loop counter */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t vecSrcA, vecSrcB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4;

    while (blkCnt > 0U)
    {
        vecSrcA = vld1q(pSrcA);
        vecSrcB = vld1q(pSrcB);

        vst1q(pDst, vandq_u8(vecSrcA, vecSrcB));

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: 1..15 remaining elements */
    blkCnt = blockSize & 0xFU;

    if (blkCnt > 0U)
    {
        /* Predicate enabling only the first blkCnt byte lanes */
        mve_pred16_t p0 = vctp8q(blkCnt);

        /*
         * Load under the same predicate as the store so that no byte
         * beyond the end of the input vectors is accessed. Inactive
         * lanes are zeroed and never written back.
         */
        vecSrcA = vldrbq_z_u8(pSrcA, p0);
        vecSrcB = vldrbq_z_u8(pSrcB, p0);

        vstrbq_p(pDst, vandq_u8(vecSrcA, vecSrcB), p0);
    }
#else
#if defined(ARM_MATH_NEON) && !defined(ARM_MATH_AUTOVECTORIZE)
    uint8x16_t vecA, vecB;

    /* Compute 16 outputs at a time */
    blkCnt = blockSize >> 4U;

    while (blkCnt > 0U)
    {
        vecA = vld1q_u8(pSrcA);
        vecB = vld1q_u8(pSrcB);

        vst1q_u8(pDst, vandq_u8(vecA, vecB));

        pSrcA += 16;
        pSrcB += 16;
        pDst  += 16;

        /* Decrement the loop counter */
        blkCnt--;
    }

    /* Tail: leftover elements are handled by the scalar loop below */
    blkCnt = blockSize & 0xFU;
#else
    /* Initialize blkCnt with number of samples */
    blkCnt = blockSize;
#endif

    /* Scalar loop: whole vector (generic build) or the Neon tail */
    while (blkCnt > 0U)
    {
        *pDst++ = (*pSrcA++) & (*pSrcB++);

        /* Decrement the loop counter */
        blkCnt--;
    }
#endif /* if defined(ARM_MATH_MVEI) */
}

/**
  @} end of And group
 */