cpu_neon_vfpv4.c
1 #ifdef _MSC_VER 2 #include <Intrin.h> 3 #endif 4 #include <arm_neon.h> 5 6 int main(int argc, char **argv) 7 { 8 float *src = (float*)argv[argc-1]; 9 float32x4_t v1 = vdupq_n_f32(src[0]); 10 float32x4_t v2 = vdupq_n_f32(src[1]); 11 float32x4_t v3 = vdupq_n_f32(src[2]); 12 int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0); 13 #ifdef __aarch64__ 14 double *src2 = (double*)argv[argc-2]; 15 float64x2_t vd1 = vdupq_n_f64(src2[0]); 16 float64x2_t vd2 = vdupq_n_f64(src2[1]); 17 float64x2_t vd3 = vdupq_n_f64(src2[2]); 18 ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0); 19 #endif 20 return ret; 21 }