/ src / crc32c / src / crc32c_arm64.cc
crc32c_arm64.cc
  1  // Copyright 2017 The CRC32C Authors. All rights reserved.
  2  // Use of this source code is governed by a BSD-style license that can be
  3  // found in the LICENSE file. See the AUTHORS file for names of contributors.
  4  
  5  #include "./crc32c_arm64.h"
  6  
  7  // In a separate source file to allow this accelerated CRC32C function to be
  8  // compiled with the appropriate compiler flags to enable ARM NEON CRC32C
  9  // instructions.
 10  
 11  // This implementation is based on https://github.com/google/leveldb/pull/490.
 12  
 13  #include <cstddef>
 14  #include <cstdint>
 15  #include <cstring>
 16  
 17  #include "./crc32c_internal.h"
 18  #ifdef CRC32C_HAVE_CONFIG_H
 19  #include "crc32c/crc32c_config.h"
 20  #endif
 21  
 22  #if HAVE_ARM64_CRC32C
 23  
 24  #include <arm_acle.h>
 25  #include <arm_neon.h>
 26  
 27  #define KBYTES 1032
 28  #define SEGMENTBYTES 256
 29  
 30  // compute 8bytes for each segment parallelly
 31  #define CRC32C32BYTES(P, IND)                                             \
 32    do {                                                                    \
 33      std::memcpy(&d64, (P) + SEGMENTBYTES * 1 + (IND) * 8, sizeof(d64));   \
 34      crc1 = __crc32cd(crc1, d64);                                          \
 35      std::memcpy(&d64, (P) + SEGMENTBYTES * 2 + (IND) * 8, sizeof(d64));   \
 36      crc2 = __crc32cd(crc2, d64);                                          \
 37      std::memcpy(&d64, (P) + SEGMENTBYTES * 3 + (IND) * 8, sizeof(d64));   \
 38      crc3 = __crc32cd(crc3, d64);                                          \
 39      std::memcpy(&d64, (P) + SEGMENTBYTES * 0 + (IND) * 8, sizeof(d64));   \
 40      crc0 = __crc32cd(crc0, d64);                                          \
 41    } while (0);
 42  
 43  // compute 8*8 bytes for each segment parallelly
 44  #define CRC32C256BYTES(P, IND)      \
 45    do {                              \
 46      CRC32C32BYTES((P), (IND)*8 + 0) \
 47      CRC32C32BYTES((P), (IND)*8 + 1) \
 48      CRC32C32BYTES((P), (IND)*8 + 2) \
 49      CRC32C32BYTES((P), (IND)*8 + 3) \
 50      CRC32C32BYTES((P), (IND)*8 + 4) \
 51      CRC32C32BYTES((P), (IND)*8 + 5) \
 52      CRC32C32BYTES((P), (IND)*8 + 6) \
 53      CRC32C32BYTES((P), (IND)*8 + 7) \
 54    } while (0);
 55  
 56  // compute 4*8*8 bytes for each segment parallelly
 57  #define CRC32C1024BYTES(P)   \
 58    do {                       \
 59      CRC32C256BYTES((P), 0)   \
 60      CRC32C256BYTES((P), 1)   \
 61      CRC32C256BYTES((P), 2)   \
 62      CRC32C256BYTES((P), 3)   \
 63      (P) += 4 * SEGMENTBYTES; \
 64    } while (0)
 65  
 66  namespace crc32c {
 67  
 68  uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) {
 69    int64_t length = size;
 70    uint32_t crc0, crc1, crc2, crc3;
 71    uint64_t t0, t1, t2;
 72    uint16_t d16;
 73    uint32_t d32;
 74    uint64_t d64;
 75  
 76    // k0=CRC(x^(3*SEGMENTBYTES*8)), k1=CRC(x^(2*SEGMENTBYTES*8)),
 77    // k2=CRC(x^(SEGMENTBYTES*8))
 78    const poly64_t k0 = 0x8d96551c, k1 = 0xbd6f81f8, k2 = 0xdcb17aa4;
 79  
 80    crc = crc ^ kCRC32Xor;
 81  
 82    while (length >= KBYTES) {
 83      crc0 = crc;
 84      crc1 = 0;
 85      crc2 = 0;
 86      crc3 = 0;
 87  
 88      // Process 1024 bytes in parallel.
 89      CRC32C1024BYTES(data);
 90  
 91      // Merge the 4 partial CRC32C values.
 92      t2 = (uint64_t)vmull_p64(crc2, k2);
 93      t1 = (uint64_t)vmull_p64(crc1, k1);
 94      t0 = (uint64_t)vmull_p64(crc0, k0);
 95      std::memcpy(&d64, data, sizeof(d64));
 96      crc = __crc32cd(crc3, d64);
 97      data += sizeof(uint64_t);
 98      crc ^= __crc32cd(0, t2);
 99      crc ^= __crc32cd(0, t1);
100      crc ^= __crc32cd(0, t0);
101  
102      length -= KBYTES;
103    }
104  
105    while (length >= 8) {
106      std::memcpy(&d64, data, sizeof(d64));
107      crc = __crc32cd(crc, d64);
108      data += 8;
109      length -= 8;
110    }
111  
112    if (length & 4) {
113      std::memcpy(&d32, data, sizeof(d32));
114      crc = __crc32cw(crc, d32);
115      data += 4;
116    }
117  
118    if (length & 2) {
119      std::memcpy(&d16, data, sizeof(d16));
120      crc = __crc32ch(crc, d16);
121      data += 2;
122    }
123  
124    if (length & 1) {
125      crc = __crc32cb(crc, *data);
126    }
127  
128    return crc ^ kCRC32Xor;
129  }
130  
131  }  // namespace crc32c
132  
133  #endif  // HAVE_ARM64_CRC32C