fastcpy.rs
1 //! # FastCpy 2 //! 3 //! The Rust Compiler calls `memcpy` for slices of unknown length. 4 //! This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). 5 //! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program. 6 //! 7 //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. 8 //! 9 //! As fall back the standard `memcpy` is called 10 //! 11 //! ## Double Copy Trick 12 //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). 13 //! E.g. Slice of length 6 can be copied with two uncoditional copy operations. 14 //! 15 //! /// [1, 2, 3, 4, 5, 6] 16 //! /// [1, 2, 3, 4] 17 //! /// [3, 4, 5, 6] 18 //! 19 20 #[inline] 21 pub fn slice_copy(src: &[u8], dst: &mut [u8]) { 22 #[inline(never)] 23 #[cold] 24 #[track_caller] 25 fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { 26 panic!( 27 "source slice length ({}) does not match destination slice length ({})", 28 src_len, dst_len, 29 ); 30 } 31 32 if src.len() != dst.len() { 33 len_mismatch_fail(src.len(), dst.len()); 34 } 35 let len = src.len(); 36 37 if src.is_empty() { 38 return; 39 } 40 41 if len < 4 { 42 short_copy(src, dst); 43 return; 44 } 45 46 if len < 8 { 47 double_copy_trick::<4>(src, dst); 48 return; 49 } 50 51 if len <= 16 { 52 double_copy_trick::<8>(src, dst); 53 return; 54 } 55 56 if len <= 32 { 57 double_copy_trick::<16>(src, dst); 58 return; 59 } 60 61 /// The code will use the vmovdqu instruction to copy 32 bytes at a time. 62 #[cfg(target_feature = "avx")] 63 { 64 if len <= 64 { 65 double_copy_trick::<32>(src, dst); 66 return; 67 } 68 } 69 70 // For larger sizes we use the default, which calls memcpy 71 // memcpy does some virtual memory tricks to copy large chunks of memory. 72 // 73 // The theory should be that the checks above don't cost much relative to the copy call for 74 // larger copies. 75 // The bounds checks in `copy_from_slice` are elided. 76 dst.copy_from_slice(src); 77 } 78 79 #[inline(always)] 80 fn short_copy(src: &[u8], dst: &mut [u8]) { 81 let len = src.len(); 82 83 // length 1-3 84 dst[0] = src[0]; 85 if len >= 2 { 86 double_copy_trick::<2>(src, dst); 87 } 88 } 89 90 #[inline(always)] 91 /// [1, 2, 3, 4, 5, 6] 92 /// [1, 2, 3, 4] 93 /// [3, 4, 5, 6] 94 fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) { 95 dst[0..SIZE].copy_from_slice(&src[0..SIZE]); 96 dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]); 97 } 98 99 #[cfg(test)] 100 mod tests { 101 use super::slice_copy; 102 use alloc::vec::Vec; 103 use proptest::prelude::*; 104 proptest! { 105 #[test] 106 fn test_fast_short_slice_copy(left: Vec<u8>) { 107 let mut right = vec![0u8; left.len()]; 108 slice_copy(&left, &mut right); 109 prop_assert_eq!(&left, &right); 110 } 111 } 112 113 #[test] 114 fn test_fast_short_slice_copy_edge_cases() { 115 for len in 0..(512 * 2) { 116 let left = (0..len).map(|i| i as u8).collect::<Vec<_>>(); 117 let mut right = vec![0u8; len]; 118 slice_copy(&left, &mut right); 119 assert_eq!(left, right); 120 } 121 } 122 123 #[test] 124 fn test_fail2() { 125 let left = vec![ 126 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 127 24, 25, 26, 27, 28, 29, 30, 31, 32, 128 ]; 129 let mut right = vec![0u8; left.len()]; 130 slice_copy(&left, &mut right); 131 assert_eq!(left, right); 132 } 133 134 #[test] 135 fn test_fail() { 136 let left = vec![ 137 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 139 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 140 ]; 141 let mut right = vec![0u8; left.len()]; 142 slice_copy(&left, &mut right); 143 assert_eq!(left, right); 144 } 145 }