/* bzero.s */
1 /* 2 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 * 28 * This file implements the following functions for the arm64 architecture: 29 * 30 * void bzero(void *buffer, size_t length); 31 * void __bzero(void *buffer, size_t length); 32 * void *memset(void *buffer, int value, size_t length); 33 * 34 * The first two zero-fill a buffer. The third fills the buffer with the low 35 * byte of its second argument. 
36 */ 37 38 #include "asm.h" 39 40 .globl _bzero 41 .globl ___bzero 42 .globl _memset 43 .globl _secure_memset 44 45 /***************************************************************************** 46 * bzero entrypoint * 47 *****************************************************************************/ 48 49 .text 50 .align 4 51 _bzero: 52 ___bzero: 53 ARM64_STACK_PROLOG 54 PUSH_FRAME 55 mov x2, x1 56 eor x1, x1, x1 57 mov x3, x0 58 cmp x2, #128 59 b.cc L_memsetSmall 60 61 /***************************************************************************** 62 * Large buffer zero engine * 63 *****************************************************************************/ 64 65 L_bzeroLarge: 66 // Write the first 64 bytes of the buffer without regard to alignment, then 67 // advance x3 to point to a cacheline-aligned location within the buffer, and 68 // decrement the length accordingly. 69 stp x1, x1, [x0] 70 stp x1, x1, [x0, #16] 71 stp x1, x1, [x0, #32] 72 stp x1, x1, [x0, #48] 73 add x3, x0, #64 74 and x3, x3, #-64 75 add x2, x2, x0 // end of buffer 76 add x4, x3, #64 // end of first cacheline to zero 77 subs x2, x2, x4 // if the end of the buffer comes first, jump 78 b.ls 1f // directly to the cleanup pass. 79 0: dc zva, x3 // zero cacheline 80 add x3, x3, #64 // increment pointer 81 subs x2, x2, #64 // decrement length 82 b.hi 0b 83 1: add x3, x3, x2 // back up pointer to (end of buffer) - 64. 84 stp x1, x1, [x3] // and store 64 bytes to reach end of buffer. 85 stp x1, x1, [x3, #16] 86 stp x1, x1, [x3, #32] 87 stp x1, x1, [x3, #48] 88 POP_FRAME 89 ARM64_STACK_EPILOG 90 91 /***************************************************************************** 92 * memset entrypoint * 93 *****************************************************************************/ 94 95 .align 4 96 /* 97 * It is important that secure_memset remains defined in assembly to avoid 98 * compiler optimizations. 
99 */ 100 _secure_memset: 101 _memset: 102 ARM64_STACK_PROLOG 103 PUSH_FRAME 104 and x1, x1, #0xff 105 orr x3, xzr,#0x0101010101010101 106 mul x1, x1, x3 107 mov x3, x0 108 cmp x2, #64 109 b.cc L_memsetSmall 110 111 /***************************************************************************** 112 * Large buffer store engine * 113 *****************************************************************************/ 114 115 L_memsetLarge: 116 // Write the first 64 bytes of the buffer without regard to alignment, then 117 // advance x3 to point to an aligned location within the buffer, and 118 // decrement the length accordingly. 119 stp x1, x1, [x0] 120 add x3, x0, #16 121 and x3, x3, #-16 122 add x2, x2, x0 // end of buffer 123 add x4, x3, #64 // end of first aligned 64-byte store 124 subs x2, x2, x4 // if the end of the buffer comes first, jump 125 b.ls 1f // directly to the cleanup store. 126 0: stnp x1, x1, [x3] 127 stnp x1, x1, [x3, #16] 128 stnp x1, x1, [x3, #32] 129 stnp x1, x1, [x3, #48] 130 add x3, x3, #64 131 subs x2, x2, #64 132 b.hi 0b 133 1: add x3, x3, x2 // back up pointer to (end of buffer) - 64. 134 stp x1, x1, [x3] 135 stp x1, x1, [x3, #16] 136 stp x1, x1, [x3, #32] 137 stp x1, x1, [x3, #48] 138 POP_FRAME 139 ARM64_STACK_EPILOG 140 141 /***************************************************************************** 142 * Small buffer store engine * 143 *****************************************************************************/ 144 145 0: str x1, [x3],#8 146 L_memsetSmall: 147 subs x2, x2, #8 148 b.cs 0b 149 adds x2, x2, #8 150 b.eq 2f 151 1: strb w1, [x3],#1 152 subs x2, x2, #1 153 b.ne 1b 154 2: POP_FRAME 155 ARM64_STACK_EPILOG 156