/ duct-tape / xnu / osfmk / arm64 / bzero.s
bzero.s
  1  /*
  2   * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
  3   *
  4   * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  5   * 
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. The rights granted to you under the License
 10   * may not be used to create, or enable the creation or redistribution of,
 11   * unlawful or unlicensed copies of an Apple operating system, or to
 12   * circumvent, violate, or enable the circumvention or violation of, any
 13   * terms of an Apple operating system software license agreement.
 14   * 
 15   * Please obtain a copy of the License at
 16   * http://www.opensource.apple.com/apsl/ and read it before using this file.
 17   * 
 18   * The Original Code and all software distributed under the License are
 19   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 20   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 21   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 22   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 23   * Please see the License for the specific language governing rights and
 24   * limitations under the License.
 25   * 
 26   * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 27   *
 28   * This file implements the following functions for the arm64 architecture:
 29   *
 30   *  void bzero(void *buffer, size_t length);
 31   *  void __bzero(void *buffer, size_t length);
 32   *  void *memset(void *buffer, int value, size_t length);
 33   *
 34   * The first two zero-fill a buffer.  The third fills the buffer with the low
 35   * byte of its second argument.
 36   */
 37  
 38  #include "asm.h"
 39  
 40  .globl _bzero
 41  .globl ___bzero
 42  .globl _memset
 43  .globl _secure_memset
 44  
 45  /*****************************************************************************
 46   *  bzero entrypoint                                                         *
 47   *****************************************************************************/
 48  
 49  .text
 50  .align 4
 51  _bzero:
 52  ___bzero:
 53      ARM64_STACK_PROLOG
 54      PUSH_FRAME
 55      mov     x2,      x1
 56      eor     x1,      x1, x1
 57      mov     x3,      x0
 58      cmp     x2,      #128
 59      b.cc    L_memsetSmall
 60  
 61  /*****************************************************************************
 62   *  Large buffer zero engine                                                 *
 63   *****************************************************************************/
 64  
 65  L_bzeroLarge:
 66  //  Write the first 64 bytes of the buffer without regard to alignment, then
 67  //  advance x3 to point to a cacheline-aligned location within the buffer, and
 68  //  decrement the length accordingly.
 69      stp     x1, x1, [x0]
 70      stp     x1, x1, [x0, #16]
 71      stp     x1, x1, [x0, #32]
 72      stp     x1, x1, [x0, #48]
 73      add     x3,      x0, #64
 74      and     x3,      x3, #-64
 75      add     x2,      x2, x0   // end of buffer
 76      add     x4,      x3, #64  // end of first cacheline to zero
 77      subs    x2,      x2, x4   // if the end of the buffer comes first, jump
 78      b.ls    1f                //    directly to the cleanup pass.
 79  0:  dc      zva,     x3       // zero cacheline
 80      add     x3,      x3, #64  // increment pointer
 81      subs    x2,      x2, #64  // decrement length
 82      b.hi    0b
 83  1:  add     x3,      x3, x2   // back up pointer to (end of buffer) - 64.
 84      stp     x1, x1, [x3]      // and store 64 bytes to reach end of buffer.
 85      stp     x1, x1, [x3, #16]
 86      stp     x1, x1, [x3, #32]
 87      stp     x1, x1, [x3, #48]
 88      POP_FRAME
 89      ARM64_STACK_EPILOG
 90  
 91  /*****************************************************************************
 92   *  memset entrypoint                                                        *
 93   *****************************************************************************/
 94  
 95  .align 4
 96  /*
 97   * It is important that secure_memset remains defined in assembly to avoid
 98   * compiler optimizations.
 99   */
100  _secure_memset:
101  _memset:
102      ARM64_STACK_PROLOG
103      PUSH_FRAME
104      and     x1,      x1, #0xff
105      orr     x3,      xzr,#0x0101010101010101
106      mul     x1,      x1, x3
107      mov     x3,      x0
108      cmp     x2,      #64
109      b.cc    L_memsetSmall
110  
111  /*****************************************************************************
112   *  Large buffer store engine                                                *
113   *****************************************************************************/
114  
115  L_memsetLarge:
116  //  Write the first 64 bytes of the buffer without regard to alignment, then
117  //  advance x3 to point to an aligned location within the buffer, and
118  //  decrement the length accordingly.
119      stp     x1, x1, [x0]
120      add     x3,      x0, #16
121      and     x3,      x3, #-16
122      add     x2,      x2, x0   // end of buffer
123      add     x4,      x3, #64  // end of first aligned 64-byte store
124      subs    x2,      x2, x4   // if the end of the buffer comes first, jump
125      b.ls    1f                //    directly to the cleanup store.
126  0:  stnp    x1, x1, [x3]
127      stnp    x1, x1, [x3, #16]
128      stnp    x1, x1, [x3, #32]
129      stnp    x1, x1, [x3, #48]
130      add     x3,      x3, #64
131      subs    x2,      x2, #64
132      b.hi    0b
133  1:  add     x3,      x3, x2   // back up pointer to (end of buffer) - 64.
134      stp     x1, x1, [x3]
135      stp     x1, x1, [x3, #16]
136      stp     x1, x1, [x3, #32]
137      stp     x1, x1, [x3, #48]
138      POP_FRAME
139      ARM64_STACK_EPILOG
140  
141  /*****************************************************************************
142   *  Small buffer store engine                                                *
143   *****************************************************************************/
144  
//  Shared tail for short buffers.  Control enters at L_memsetSmall (not at
//  the first instruction below) from _bzero with fewer than 128 bytes to
//  write and from _memset with fewer than 64.
//
//  In:  x1 = 64-bit fill pattern, x2 = byte count, x3 = store cursor.
//  Both entry points have already executed ARM64_STACK_PROLOG and PUSH_FRAME,
//  which is why this path finishes with the matching POP_FRAME and
//  ARM64_STACK_EPILOG.
L_memsetSmallWord:
    str     x1,     [x3],#8   // write 8 pattern bytes, then advance cursor
L_memsetSmall:
    subs    x2,      x2, #8
    b.hs    L_memsetSmallWord // no borrow: at least 8 bytes were still due
    adds    x2,      x2, #8   // undo the last subtraction: x2 = 0..7 left
    b.eq    L_memsetSmallDone // nothing left over
L_memsetSmallByte:
    strb    w1,     [x3],#1   // write one pattern byte, then advance cursor
    subs    x2,      x2, #1
    b.ne    L_memsetSmallByte
L_memsetSmallDone:
    POP_FRAME
    ARM64_STACK_EPILOG
156