/*
 * xnu/osfmk/kern/arithmetic_128.h
 */
  1  /*
  2   * Copyright (c) 1999, 2003, 2006, 2007, 2010 Apple Inc. All rights reserved.
  3   *
  4   * @APPLE_LICENSE_HEADER_START@
  5   *
  6   * This file contains Original Code and/or Modifications of Original Code
  7   * as defined in and that are subject to the Apple Public Source License
  8   * Version 2.0 (the 'License'). You may not use this file except in
  9   * compliance with the License. Please obtain a copy of the License at
 10   * http://www.opensource.apple.com/apsl/ and read it before using this
 11   * file.
 12   *
 13   * The Original Code and all software distributed under the License are
 14   * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 15   * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 16   * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 17   * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 18   * Please see the License for the specific language governing rights and
 19   * limitations under the License.
 20   *
 21   * @APPLE_LICENSE_HEADER_END@
 22   */
 23  /*
 24   * Code duplicated from Libc/gen/nanosleep.c
 25   */
 26  
 27  #ifndef _ARITHMETIC_128_H_
 28  #define _ARITHMETIC_128_H_
 29  
 30  #include <stdint.h>
 31  
 32  #if __LP64__
 33  
 34  static __inline uint64_t
 35  multi_overflow(uint64_t a, uint64_t b)
 36  {
 37  	__uint128_t prod;
 38  	prod = (__uint128_t)a * (__uint128_t)b;
 39  	return (uint64_t) (prod >> 64);
 40  }
 41  
 42  #else
 43  
/*
 * 128-bit unsigned value represented as two 64-bit halves, for the
 * !__LP64__ case where the compiler's __uint128_t is unavailable.
 */
typedef struct {
	uint64_t high;	/* most-significant 64 bits */
	uint64_t low;	/* least-significant 64 bits */
} uint128_data_t;
 48  
 49  /* 128-bit addition: acc += add */
 50  static __inline void
 51  add128_128(uint128_data_t *acc, uint128_data_t *add)
 52  {
 53  	acc->high += add->high;
 54  	acc->low += add->low;
 55  	if (acc->low < add->low) {
 56  		acc->high++; // carry
 57  	}
 58  }
 59  
 60  /* 64x64 -> 128 bit multiplication */
 61  static __inline void
 62  mul64x64(uint64_t x, uint64_t y, uint128_data_t *prod)
 63  {
 64  	uint128_data_t add;
 65  	/*
 66  	 * Split the two 64-bit multiplicands into 32-bit parts:
 67  	 * x => 2^32 * x1 + x2
 68  	 * y => 2^32 * y1 + y2
 69  	 */
 70  	uint32_t x1 = (uint32_t)(x >> 32);
 71  	uint32_t x2 = (uint32_t)x;
 72  	uint32_t y1 = (uint32_t)(y >> 32);
 73  	uint32_t y2 = (uint32_t)y;
 74  	/*
 75  	 * direct multiplication:
 76  	 * x * y => 2^64 * (x1 * y1) + 2^32 (x1 * y2 + x2 * y1) + (x2 * y2)
 77  	 * The first and last terms are direct assignmenet into the uint128_t
 78  	 * structure.  Then we add the middle two terms separately, to avoid
 79  	 * 64-bit overflow.  (We could use the Karatsuba algorithm to save
 80  	 * one multiply, but it is harder to deal with 64-bit overflows.)
 81  	 */
 82  	prod->high = (uint64_t)x1 * (uint64_t)y1;
 83  	prod->low = (uint64_t)x2 * (uint64_t)y2;
 84  	add.low = (uint64_t)x1 * (uint64_t)y2;
 85  	add.high = (add.low >> 32);
 86  	add.low <<= 32;
 87  	add128_128(prod, &add);
 88  	add.low = (uint64_t)x2 * (uint64_t)y1;
 89  	add.high = (add.low >> 32);
 90  	add.low <<= 32;
 91  	add128_128(prod, &add);
 92  }
 93  
 94  static __inline uint64_t
 95  multi_overflow(uint64_t a, uint64_t b)
 96  {
 97  	uint128_data_t prod;
 98  	mul64x64(a, b, &prod);
 99  	return prod.high;
100  }
101  
102  #endif  /* __LP64__ */
103  #endif  /* _ARITHMETIC_128_H_ */