math128.h
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include "util/coding_lean.h"
#include "util/math.h"

#ifdef TEST_UINT128_COMPAT
#undef HAVE_UINT128_EXTENSION
#endif

namespace ROCKSDB_NAMESPACE {

// Unsigned128 is a 128 bit value supporting (at least) bitwise operators,
// shifts, and comparisons. __uint128_t is not always available.
#ifdef HAVE_UINT128_EXTENSION
using Unsigned128 = __uint128_t;
#else
struct Unsigned128 {
  uint64_t lo;
  uint64_t hi;

  inline Unsigned128() {
    static_assert(sizeof(Unsigned128) == 2 * sizeof(uint64_t),
                  "unexpected overhead in representation");
    lo = 0;
    hi = 0;
  }

  inline Unsigned128(uint64_t lower) {
    lo = lower;
    hi = 0;
  }

  inline Unsigned128(uint64_t lower, uint64_t upper) {
    lo = lower;
    hi = upper;
  }

  // Convert to any integer 64 bits or less.
  template <typename T,
            typename = std::enable_if_t<std::is_integral_v<T> &&
                                        sizeof(T) <= sizeof(uint64_t)>>
  explicit operator T() {
    return static_cast<T>(lo);
  }
};

inline Unsigned128 operator<<(const Unsigned128& lhs, unsigned shift) {
  shift &= 127;
  Unsigned128 rv;
  if (shift >= 64) {
    rv.lo = 0;
    rv.hi = lhs.lo << (shift & 63);
  } else {
    uint64_t tmp = lhs.lo;
    rv.lo = tmp << shift;
    // Ensure shift==0 shifts away everything. (This avoids another
    // conditional branch on shift == 0.)
    tmp = tmp >> 1 >> (63 - shift);
    rv.hi = tmp | (lhs.hi << shift);
  }
  return rv;
}
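
// Explanatory note on the two-step shift above (not from the original
// source): a single `tmp >> (64 - shift)` would be undefined behavior in
// C++ when shift == 0, because shifting a 64-bit value by 64 is UB.
// Written as `tmp >> 1 >> (63 - shift)`, the shift == 0 case becomes
// `tmp >> 1 >> 63`, which is well-defined and yields 0, so the low word
// contributes nothing to the high word, matching the semantics of a
// zero-bit shift. For shift in [1, 63] the two steps compose to the
// intended `tmp >> (64 - shift)` carry of the low word into the high word.
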
inline Unsigned128& operator<<=(Unsigned128& lhs, unsigned shift) {
  lhs = lhs << shift;
  return lhs;
}

inline Unsigned128 operator>>(const Unsigned128& lhs, unsigned shift) {
  shift &= 127;
  Unsigned128 rv;
  if (shift >= 64) {
    rv.hi = 0;
    rv.lo = lhs.hi >> (shift & 63);
  } else {
    uint64_t tmp = lhs.hi;
    rv.hi = tmp >> shift;
    // Ensure shift==0 shifts away everything
    tmp = tmp << 1 << (63 - shift);
    rv.lo = tmp | (lhs.lo >> shift);
  }
  return rv;
}

inline Unsigned128& operator>>=(Unsigned128& lhs, unsigned shift) {
  lhs = lhs >> shift;
  return lhs;
}

inline Unsigned128 operator&(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo & rhs.lo, lhs.hi & rhs.hi);
}

inline Unsigned128& operator&=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs & rhs;
  return lhs;
}

inline Unsigned128 operator|(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo | rhs.lo, lhs.hi | rhs.hi);
}

inline Unsigned128& operator|=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs | rhs;
  return lhs;
}

inline Unsigned128 operator^(const Unsigned128& lhs, const Unsigned128& rhs) {
  return Unsigned128(lhs.lo ^ rhs.lo, lhs.hi ^ rhs.hi);
}

inline Unsigned128& operator^=(Unsigned128& lhs, const Unsigned128& rhs) {
  lhs = lhs ^ rhs;
  return lhs;
}

inline Unsigned128 operator~(const Unsigned128& v) {
  return Unsigned128(~v.lo, ~v.hi);
}

inline bool operator==(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
}

inline bool operator!=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
}

inline bool operator>(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo > rhs.lo);
}

inline bool operator<(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo < rhs.lo);
}

inline bool operator>=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi > rhs.hi || (lhs.hi == rhs.hi && lhs.lo >= rhs.lo);
}

inline bool operator<=(const Unsigned128& lhs, const Unsigned128& rhs) {
  return lhs.hi < rhs.hi || (lhs.hi == rhs.hi && lhs.lo <= rhs.lo);
}
#endif
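
// Illustrative usage (a sketch, not part of the original header). Both
// representations support the same construction, shift, and bitwise
// operations, so code like the following works either way:
//
//   Unsigned128 x = Unsigned128{0xdeadbeef} << 64;  // set the upper half
//   x |= Unsigned128{42};                           // set the lower half
//   assert(Upper64of128(x) == 0xdeadbeef);
//   assert(Lower64of128(x) == 42);
//
// Lower64of128 / Upper64of128 are defined just below.
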
inline uint64_t Lower64of128(Unsigned128 v) {
#ifdef HAVE_UINT128_EXTENSION
  return static_cast<uint64_t>(v);
#else
  return v.lo;
#endif
}

inline uint64_t Upper64of128(Unsigned128 v) {
#ifdef HAVE_UINT128_EXTENSION
  return static_cast<uint64_t>(v >> 64);
#else
  return v.hi;
#endif
}

// This generally compiles down to a single fast instruction on 64-bit.
// This doesn't really make sense as operator* because it's not a
// general 128x128 multiply and provides more output than 64x64 multiply.
inline Unsigned128 Multiply64to128(uint64_t a, uint64_t b) {
#ifdef HAVE_UINT128_EXTENSION
  return Unsigned128{a} * Unsigned128{b};
#else
  // Full decomposition
  // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
  // -> 128-bit multiplication and optimize it appropriately.
  uint64_t tmp = uint64_t{b & 0xffffFFFF} * uint64_t{a & 0xffffFFFF};
  uint64_t lower = tmp & 0xffffFFFF;
  tmp >>= 32;
  tmp += uint64_t{b & 0xffffFFFF} * uint64_t{a >> 32};
  // Avoid overflow: first add lower 32 of tmp2, and later upper 32
  uint64_t tmp2 = uint64_t{b >> 32} * uint64_t{a & 0xffffFFFF};
  tmp += tmp2 & 0xffffFFFF;
  lower |= tmp << 32;
  tmp >>= 32;
  tmp += tmp2 >> 32;
  tmp += uint64_t{b >> 32} * uint64_t{a >> 32};
  return Unsigned128(lower, tmp);
#endif
}
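
// Explanatory note (not from the original source): the fallback above is
// ordinary schoolbook multiplication on 32-bit limbs. Writing
// a = a1*2^32 + a0 and b = b1*2^32 + b0:
//
//   a * b = a1*b1 * 2^64 + (a1*b0 + a0*b1) * 2^32 + a0*b0
//
// Each limb product fits in 64 bits, and the two middle terms are
// accumulated in two steps (lower 32 bits of tmp2 first, upper 32 later)
// so the running 64-bit accumulator never overflows.
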
template <>
inline Unsigned128 BottomNBits(Unsigned128 v, int nbits) {
  if (nbits < 64) {
    return BottomNBits(Lower64of128(v), nbits);
  } else {
    return (Unsigned128{BottomNBits(Upper64of128(v), nbits - 64)} << 64) |
           Lower64of128(v);
  }
}

template <>
inline int FloorLog2(Unsigned128 v) {
  if (Upper64of128(v) == 0) {
    return FloorLog2(Lower64of128(v));
  } else {
    return FloorLog2(Upper64of128(v)) + 64;
  }
}

template <>
inline int CountTrailingZeroBits(Unsigned128 v) {
  if (Lower64of128(v) != 0) {
    return CountTrailingZeroBits(Lower64of128(v));
  } else {
    return CountTrailingZeroBits(Upper64of128(v)) + 64;
  }
}

template <>
inline int BitsSetToOne(Unsigned128 v) {
  return BitsSetToOne(Lower64of128(v)) + BitsSetToOne(Upper64of128(v));
}

template <>
inline int BitParity(Unsigned128 v) {
  return BitParity(Lower64of128(v) ^ Upper64of128(v));
}

template <>
inline Unsigned128 EndianSwapValue(Unsigned128 v) {
  return (Unsigned128{EndianSwapValue(Lower64of128(v))} << 64) |
         EndianSwapValue(Upper64of128(v));
}

template <>
inline Unsigned128 ReverseBits(Unsigned128 v) {
  return (Unsigned128{ReverseBits(Lower64of128(v))} << 64) |
         ReverseBits(Upper64of128(v));
}

template <>
inline Unsigned128 DownwardInvolution(Unsigned128 v) {
  return (Unsigned128{DownwardInvolution(Upper64of128(v))} << 64) |
         DownwardInvolution(Upper64of128(v) ^ Lower64of128(v));
}

template <typename A>
inline std::remove_reference_t<A> BitwiseAnd(A a, Unsigned128 b) {
  static_assert(sizeof(A) <= sizeof(Unsigned128));
  return static_cast<A>(a & b);
}

template <typename B>
inline std::remove_reference_t<B> BitwiseAnd(Unsigned128 a, B b) {
  static_assert(sizeof(B) <= sizeof(Unsigned128));
  return static_cast<B>(a & b);
}

template <typename T>
struct IsUnsignedUpTo128
    : std::integral_constant<bool, std::is_unsigned<T>::value ||
                                       std::is_same<T, Unsigned128>::value> {};

inline void EncodeFixed128(char* dst, Unsigned128 value) {
  EncodeFixed64(dst, Lower64of128(value));
  EncodeFixed64(dst + 8, Upper64of128(value));
}

inline Unsigned128 DecodeFixed128(const char* ptr) {
  Unsigned128 rv = DecodeFixed64(ptr + 8);
  return (rv << 64) | DecodeFixed64(ptr);
}
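
// Illustrative round-trip (a sketch, not part of the original header):
// the lower 64 bits occupy the first 8 bytes and the upper 64 bits the
// next 8, using the fixed little-endian encoding of EncodeFixed64 /
// DecodeFixed64 from coding_lean.h.
//
//   char buf[16];
//   Unsigned128 v = Multiply64to128(0x123456789abcdef0, 0xfedcba9876543210);
//   EncodeFixed128(buf, v);
//   assert(DecodeFixed128(buf) == v);
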
// A version of EncodeFixed* for generic algorithms. Likely to be used
// with Unsigned128, so lives here for now.
template <typename T>
inline void EncodeFixedGeneric(char* /*dst*/, T /*value*/) {
  // Unfortunately, GCC does not appear to optimize this simple code down
  // to a trivial load on Intel:
  //
  // T ret_val = 0;
  // for (size_t i = 0; i < sizeof(T); ++i) {
  //   ret_val |= (static_cast<T>(static_cast<unsigned char>(ptr[i])) << (8 * i));
  // }
  // return ret_val;
  //
  // But it does unroll the loop, and it does optimize a manually unrolled
  // version for specific sizes down to a trivial load. I have no idea why
  // it doesn't do both on this code. So instead, we rely on
  // specializations.
  static_assert(sizeof(T) == 0, "No specialization provided for this type");
}

template <>
inline void EncodeFixedGeneric(char* dst, uint16_t value) {
  return EncodeFixed16(dst, value);
}

template <>
inline void EncodeFixedGeneric(char* dst, uint32_t value) {
  return EncodeFixed32(dst, value);
}

template <>
inline void EncodeFixedGeneric(char* dst, uint64_t value) {
  return EncodeFixed64(dst, value);
}

template <>
inline void EncodeFixedGeneric(char* dst, Unsigned128 value) {
  return EncodeFixed128(dst, value);
}

// A version of DecodeFixed* for generic algorithms.
template <typename T>
inline T DecodeFixedGeneric(const char* /*dst*/) {
  static_assert(sizeof(T) == 0, "No specialization provided for this type");
}

template <>
inline uint16_t DecodeFixedGeneric(const char* dst) {
  return DecodeFixed16(dst);
}

template <>
inline uint32_t DecodeFixedGeneric(const char* dst) {
  return DecodeFixed32(dst);
}

template <>
inline uint64_t DecodeFixedGeneric(const char* dst) {
  return DecodeFixed64(dst);
}

template <>
inline Unsigned128 DecodeFixedGeneric(const char* dst) {
  return DecodeFixed128(dst);
}
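
// Sketch of how the generic forms are meant to be used (illustrative;
// SerializeKey is a hypothetical name, not part of this header): a
// template can serialize any supported width without dispatching by hand.
//
//   template <typename T>
//   void SerializeKey(char* dst, T key) {
//     static_assert(IsUnsignedUpTo128<T>::value, "unsupported key type");
//     EncodeFixedGeneric(dst, key);  // resolves to the right specialization
//   }
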
}  // namespace ROCKSDB_NAMESPACE