math.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <assert.h>
  7. #ifdef _MSC_VER
  8. #include <intrin.h>
  9. #endif
  10. #ifdef __BMI2__
  11. #include <immintrin.h>
  12. #endif
  13. #include <cstdint>
  14. #include <type_traits>
  15. #include "port/lang.h"
  16. #include "rocksdb/rocksdb_namespace.h"
  17. ASSERT_FEATURE_COMPAT_HEADER();
  18. namespace ROCKSDB_NAMESPACE {
  19. // Fast implementation of extracting the bottom n bits of an integer.
  20. // To ensure fast implementation, undefined if n bits is full width or more.
  21. template <typename T>
  22. inline T BottomNBits(T v, int nbits) {
  23. static_assert(std::is_integral_v<T>, "non-integral type");
  24. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  25. assert(nbits >= 0);
  26. assert(nbits < int{8 * sizeof(T)});
  27. #ifdef __BMI2__
  28. if constexpr (sizeof(T) <= 4) {
  29. return static_cast<T>(_bzhi_u32(static_cast<uint32_t>(v), nbits));
  30. }
  31. if constexpr (sizeof(T) <= 8) {
  32. return static_cast<T>(_bzhi_u64(static_cast<uint64_t>(v), nbits));
  33. }
  34. #endif
  35. // Newer compilers compile this down to bzhi on x86, but some older
  36. // ones don't, thus the need for the intrinsic above.
  37. return static_cast<T>(v & ((T{1} << nbits) - 1));
  38. }
  39. // Fast implementation of floor(log2(v)). Undefined for 0 or negative
  40. // numbers (in case of signed type).
  41. template <typename T>
  42. inline int FloorLog2(T v) {
  43. static_assert(std::is_integral_v<T>, "non-integral type");
  44. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  45. assert(v > 0);
  46. #ifdef _MSC_VER
  47. static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
  48. unsigned long idx = 0;
  49. if (sizeof(T) <= sizeof(uint32_t)) {
  50. _BitScanReverse(&idx, static_cast<uint32_t>(v));
  51. } else {
  52. #if defined(_M_X64) || defined(_M_ARM64)
  53. _BitScanReverse64(&idx, static_cast<uint64_t>(v));
  54. #else
  55. const auto vh = static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32);
  56. if (vh != 0) {
  57. _BitScanReverse(&idx, static_cast<uint32_t>(vh));
  58. idx += 32;
  59. } else {
  60. _BitScanReverse(&idx, static_cast<uint32_t>(v));
  61. }
  62. #endif
  63. }
  64. return idx;
  65. #else
  66. static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  67. if (sizeof(T) <= sizeof(unsigned int)) {
  68. int lz = __builtin_clz(static_cast<unsigned int>(v));
  69. return int{sizeof(unsigned int)} * 8 - 1 - lz;
  70. } else if (sizeof(T) <= sizeof(unsigned long)) {
  71. int lz = __builtin_clzl(static_cast<unsigned long>(v));
  72. return int{sizeof(unsigned long)} * 8 - 1 - lz;
  73. } else {
  74. int lz = __builtin_clzll(static_cast<unsigned long long>(v));
  75. return int{sizeof(unsigned long long)} * 8 - 1 - lz;
  76. }
  77. #endif
  78. }
  79. // Constexpr version of FloorLog2
  80. template <typename T>
  81. constexpr int ConstexprFloorLog2(T v) {
  82. // NOTE: not checking is_integral so that this works with Unsigned128
  83. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  84. int rv = 0;
  85. while (v > T{1}) {
  86. ++rv;
  87. v >>= 1;
  88. }
  89. return rv;
  90. }
  91. // Number of low-order zero bits before the first 1 bit. Undefined for 0.
  92. template <typename T>
  93. inline int CountTrailingZeroBits(T v) {
  94. static_assert(std::is_integral_v<T>, "non-integral type");
  95. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  96. assert(v != 0);
  97. #ifdef _MSC_VER
  98. static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
  99. unsigned long tz = 0;
  100. if (sizeof(T) <= sizeof(uint32_t)) {
  101. _BitScanForward(&tz, static_cast<uint32_t>(v));
  102. } else {
  103. #if defined(_M_X64) || defined(_M_ARM64)
  104. _BitScanForward64(&tz, static_cast<uint64_t>(v));
  105. #else
  106. _BitScanForward(&tz, static_cast<uint32_t>(v));
  107. if (tz == 0) {
  108. _BitScanForward(&tz,
  109. static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32));
  110. tz += 32;
  111. }
  112. #endif
  113. }
  114. return static_cast<int>(tz);
  115. #else
  116. static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  117. if (sizeof(T) <= sizeof(unsigned int)) {
  118. return __builtin_ctz(static_cast<unsigned int>(v));
  119. } else if (sizeof(T) <= sizeof(unsigned long)) {
  120. return __builtin_ctzl(static_cast<unsigned long>(v));
  121. } else {
  122. return __builtin_ctzll(static_cast<unsigned long long>(v));
  123. }
  124. #endif
  125. }
  126. // Not all MSVC compile settings will use `BitsSetToOneFallback()`. We include
  127. // the following code at coarse granularity for simpler macros. It's important
  128. // to exclude at least so our non-MSVC unit test coverage tool doesn't see it.
  129. #ifdef _MSC_VER
  130. namespace detail {
  131. template <typename T>
  132. int BitsSetToOneFallback(T v) {
  133. static_assert(std::is_integral_v<T>, "non-integral type");
  134. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  135. const int kBits = static_cast<int>(sizeof(T)) * 8;
  136. static_assert((kBits & (kBits - 1)) == 0, "must be power of two bits");
  137. // we static_cast these bit patterns in order to truncate them to the correct
  138. // size. Warning C4309 dislikes this technique, so disable it here.
  139. #pragma warning(disable : 4309)
  140. v = static_cast<T>(v - ((v >> 1) & static_cast<T>(0x5555555555555555ull)));
  141. v = static_cast<T>((v & static_cast<T>(0x3333333333333333ull)) +
  142. ((v >> 2) & static_cast<T>(0x3333333333333333ull)));
  143. v = static_cast<T>((v + (v >> 4)) & static_cast<T>(0x0F0F0F0F0F0F0F0Full));
  144. #pragma warning(default : 4309)
  145. for (int shift_bits = 8; shift_bits < kBits; shift_bits <<= 1) {
  146. v += static_cast<T>(v >> shift_bits);
  147. }
  148. // we want the bottom "slot" that's big enough to represent a value up to
  149. // (and including) kBits.
  150. return static_cast<int>(v & static_cast<T>(kBits | (kBits - 1)));
  151. }
  152. } // namespace detail
  153. #endif // _MSC_VER
  154. // Number of bits set to 1. Also known as "population count".
  155. template <typename T>
  156. inline int BitsSetToOne(T v) {
  157. static_assert(std::is_integral_v<T>, "non-integral type");
  158. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  159. #ifdef _MSC_VER
  160. static_assert(sizeof(T) <= sizeof(uint64_t), "type too big");
  161. if (sizeof(T) < sizeof(uint32_t)) {
  162. // This bit mask is to avoid a compiler warning on unused path
  163. constexpr auto mm = 8 * sizeof(uint32_t) - 1;
  164. // The bit mask is to neutralize sign extension on small signed types
  165. constexpr uint32_t m = (uint32_t{1} << ((8 * sizeof(T)) & mm)) - 1;
  166. #if __POPCNT__
  167. return static_cast<int>(__popcnt(static_cast<uint32_t>(v) & m));
  168. #else
  169. return static_cast<int>(detail::BitsSetToOneFallback(v) & m);
  170. #endif // __POPCNT__
  171. } else if (sizeof(T) == sizeof(uint32_t)) {
  172. #if __POPCNT__
  173. return static_cast<int>(__popcnt(static_cast<uint32_t>(v)));
  174. #else
  175. return detail::BitsSetToOneFallback(static_cast<uint32_t>(v));
  176. #endif // __POPCNT__
  177. } else {
  178. #if __POPCNT__
  179. #ifdef _M_X64
  180. return static_cast<int>(__popcnt64(static_cast<uint64_t>(v)));
  181. #else
  182. return static_cast<int>(
  183. __popcnt(static_cast<uint32_t>(static_cast<uint64_t>(v) >> 32) +
  184. __popcnt(static_cast<uint32_t>(v))));
  185. #endif // _M_X64
  186. #else
  187. return detail::BitsSetToOneFallback(static_cast<uint64_t>(v));
  188. #endif // __POPCNT__
  189. }
  190. #else
  191. static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  192. if (sizeof(T) < sizeof(unsigned int)) {
  193. // This bit mask is to avoid a compiler warning on unused path
  194. constexpr auto mm = 8 * sizeof(unsigned int) - 1;
  195. // This bit mask is to neutralize sign extension on small signed types
  196. constexpr unsigned int m = (1U << ((8 * sizeof(T)) & mm)) - 1;
  197. return __builtin_popcount(static_cast<unsigned int>(v) & m);
  198. } else if (sizeof(T) == sizeof(unsigned int)) {
  199. return __builtin_popcount(static_cast<unsigned int>(v));
  200. } else if (sizeof(T) <= sizeof(unsigned long)) {
  201. return __builtin_popcountl(static_cast<unsigned long>(v));
  202. } else {
  203. return __builtin_popcountll(static_cast<unsigned long long>(v));
  204. }
  205. #endif
  206. }
  207. template <typename T>
  208. inline int BitParity(T v) {
  209. static_assert(std::is_integral_v<T>, "non-integral type");
  210. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  211. #ifdef _MSC_VER
  212. // bit parity == oddness of popcount
  213. return BitsSetToOne(v) & 1;
  214. #else
  215. static_assert(sizeof(T) <= sizeof(unsigned long long), "type too big");
  216. if (sizeof(T) <= sizeof(unsigned int)) {
  217. // On any sane systen, potential sign extension here won't change parity
  218. return __builtin_parity(static_cast<unsigned int>(v));
  219. } else if (sizeof(T) <= sizeof(unsigned long)) {
  220. return __builtin_parityl(static_cast<unsigned long>(v));
  221. } else {
  222. return __builtin_parityll(static_cast<unsigned long long>(v));
  223. }
  224. #endif
  225. }
  226. // Swaps between big and little endian. Can be used in combination with the
  227. // little-endian encoding/decoding functions in coding_lean.h and coding.h to
  228. // encode/decode big endian.
  229. template <typename T>
  230. inline T EndianSwapValue(T v) {
  231. static_assert(std::is_integral_v<T>, "non-integral type");
  232. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  233. #ifdef _MSC_VER
  234. if (sizeof(T) == 2) {
  235. return static_cast<T>(_byteswap_ushort(static_cast<uint16_t>(v)));
  236. } else if (sizeof(T) == 4) {
  237. return static_cast<T>(_byteswap_ulong(static_cast<uint32_t>(v)));
  238. } else if (sizeof(T) == 8) {
  239. return static_cast<T>(_byteswap_uint64(static_cast<uint64_t>(v)));
  240. }
  241. #else
  242. if (sizeof(T) == 2) {
  243. return static_cast<T>(__builtin_bswap16(static_cast<uint16_t>(v)));
  244. } else if (sizeof(T) == 4) {
  245. return static_cast<T>(__builtin_bswap32(static_cast<uint32_t>(v)));
  246. } else if (sizeof(T) == 8) {
  247. return static_cast<T>(__builtin_bswap64(static_cast<uint64_t>(v)));
  248. }
  249. #endif
  250. // Recognized by clang as bswap, but not by gcc :(
  251. T ret_val = 0;
  252. for (std::size_t i = 0; i < sizeof(T); ++i) {
  253. ret_val |= ((v >> (8 * i)) & 0xff) << (8 * (sizeof(T) - 1 - i));
  254. }
  255. return ret_val;
  256. }
  257. // Reverses the order of bits in an integral value
  258. template <typename T>
  259. inline T ReverseBits(T v) {
  260. static_assert(std::is_integral_v<T>, "non-integral type");
  261. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  262. T r = EndianSwapValue(v);
  263. const T kHighestByte = T{1} << ((sizeof(T) - 1) * 8);
  264. const T kEveryByte = kHighestByte | (kHighestByte / 255);
  265. r = ((r & (kEveryByte * 0x0f)) << 4) | ((r >> 4) & (kEveryByte * 0x0f));
  266. r = ((r & (kEveryByte * 0x33)) << 2) | ((r >> 2) & (kEveryByte * 0x33));
  267. r = ((r & (kEveryByte * 0x55)) << 1) | ((r >> 1) & (kEveryByte * 0x55));
  268. return r;
  269. }
  270. // Every output bit depends on many input bits in the same and higher
  271. // positions, but not lower positions. Specifically, this function
  272. // * Output highest bit set to 1 is same as input (same FloorLog2, or
  273. // equivalently, same number of leading zeros)
  274. // * Is its own inverse (an involution)
  275. // * Guarantees that b bottom bits of v and c bottom bits of
  276. // DownwardInvolution(v) uniquely identify b + c bottom bits of v
  277. // (which is all of v if v < 2**(b + c)).
  278. // ** A notable special case is that modifying c adjacent bits at
  279. // some chosen position in the input is bijective with the bottom c
  280. // output bits.
  281. // * Distributes over xor, as in DI(a ^ b) == DI(a) ^ DI(b)
  282. //
  283. // This transformation is equivalent to a matrix*vector multiplication in
  284. // GF(2) where the matrix is recursively defined by the pattern matrix
  285. // P = | 1 1 |
  286. // | 0 1 |
  287. // and replacing 1's with P and 0's with 2x2 zero matices to some depth,
  288. // e.g. depth of 6 for 64-bit T. An essential feature of this matrix
  289. // is that all square sub-matrices that include the top row are invertible.
  290. template <typename T>
  291. inline T DownwardInvolution(T v) {
  292. static_assert(std::is_integral_v<T>, "non-integral type");
  293. static_assert(!std::is_reference_v<T>, "use std::remove_reference_t");
  294. static_assert(sizeof(T) <= 8, "only supported up to 64 bits");
  295. uint64_t r = static_cast<uint64_t>(v);
  296. if constexpr (sizeof(T) > 4) {
  297. r ^= r >> 32;
  298. }
  299. if constexpr (sizeof(T) > 2) {
  300. r ^= (r & 0xffff0000ffff0000U) >> 16;
  301. }
  302. if constexpr (sizeof(T) > 1) {
  303. r ^= (r & 0xff00ff00ff00ff00U) >> 8;
  304. }
  305. r ^= (r & 0xf0f0f0f0f0f0f0f0U) >> 4;
  306. r ^= (r & 0xccccccccccccccccU) >> 2;
  307. r ^= (r & 0xaaaaaaaaaaaaaaaaU) >> 1;
  308. return static_cast<T>(r);
  309. }
  310. // Bitwise-And with typing that allows you to avoid writing an explicit cast
  311. // to the smaller type, or the type of the right parameter if same size.
  312. template <typename A, typename B>
  313. inline std::conditional_t<sizeof(A) < sizeof(B), A, B> BitwiseAnd(A a, B b) {
  314. static_assert(std::is_integral_v<A>, "non-integral type");
  315. static_assert(std::is_integral_v<B>, "non-integral type");
  316. static_assert(!std::is_reference_v<A>, "use std::remove_reference_t");
  317. static_assert(!std::is_reference_v<B>, "use std::remove_reference_t");
  318. using Smaller = std::conditional_t<sizeof(A) < sizeof(B), A, B>;
  319. return static_cast<Smaller>(a & b);
  320. }
  321. } // namespace ROCKSDB_NAMESPACE