fastrange.h 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. // fastrange/FastRange: A faster alternative to % for mapping a hash value
  6. // to an arbitrary range. See https://github.com/lemire/fastrange
  7. //
  8. // Generally recommended are FastRange32 for mapping results of 32-bit
  9. // hash functions and FastRange64 for mapping results of 64-bit hash
  10. // functions. FastRange is less forgiving than % if the input hashes are
  11. // not well distributed over the full range of the type (32 or 64 bits).
  12. //
  13. // Also included is a templated implementation FastRangeGeneric for use
  14. // in generic algorithms, but not otherwise recommended because of
  15. // potential ambiguity. Unlike with %, it is critical to use the right
  16. // FastRange variant for the output size of your hash function.
  17. #pragma once
  18. #include <cstddef>
  19. #include <cstdint>
  20. #include <type_traits>
  21. #include "rocksdb/rocksdb_namespace.h"
  22. #ifdef TEST_UINT128_COMPAT
  23. #undef HAVE_UINT128_EXTENSION
  24. #endif
  25. namespace ROCKSDB_NAMESPACE {
  26. namespace detail {
  27. // Using a class template to support partial specialization
  28. template <typename Hash, typename Range>
  29. struct FastRangeGenericImpl {
  30. // only reach this on no supported specialization
  31. };
  32. template <typename Range>
  33. struct FastRangeGenericImpl<uint32_t, Range> {
  34. static inline Range Fn(uint32_t hash, Range range) {
  35. static_assert(std::is_unsigned<Range>::value, "must be unsigned");
  36. static_assert(sizeof(Range) <= sizeof(uint32_t),
  37. "cannot be larger than hash (32 bits)");
  38. uint64_t product = uint64_t{range} * hash;
  39. return static_cast<Range>(product >> 32);
  40. }
  41. };
  42. template <typename Range>
  43. struct FastRangeGenericImpl<uint64_t, Range> {
  44. static inline Range Fn(uint64_t hash, Range range) {
  45. static_assert(std::is_unsigned<Range>::value, "must be unsigned");
  46. static_assert(sizeof(Range) <= sizeof(uint64_t),
  47. "cannot be larger than hash (64 bits)");
  48. #ifdef HAVE_UINT128_EXTENSION
  49. // Can use compiler's 128-bit type. Trust it to do the right thing.
  50. __uint128_t wide = __uint128_t{range} * hash;
  51. return static_cast<Range>(wide >> 64);
  52. #else
  53. // Fall back: full decomposition.
  54. // NOTE: GCC seems to fully understand this code as 64-bit x 64-bit
  55. // -> 128-bit multiplication and optimize it appropriately
  56. uint64_t range64 = range; // ok to shift by 32, even if Range is 32-bit
  57. uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF};
  58. tmp >>= 32;
  59. tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32};
  60. // Avoid overflow: first add lower 32 of tmp2, and later upper 32
  61. uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF};
  62. tmp += static_cast<uint32_t>(tmp2);
  63. tmp >>= 32;
  64. tmp += (tmp2 >> 32);
  65. tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32};
  66. return static_cast<Range>(tmp);
  67. #endif
  68. }
  69. };
  70. } // namespace detail
  71. // Now an omnibus templated function (yay parameter inference).
  72. //
  73. // NOTICE:
  74. // This templated version is not recommended for typical use because
  75. // of the potential to mix a 64-bit FastRange with a 32-bit bit hash,
  76. // most likely because you put your 32-bit hash in an "unsigned long"
  77. // which is 64 bits on some platforms. That doesn't really matter for
  78. // an operation like %, but 64-bit FastRange gives extremely bad results,
  79. // mostly zero, on 32-bit hash values. And because good hashing is not
  80. // generally required for correctness, this kind of mistake could go
  81. // unnoticed with just unit tests. Plus it could vary by platform.
  82. template <typename Hash, typename Range>
  83. inline Range FastRangeGeneric(Hash hash, Range range) {
  84. return detail::FastRangeGenericImpl<Hash, Range>::Fn(hash, range);
  85. }
  86. // The most popular / convenient / recommended variants:
  87. // Map a quality 64-bit hash value down to an arbitrary size_t range.
  88. // (size_t is standard for mapping to things in memory.)
  89. inline size_t FastRange64(uint64_t hash, size_t range) {
  90. return FastRangeGeneric(hash, range);
  91. }
  92. // Map a quality 32-bit hash value down to an arbitrary uint32_t range.
  93. inline uint32_t FastRange32(uint32_t hash, uint32_t range) {
  94. return FastRangeGeneric(hash, range);
  95. }
  96. } // namespace ROCKSDB_NAMESPACE