hash.h 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Common hash functions with convenient interfaces. If hashing a
  11. // statically-sized input in a performance-critical context, consider
  12. // calling a specific hash implementation directly, such as
  13. // XXH3p_64bits from xxhash.h.
  14. //
  15. // Since this is a very common header, implementation details are kept
  16. // out-of-line. Out-of-lining also aids in tracking the time spent in
  17. // hashing functions. Inlining is of limited benefit for runtime-sized
  18. // hash inputs.
  19. #pragma once
  20. #include <stddef.h>
  21. #include <stdint.h>
  22. #include "rocksdb/slice.h"
  23. namespace ROCKSDB_NAMESPACE {
  24. // Stable/persistent 64-bit hash. Higher quality and generally faster than
  25. // Hash(), especially for inputs > 24 bytes.
  26. extern uint64_t Hash64(const char* data, size_t n, uint64_t seed);
  27. // Specific optimization without seed (same as seed = 0)
  28. extern uint64_t Hash64(const char* data, size_t n);
  29. // Non-persistent hash. Must only used for in-memory data structure.
  30. // The hash results are thus applicable to change. (Thus, it rarely makes
  31. // sense to specify a seed for this function.)
  32. inline uint64_t NPHash64(const char* data, size_t n, uint32_t seed) {
  33. // Currently same as Hash64
  34. return Hash64(data, n, seed);
  35. }
  36. // Specific optimization without seed (same as seed = 0)
  37. inline uint64_t NPHash64(const char* data, size_t n) {
  38. // Currently same as Hash64
  39. return Hash64(data, n);
  40. }
  41. // Stable/persistent 32-bit hash. Moderate quality and high speed on
  42. // small inputs.
  43. // TODO: consider rename to Hash32
  44. extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
  45. // TODO: consider rename to LegacyBloomHash32
  46. inline uint32_t BloomHash(const Slice& key) {
  47. return Hash(key.data(), key.size(), 0xbc9f1d34);
  48. }
  49. inline uint64_t GetSliceHash64(const Slice& key) {
  50. return Hash64(key.data(), key.size());
  51. }
  52. inline uint64_t GetSliceNPHash64(const Slice& s) {
  53. return NPHash64(s.data(), s.size());
  54. }
  55. // TODO: consider rename to GetSliceHash32
  56. inline uint32_t GetSliceHash(const Slice& s) {
  57. return Hash(s.data(), s.size(), 397);
  58. }
  59. // Useful for splitting up a 64-bit hash
  60. inline uint32_t Upper32of64(uint64_t v) {
  61. return static_cast<uint32_t>(v >> 32);
  62. }
  63. inline uint32_t Lower32of64(uint64_t v) { return static_cast<uint32_t>(v); }
  64. // std::hash compatible interface.
  65. // TODO: consider rename to SliceHasher32
  66. struct SliceHasher {
  67. uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
  68. };
  69. // An alternative to % for mapping a hash value to an arbitrary range. See
  70. // https://github.com/lemire/fastrange
  71. inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
  72. uint64_t product = uint64_t{range} * hash;
  73. return static_cast<uint32_t>(product >> 32);
  74. }
  75. // An alternative to % for mapping a 64-bit hash value to an arbitrary range
  76. // that fits in size_t. See https://github.com/lemire/fastrange
  77. // We find size_t more convenient than uint64_t for the range, with side
  78. // benefit of better optimization on 32-bit platforms.
  79. inline size_t fastrange64(uint64_t hash, size_t range) {
  80. #if defined(HAVE_UINT128_EXTENSION)
  81. // Can use compiler's 128-bit type. Trust it to do the right thing.
  82. __uint128_t wide = __uint128_t{range} * hash;
  83. return static_cast<size_t>(wide >> 64);
  84. #else
  85. // Fall back: full decomposition.
  86. // NOTE: GCC seems to fully understand this code as 64-bit x {32 or 64}-bit
  87. // -> {96 or 128}-bit multiplication and optimize it down to a single
  88. // wide-result multiplication (64-bit platform) or two wide-result
  89. // multiplications (32-bit platforms, where range64 >> 32 is zero).
  90. uint64_t range64 = range; // ok to shift by 32, even if size_t is 32-bit
  91. uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF};
  92. tmp >>= 32;
  93. tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32};
  94. // Avoid overflow: first add lower 32 of tmp2, and later upper 32
  95. uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF};
  96. tmp += static_cast<uint32_t>(tmp2);
  97. tmp >>= 32;
  98. tmp += (tmp2 >> 32);
  99. tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32};
  100. return static_cast<size_t>(tmp);
  101. #endif
  102. }
  103. } // namespace ROCKSDB_NAMESPACE