hash.h 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Common hash functions with convenient interfaces. If hashing a
  11. // statically-sized input in a performance-critical context, consider
  12. // calling a specific hash implementation directly, such as
  13. // XXH3_64bits from xxhash.h.
  14. //
  15. // Since this is a very common header, implementation details are kept
  16. // out-of-line. Out-of-lining also aids in tracking the time spent in
  17. // hashing functions. Inlining is of limited benefit for runtime-sized
  18. // hash inputs.
  19. #pragma once
  20. #include <cstddef>
  21. #include <cstdint>
  22. #include "rocksdb/slice.h"
  23. #include "util/fastrange.h"
  24. namespace ROCKSDB_NAMESPACE {
  25. // Stable/persistent 64-bit hash. Higher quality and generally faster than
  26. // Hash(), especially for inputs > 24 bytes.
  27. // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
  28. // results from previous seed. Recommend incrementing by a large odd number.
  29. uint64_t Hash64(const char* data, size_t n, uint64_t seed);
  30. // Specific optimization without seed (same as seed = 0)
  31. uint64_t Hash64(const char* data, size_t n);
  32. // Non-persistent hash. Must only used for in-memory data structures.
  33. // The hash results are thus subject to change between releases,
  34. // architectures, build configuration, etc. (Thus, it rarely makes sense
  35. // to specify a seed for this function, except for a "rolling" hash.)
  36. // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
  37. // results from previous seed. Recommend incrementing by a large odd number.
  38. inline uint64_t NPHash64(const char* data, size_t n, uint64_t seed) {
  39. #ifdef ROCKSDB_MODIFY_NPHASH
  40. // For testing "subject to change"
  41. return Hash64(data, n, seed + 123456789);
  42. #else
  43. // Currently same as Hash64
  44. return Hash64(data, n, seed);
  45. #endif
  46. }
  47. // Specific optimization without seed (same as seed = 0)
  48. inline uint64_t NPHash64(const char* data, size_t n) {
  49. #ifdef ROCKSDB_MODIFY_NPHASH
  50. // For testing "subject to change"
  51. return Hash64(data, n, 123456789);
  52. #else
  53. // Currently same as Hash64
  54. return Hash64(data, n);
  55. #endif
  56. }
  57. // Convenient and equivalent version of Hash128 without depending on 128-bit
  58. // scalars
  59. void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64);
  60. void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64,
  61. uint64_t* low64);
  62. // Hash 128 bits to 128 bits, guaranteed not to lose data (equivalent to
  63. // Hash2x64 on 16 bytes little endian)
  64. void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64,
  65. uint64_t* out_high64, uint64_t* out_low64);
  66. void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
  67. uint64_t* out_high64, uint64_t* out_low64);
  68. // Inverse of above (mostly for testing)
  69. void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64,
  70. uint64_t* out_high64, uint64_t* out_low64);
  71. void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
  72. uint64_t* out_high64, uint64_t* out_low64);
  73. // Stable/persistent 32-bit hash. Moderate quality and high speed on
  74. // small inputs.
  75. // TODO: consider rename to Hash32
  76. // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
  77. // results from previous seed. Recommend pseudorandom or hashed seeds.
  78. uint32_t Hash(const char* data, size_t n, uint32_t seed);
  79. // TODO: consider rename to LegacyBloomHash32
  80. inline uint32_t BloomHash(const Slice& key) {
  81. return Hash(key.data(), key.size(), 0xbc9f1d34);
  82. }
  83. inline uint64_t GetSliceHash64(const Slice& key) {
  84. return Hash64(key.data(), key.size());
  85. }
  86. // Provided for convenience for use with template argument deduction, where a
  87. // specific overload needs to be used.
  88. extern uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&);
  89. inline uint64_t GetSliceNPHash64(const Slice& s) {
  90. return NPHash64(s.data(), s.size());
  91. }
  92. inline uint64_t GetSliceNPHash64(const Slice& s, uint64_t seed) {
  93. return NPHash64(s.data(), s.size(), seed);
  94. }
  95. // Similar to `GetSliceNPHash64()` with `seed`, but input comes from
  96. // concatenation of `Slice`s in `data`.
  97. uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed);
  98. inline size_t GetSliceRangedNPHash(const Slice& s, size_t range) {
  99. return FastRange64(NPHash64(s.data(), s.size()), range);
  100. }
  101. // TODO: consider rename to GetSliceHash32
  102. inline uint32_t GetSliceHash(const Slice& s) {
  103. return Hash(s.data(), s.size(), 397);
  104. }
  105. // Useful for splitting up a 64-bit hash
  106. inline uint32_t Upper32of64(uint64_t v) {
  107. return static_cast<uint32_t>(v >> 32);
  108. }
  109. inline uint32_t Lower32of64(uint64_t v) { return static_cast<uint32_t>(v); }
  110. // std::hash-like interface.
  111. struct SliceHasher32 {
  112. uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
  113. };
  114. struct SliceNPHasher64 {
  115. uint64_t operator()(const Slice& s, uint64_t seed = 0) const {
  116. return GetSliceNPHash64(s, seed);
  117. }
  118. };
  119. } // namespace ROCKSDB_NAMESPACE