hash.cc 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include <string.h>
  10. #include "util/coding.h"
  11. #include "util/hash.h"
  12. #include "util/util.h"
  13. #include "util/xxhash.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. uint32_t Hash(const char* data, size_t n, uint32_t seed) {
  16. // MurmurHash1 - fast but mediocre quality
  17. // https://github.com/aappleby/smhasher/wiki/MurmurHash1
  18. //
  19. const uint32_t m = 0xc6a4a793;
  20. const uint32_t r = 24;
  21. const char* limit = data + n;
  22. uint32_t h = static_cast<uint32_t>(seed ^ (n * m));
  23. // Pick up four bytes at a time
  24. while (data + 4 <= limit) {
  25. uint32_t w = DecodeFixed32(data);
  26. data += 4;
  27. h += w;
  28. h *= m;
  29. h ^= (h >> 16);
  30. }
  31. // Pick up remaining bytes
  32. switch (limit - data) {
  33. // Note: The original hash implementation used data[i] << shift, which
  34. // promotes the char to int and then performs the shift. If the char is
  35. // negative, the shift is undefined behavior in C++. The hash algorithm is
  36. // part of the format definition, so we cannot change it; to obtain the same
  37. // behavior in a legal way we just cast to uint32_t, which will do
  38. // sign-extension. To guarantee compatibility with architectures where chars
  39. // are unsigned we first cast the char to int8_t.
  40. case 3:
  41. h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16;
  42. FALLTHROUGH_INTENDED;
  43. case 2:
  44. h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8;
  45. FALLTHROUGH_INTENDED;
  46. case 1:
  47. h += static_cast<uint32_t>(static_cast<int8_t>(data[0]));
  48. h *= m;
  49. h ^= (h >> r);
  50. break;
  51. }
  52. return h;
  53. }
  54. // We are standardizing on a preview release of XXH3, because that's
  55. // the best available at time of standardizing.
  56. //
  57. // In testing (mostly Intel Skylake), this hash function is much more
  58. // thorough than Hash32 and is almost universally faster. Hash() only
  59. // seems faster when passing runtime-sized keys of the same small size
  60. // (less than about 24 bytes) thousands of times in a row; this seems
  61. // to allow the branch predictor to work some magic. XXH3's speed is
  62. // much less dependent on branch prediction.
  63. //
  64. // Hashing with a prefix extractor is potentially a common case of
  65. // hashing objects of small, predictable size. We could consider
  66. // bundling hash functions specialized for particular lengths with
  67. // the prefix extractors.
  68. uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
  69. return XXH3p_64bits_withSeed(data, n, seed);
  70. }
  71. uint64_t Hash64(const char* data, size_t n) {
  72. // Same as seed = 0
  73. return XXH3p_64bits(data, n);
  74. }
  75. } // namespace ROCKSDB_NAMESPACE