murmurhash.cc 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. /*
  7. Murmurhash from http://sites.google.com/site/murmurhash/
  8. All code is released to the public domain. For business purposes, Murmurhash
  9. is under the MIT license.
  10. */
  11. #include "murmurhash.h"
  12. #include "port/lang.h"
  13. #if defined(__x86_64__)
  14. // -------------------------------------------------------------------
  15. //
  16. // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
  17. // and endian-ness issues if used across multiple platforms.
  18. //
  19. // 64-bit hash for 64-bit platforms
  20. #ifdef ROCKSDB_UBSAN_RUN
  21. #if defined(__clang__)
  22. __attribute__((__no_sanitize__("alignment")))
  23. #elif defined(__GNUC__)
  24. __attribute__((__no_sanitize_undefined__))
  25. #endif
  26. #endif
  27. // clang-format off
  28. uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
  29. {
  30. const uint64_t m = 0xc6a4a7935bd1e995;
  31. const int r = 47;
  32. uint64_t h = seed ^ (len * m);
  33. const uint64_t * data = (const uint64_t *)key;
  34. const uint64_t * end = data + (len/8);
  35. while(data != end)
  36. {
  37. uint64_t k = *data++;
  38. k *= m;
  39. k ^= k >> r;
  40. k *= m;
  41. h ^= k;
  42. h *= m;
  43. }
  44. const unsigned char * data2 = (const unsigned char*)data;
  45. switch(len & 7)
  46. {
  47. case 7: h ^= ((uint64_t)data2[6]) << 48; FALLTHROUGH_INTENDED;
  48. case 6: h ^= ((uint64_t)data2[5]) << 40; FALLTHROUGH_INTENDED;
  49. case 5: h ^= ((uint64_t)data2[4]) << 32; FALLTHROUGH_INTENDED;
  50. case 4: h ^= ((uint64_t)data2[3]) << 24; FALLTHROUGH_INTENDED;
  51. case 3: h ^= ((uint64_t)data2[2]) << 16; FALLTHROUGH_INTENDED;
  52. case 2: h ^= ((uint64_t)data2[1]) << 8; FALLTHROUGH_INTENDED;
  53. case 1: h ^= ((uint64_t)data2[0]);
  54. h *= m;
  55. }
  56. h ^= h >> r;
  57. h *= m;
  58. h ^= h >> r;
  59. return h;
  60. }
  61. // clang-format on
  62. #elif defined(__i386__)
  63. // -------------------------------------------------------------------
  64. //
  65. // Note - This code makes a few assumptions about how your machine behaves -
  66. //
  67. // 1. We can read a 4-byte value from any address without crashing
  68. // 2. sizeof(int) == 4
  69. //
  70. // And it has a few limitations -
  71. //
  72. // 1. It will not work incrementally.
  73. // 2. It will not produce the same results on little-endian and big-endian
  74. // machines.
  75. // clang-format off
  76. unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
  77. {
  78. // 'm' and 'r' are mixing constants generated offline.
  79. // They're not really 'magic', they just happen to work well.
  80. const unsigned int m = 0x5bd1e995;
  81. const int r = 24;
  82. // Initialize the hash to a 'random' value
  83. unsigned int h = seed ^ len;
  84. // Mix 4 bytes at a time into the hash
  85. const unsigned char * data = (const unsigned char *)key;
  86. while(len >= 4)
  87. {
  88. unsigned int k = *(unsigned int *)data;
  89. k *= m;
  90. k ^= k >> r;
  91. k *= m;
  92. h *= m;
  93. h ^= k;
  94. data += 4;
  95. len -= 4;
  96. }
  97. // Handle the last few bytes of the input array
  98. switch(len)
  99. {
  100. case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED;
  101. case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED;
  102. case 1: h ^= data[0];
  103. h *= m;
  104. };
  105. // Do a few final mixes of the hash to ensure the last few
  106. // bytes are well-incorporated.
  107. h ^= h >> 13;
  108. h *= m;
  109. h ^= h >> 15;
  110. return h;
  111. }
  112. // clang-format on
  113. #else
  114. // -------------------------------------------------------------------
  115. //
  116. // Same as MurmurHash2, but endian- and alignment-neutral.
  117. // Half the speed though, alas.
  118. // clang-format off
  119. unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
  120. {
  121. const unsigned int m = 0x5bd1e995;
  122. const int r = 24;
  123. unsigned int h = seed ^ len;
  124. const unsigned char * data = (const unsigned char *)key;
  125. while(len >= 4)
  126. {
  127. unsigned int k;
  128. k = data[0];
  129. k |= data[1] << 8;
  130. k |= data[2] << 16;
  131. k |= data[3] << 24;
  132. k *= m;
  133. k ^= k >> r;
  134. k *= m;
  135. h *= m;
  136. h ^= k;
  137. data += 4;
  138. len -= 4;
  139. }
  140. switch(len)
  141. {
  142. case 3: h ^= data[2] << 16; FALLTHROUGH_INTENDED;
  143. case 2: h ^= data[1] << 8; FALLTHROUGH_INTENDED;
  144. case 1: h ^= data[0];
  145. h *= m;
  146. };
  147. h ^= h >> 13;
  148. h *= m;
  149. h ^= h >> 15;
  150. return h;
  151. }
  152. // clang-format on
  153. #endif