// dynamic_bloom.h
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once

#include <array>
#include <atomic>
#include <memory>
#include <string>

#include "port/port.h"
#include "rocksdb/slice.h"
#include "table/multiget_context.h"
#include "util/hash.h"
namespace ROCKSDB_NAMESPACE {

// Forward declarations keep this header light; only pointers/references to
// these types appear below, so full definitions are not required here.
class Slice;
class Allocator;
class Logger;
// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
//
// This implementation is also intended for applications generally preferring
// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
// For 1% FP rate, that means that the latency of a look-up triggered by an FP
// should be less than roughly 100x the cost of a Bloom filter op.
//
// For simplicity and performance, the current implementation requires
// num_probes to be a multiple of two and <= 10.
//
class DynamicBloom {
 public:
  // allocator: pass allocator to bloom filter, hence trace the usage of memory
  // total_bits: fixed total bits for the bloom
  // num_probes: number of hash probes for a single key
  // hash_func: customized hash function
  // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
  //                     within this page size. Need to reserve huge pages for
  //                     it to be allocated, like:
  //                         sysctl -w vm.nr_hugepages=20
  //                     See linux doc Documentation/vm/hugetlbpage.txt
  explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
                        uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
                        Logger* logger = nullptr);

  ~DynamicBloom() {}

  // Assuming single threaded access to this function.
  void Add(const Slice& key);

  // Like Add, but may be called concurrent with other functions.
  void AddConcurrently(const Slice& key);

  // Assuming single threaded access to this function.
  void AddHash(uint32_t hash);

  // Like AddHash, but may be called concurrent with other functions.
  void AddHashConcurrently(uint32_t hash);

  // Multithreaded access to this function is OK
  bool MayContain(const Slice& key) const;

  // Batch variant: checks num_keys keys and writes one result per key into
  // may_match. Multithreaded access to this function is OK.
  void MayContain(int num_keys, Slice** keys, bool* may_match) const;

  // Multithreaded access to this function is OK
  bool MayContainHash(uint32_t hash) const;

  // Issues a prefetch for the first probe word of hash h.
  void Prefetch(uint32_t h);

 private:
  // Length of the structure, in 64-bit words. For this structure, "word"
  // will always refer to 64-bit words.
  uint32_t kLen;
  // We make the k probes in pairs, two for each 64-bit read/write. Thus,
  // this stores k/2, the number of words to double-probe.
  const uint32_t kNumDoubleProbes;

  // The filter bits; atomic so the opt-in concurrent paths are race-free.
  std::atomic<uint64_t>* data_;

  // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
  // concurrency safety, working with bytes.
  template <typename OrFunc>
  void AddHash(uint32_t hash, const OrFunc& or_func);

  bool DoubleProbe(uint32_t h32, size_t a) const;
};
  73. inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }
  74. inline void DynamicBloom::AddConcurrently(const Slice& key) {
  75. AddHashConcurrently(BloomHash(key));
  76. }
  77. inline void DynamicBloom::AddHash(uint32_t hash) {
  78. AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
  79. ptr->store(ptr->load(std::memory_order_relaxed) | mask,
  80. std::memory_order_relaxed);
  81. });
  82. }
  83. inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
  84. AddHash(hash, [](std::atomic<uint64_t>* ptr, uint64_t mask) {
  85. // Happens-before between AddHash and MaybeContains is handled by
  86. // access to versions_->LastSequence(), so all we have to do here is
  87. // avoid races (so we don't give the compiler a license to mess up
  88. // our code) and not lose bits. std::memory_order_relaxed is enough
  89. // for that.
  90. if ((mask & ptr->load(std::memory_order_relaxed)) != mask) {
  91. ptr->fetch_or(mask, std::memory_order_relaxed);
  92. }
  93. });
  94. }
  95. inline bool DynamicBloom::MayContain(const Slice& key) const {
  96. return (MayContainHash(BloomHash(key)));
  97. }
  98. inline void DynamicBloom::MayContain(int num_keys, Slice** keys,
  99. bool* may_match) const {
  100. std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
  101. std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
  102. for (int i = 0; i < num_keys; ++i) {
  103. hashes[i] = BloomHash(*keys[i]);
  104. size_t a = fastrange32(kLen, hashes[i]);
  105. PREFETCH(data_ + a, 0, 3);
  106. byte_offsets[i] = a;
  107. }
  108. for (int i = 0; i < num_keys; i++) {
  109. may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
  110. }
  111. }
#if defined(_MSC_VER)
#pragma warning(push)
// local variable is initialized but not referenced
#pragma warning(disable : 4189)
#endif
// Prefetch the cache line holding the first probe word for h32.
// NOTE: when PREFETCH expands to nothing on a given platform, local `a`
// becomes unreferenced — hence the MSVC C4189 suppression around this
// function.
inline void DynamicBloom::Prefetch(uint32_t h32) {
  size_t a = fastrange32(kLen, h32);
  PREFETCH(data_ + a, 0, 3);
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
//   (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
//   Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
//   least on recent CPUs, integer multiplication is very cheap. Each 64-bit
//   remix provides five pairs of bit addresses within a uint64_t.)
//   Code simplification / optimization: only allow up to 10 probes, from a
//   single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
  144. inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
  145. size_t a = fastrange32(kLen, h32);
  146. PREFETCH(data_ + a, 0, 3);
  147. return DoubleProbe(h32, a);
  148. }
// Returns true iff every probed bit for h32 is set (i.e. "may contain").
// NOTE(review): despite the name, byte_offset is an index into data_ in
// 64-bit words (it comes from fastrange32(kLen, ...)) — confirm before
// renaming.
inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe: bit positions from the low 12
    // bits of h (6 bits each).
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    // xor-ing the loop counter keeps all probed words near the start word.
    uint64_t val = data_[byte_offset ^ i].load(std::memory_order_relaxed);
    if (i + 1 >= kNumDoubleProbes) {
      // Last word: both of its bits must be set for a (possible) match.
      return (val & mask) == mask;
    } else if ((val & mask) != mask) {
      // Some probed bit unset: key is definitely not in the filter.
      return false;
    }
    // Rotate right by 12 to expose fresh bits for the next pair of probes.
    h = (h >> 12) | (h << 52);
  }
}
// Sets all probe bits for h32, using or_func to perform each |= with the
// caller's chosen concurrency policy. Must mirror DoubleProbe's probing
// sequence exactly: same starting word, same two-bits-per-word masks, same
// xor-offset stepping and 12-bit rotation between words.
template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
  size_t a = fastrange32(kLen, h32);
  PREFETCH(data_ + a, 0, 3);
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    // xor-ing the loop counter keeps all probed words near the start word.
    or_func(&data_[a ^ i], mask);
    if (i + 1 >= kNumDoubleProbes) {
      return;
    }
    // Rotate right by 12 to expose fresh bits for the next pair of probes.
    h = (h >> 12) | (h << 52);
  }
}
}  // namespace ROCKSDB_NAMESPACE