// dynamic_bloom.h
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <array>
#include <atomic>

#include "rocksdb/slice.h"
#include "table/multiget_context.h"
#include "util/atomic.h"
#include "util/hash.h"

namespace ROCKSDB_NAMESPACE {

class Slice;
class Allocator;
class Logger;
// A Bloom filter intended only to be used in memory, never serialized in a way
// that could lead to schema incompatibility. Supports opt-in lock-free
// concurrent access.
//
// This implementation is also intended for applications generally preferring
// speed vs. maximum accuracy: roughly 0.9x BF op latency for 1.1x FP rate.
// For 1% FP rate, that means that the latency of a look-up triggered by an FP
// should be less than roughly 100x the cost of a Bloom filter op.
//
// For simplicity and performance, the current implementation requires
// num_probes to be a multiple of two and <= 10.
//
class DynamicBloom {
 public:
  // allocator: pass allocator to bloom filter, hence trace the usage of memory
  // total_bits: fixed total bits for the bloom
  // num_probes: number of hash probes for a single key (even, <= 10; see
  //             class comment)
  // huge_page_tlb_size: if >0, try to allocate bloom bytes from huge page TLB
  //                     within this page size. Need to reserve huge pages for
  //                     it to be allocated, like:
  //                         sysctl -w vm.nr_hugepages=20
  //                     See linux doc Documentation/vm/hugetlbpage.txt
  // logger: presumably used to report allocation issues (e.g. huge page
  //         failures); may be nullptr -- verify in dynamic_bloom.cc
  explicit DynamicBloom(Allocator* allocator, uint32_t total_bits,
                        uint32_t num_probes = 6, size_t huge_page_tlb_size = 0,
                        Logger* logger = nullptr);

  // No-op destructor: this class does not free data_ itself (see data_ below).
  ~DynamicBloom() {}

  // Assuming single thread adding to the DynamicBloom
  void Add(const Slice& key);

  // Like Add, but may be called concurrently with other functions. Does not
  // establish happens-before relationship with other functions so requires some
  // external mechanism to ensure other threads can see the change.
  void AddConcurrently(const Slice& key);

  // Assuming single threaded access to this function.
  void AddHash(uint32_t hash);

  // Like AddHash, but may be called concurrently with other functions. Does not
  // establish happens-before relationship with other functions so requires some
  // external mechanism to ensure other threads can see the change.
  void AddHashConcurrently(uint32_t hash);

  // Multithreaded access to this function is OK
  bool MayContain(const Slice& key) const;

  // Batch variant: fills may_match[0..num_keys) for keys[0..num_keys).
  // num_keys must not exceed MultiGetContext::MAX_BATCH_SIZE (the size of
  // the internal scratch arrays). Multithreaded access is OK.
  void MayContain(int num_keys, Slice* keys, bool* may_match) const;

  // Multithreaded access to this function is OK
  bool MayContainHash(uint32_t hash) const;

  // Issues a prefetch for the filter word that hash h maps to.
  void Prefetch(uint32_t h);

 private:
  // Length of the structure, in 64-bit words. For this structure, "word"
  // will always refer to 64-bit words.
  uint32_t kLen;
  // We make the k probes in pairs, two for each 64-bit read/write. Thus,
  // this stores k/2, the number of words to double-probe.
  const uint32_t kNumDoubleProbes;

  // The filter bits. Non-owning here (the destructor is a no-op); presumably
  // the storage is owned by the Allocator passed to the constructor -- TODO
  // confirm against dynamic_bloom.cc.
  RelaxedAtomic<uint64_t>* data_;

  // or_func(ptr, mask) should effect *ptr |= mask with the appropriate
  // concurrency safety, working with bytes.
  template <typename OrFunc>
  void AddHash(uint32_t hash, const OrFunc& or_func);

  bool DoubleProbe(uint32_t h32, size_t a) const;
};
  75. inline void DynamicBloom::Add(const Slice& key) { AddHash(BloomHash(key)); }
  76. inline void DynamicBloom::AddConcurrently(const Slice& key) {
  77. AddHashConcurrently(BloomHash(key));
  78. }
  79. inline void DynamicBloom::AddHash(uint32_t hash) {
  80. AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
  81. ptr->StoreRelaxed(ptr->LoadRelaxed() | mask);
  82. });
  83. }
// Concurrency-safe insert of a pre-computed hash: sets probe bits with an
// atomic fetch-or, skipping the RMW entirely when all bits are already set.
inline void DynamicBloom::AddHashConcurrently(uint32_t hash) {
  AddHash(hash, [](RelaxedAtomic<uint64_t>* ptr, uint64_t mask) {
    // Happens-before between AddHash and MaybeContains is handled by
    // access to versions_->LastSequence(), so all we have to do here is
    // avoid races (so we don't give the compiler a license to mess up
    // our code) and not lose bits. std::memory_order_relaxed is enough
    // for that.
    //
    // The check-before-or avoids the more expensive atomic read-modify-write
    // when the target bits are already set.
    if ((mask & ptr->LoadRelaxed()) != mask) {
      ptr->FetchOrRelaxed(mask);
    }
  });
}
  96. inline bool DynamicBloom::MayContain(const Slice& key) const {
  97. return (MayContainHash(BloomHash(key)));
  98. }
  99. inline void DynamicBloom::MayContain(int num_keys, Slice* keys,
  100. bool* may_match) const {
  101. std::array<uint32_t, MultiGetContext::MAX_BATCH_SIZE> hashes;
  102. std::array<size_t, MultiGetContext::MAX_BATCH_SIZE> byte_offsets;
  103. for (int i = 0; i < num_keys; ++i) {
  104. hashes[i] = BloomHash(keys[i]);
  105. size_t a = FastRange32(hashes[i], kLen);
  106. PREFETCH(data_ + a, 0, 3);
  107. byte_offsets[i] = a;
  108. }
  109. for (int i = 0; i < num_keys; i++) {
  110. may_match[i] = DoubleProbe(hashes[i], byte_offsets[i]);
  111. }
  112. }
  113. #if defined(_MSC_VER)
  114. #pragma warning(push)
  115. // local variable is initialized but not referenced
  116. #pragma warning(disable : 4189)
  117. #endif
  118. inline void DynamicBloom::Prefetch(uint32_t h32) {
  119. size_t a = FastRange32(h32, kLen);
  120. PREFETCH(data_ + a, 0, 3);
  121. }
  122. #if defined(_MSC_VER)
  123. #pragma warning(pop)
  124. #endif
// Speed hacks in this implementation:
// * Uses fastrange instead of %
// * Minimum logic to determine first (and all) probed memory addresses.
//   (Uses constant bit-xor offsets from the starting probe address.)
// * (Major) Two probes per 64-bit memory fetch/write.
//   Code simplification / optimization: only allow even number of probes.
// * Very fast and effective (murmur-like) hash expansion/re-mixing. (At
//   least on recent CPUs, integer multiplication is very cheap. Each 64-bit
//   remix provides five pairs of bit addresses within a uint64_t.)
//   Code simplification / optimization: only allow up to 10 probes, from a
//   single 64-bit remix.
//
// The FP rate penalty for this implementation, vs. standard Bloom filter, is
// roughly 1.12x on top of the 1.15x penalty for a 512-bit cache-local Bloom.
// This implementation does not explicitly use the cache line size, but is
// effectively cache-local (up to 16 probes) because of the bit-xor offsetting.
//
// NB: could easily be upgraded to support a 64-bit hash and
// total_bits > 2^32 (512MB). (The latter is a bad idea without the former,
// because of false positives.)
  145. inline bool DynamicBloom::MayContainHash(uint32_t h32) const {
  146. size_t a = FastRange32(h32, kLen);
  147. PREFETCH(data_ + a, 0, 3);
  148. return DoubleProbe(h32, a);
  149. }
// Core lookup: checks two bits per 64-bit word over kNumDoubleProbes words.
// Returns true iff every probed bit is set. Must use exactly the same
// word-addressing and bit-selection sequence as the templated AddHash.
//
// NOTE(review): despite its name, byte_offset is an index into the array of
// 64-bit words (the value produced by FastRange32(h32, kLen)), not a byte
// count.
inline bool DynamicBloom::DoubleProbe(uint32_t h32, size_t byte_offset) const {
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe: the low 6 bits and the next 6 bits
    // of h each select one bit within the 64-bit word.
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    // Constant bit-xor offset from the starting word keeps successive probes
    // close together (see "Speed hacks" comment above this function group).
    uint64_t val = data_[byte_offset ^ i].LoadRelaxed();
    if (i + 1 >= kNumDoubleProbes) {
      // Last word: the overall answer is decided by this word's two bits.
      return (val & mask) == mask;
    } else if ((val & mask) != mask) {
      // Some probed bit is unset: definitely not present.
      return false;
    }
    // Rotate h by 12 to expose the next pair of 6-bit indices (up to five
    // pairs from one 64-bit remix).
    h = (h >> 12) | (h << 52);
  }
}
// Core insert: mirrors DoubleProbe's probe sequence exactly, but ORs the
// two-bit mask into each probed word via or_func (plain OR for the
// single-writer path, atomic fetch-or for the concurrent path).
template <typename OrFunc>
inline void DynamicBloom::AddHash(uint32_t h32, const OrFunc& or_func) {
  // Starting word index within data_ (fastrange instead of %).
  size_t a = FastRange32(h32, kLen);
  PREFETCH(data_ + a, 0, 3);
  // Expand/remix with 64-bit golden ratio
  uint64_t h = 0x9e3779b97f4a7c13ULL * h32;
  for (unsigned i = 0;; ++i) {
    // Two bit probes per uint64_t probe
    uint64_t mask =
        ((uint64_t)1 << (h & 63)) | ((uint64_t)1 << ((h >> 6) & 63));
    // XOR with the loop index must match the addressing in DoubleProbe.
    or_func(&data_[a ^ i], mask);
    if (i + 1 >= kNumDoubleProbes) {
      return;
    }
    // Rotate to expose the next pair of 6-bit bit indices.
    h = (h >> 12) | (h << 52);
  }
}
}  // namespace ROCKSDB_NAMESPACE