bloom_test.cc 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
// Without gflags the test cannot parse its command-line parameters, so the
// binary degrades to a stub that reports the missing dependency and exits
// with success (treated as a skip by the test harness).
#ifndef GFLAGS
#include <cstdio>
int main() {
  fprintf(stderr, "Please install gflags to run this test... Skipping...\n");
  return 0;
}
#else
  16. #include <array>
  17. #include <cmath>
  18. #include <vector>
  19. #include "cache/cache_entry_roles.h"
  20. #include "cache/cache_reservation_manager.h"
  21. #include "memory/arena.h"
  22. #include "port/jemalloc_helper.h"
  23. #include "rocksdb/convenience.h"
  24. #include "rocksdb/filter_policy.h"
  25. #include "table/block_based/filter_policy_internal.h"
  26. #include "test_util/testharness.h"
  27. #include "test_util/testutil.h"
  28. #include "util/gflags_compat.h"
  29. #include "util/hash.h"
  30. using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// The test is not fully designed for bits_per_key other than 10, but with
// this parameter you can easily explore the behavior of other bits_per_key.
// See also filter_bench.
DEFINE_int32(bits_per_key, 10, "");

namespace ROCKSDB_NAMESPACE {

namespace {
// Canonical policy class names, used as the TEST_P parameter values to
// select which filter implementation a given test instance exercises.
const std::string kLegacyBloom = test::LegacyBloomFilterPolicy::kClassName();
const std::string kFastLocalBloom =
    test::FastLocalBloomFilterPolicy::kClassName();
const std::string kStandard128Ribbon =
    test::Standard128RibbonFilterPolicy::kClassName();
}  // namespace

// Verbosity level; >= 1 prints per-filter FP-rate summaries to stderr.
static const int kVerbose = 1;
// Encodes `i` as a fixed-width 32-bit key (via PutFixed32) into `buffer`,
// which must hold at least sizeof(int) bytes, and returns a Slice over it.
// The caller owns `buffer`; the returned Slice aliases it.
static Slice Key(int i, char* buffer) {
  std::string s;
  PutFixed32(&s, static_cast<uint32_t>(i));
  memcpy(buffer, s.c_str(), sizeof(i));
  return Slice(buffer, sizeof(i));
}
  50. static int NextLength(int length) {
  51. if (length < 10) {
  52. length += 1;
  53. } else if (length < 100) {
  54. length += 10;
  55. } else if (length < 1000) {
  56. length += 100;
  57. } else {
  58. length += 1000;
  59. }
  60. return length;
  61. }
// Fixture parameterized by filter policy class name (kLegacyBloom,
// kFastLocalBloom, kStandard128Ribbon). Wraps a FilterBitsBuilder /
// FilterBitsReader pair and provides helpers for adding keys, building a
// filter, probing it, and inspecting the trailing metadata bytes of the
// serialized filter.
class FullBloomTest : public testing::TestWithParam<std::string> {
 protected:
  BlockBasedTableOptions table_options_;

 private:
  // Reference bound to table_options_.filter_policy. table_options_ is
  // declared above so it is constructed before this reference is bound in
  // the member-initializer list.
  std::shared_ptr<const FilterPolicy>& policy_;
  std::unique_ptr<FilterBitsBuilder> bits_builder_;
  std::unique_ptr<FilterBitsReader> bits_reader_;
  // Owns the serialized filter bytes produced by Finish().
  std::unique_ptr<const char[]> buf_;
  size_t filter_size_;

 public:
  FullBloomTest() : policy_(table_options_.filter_policy), filter_size_(0) {
    ResetPolicy();
  }

  BuiltinFilterBitsBuilder* GetBuiltinFilterBitsBuilder() {
    // Pointer-form dynamic_cast returns nullptr (does not throw) if the
    // builder is not a BuiltinFilterBitsBuilder.
    return dynamic_cast<BuiltinFilterBitsBuilder*>(bits_builder_.get());
  }

  const BloomLikeFilterPolicy* GetBloomLikeFilterPolicy() {
    // Reference-form dynamic_cast throws std::bad_cast on mismatch.
    return &dynamic_cast<const BloomLikeFilterPolicy&>(*policy_);
  }

  // Discards any built filter and creates a fresh builder from the current
  // policy and table options.
  void Reset() {
    bits_builder_.reset(BloomFilterPolicy::GetBuilderFromContext(
        FilterBuildingContext(table_options_)));
    bits_reader_.reset(nullptr);
    buf_.reset(nullptr);
    filter_size_ = 0;
  }

  // Installs a fresh policy of the parameterized type with the given
  // bits_per_key, then Reset()s builder state.
  void ResetPolicy(double bits_per_key) {
    policy_ = BloomLikeFilterPolicy::Create(GetParam(), bits_per_key);
    Reset();
  }

  void ResetPolicy() { ResetPolicy(FLAGS_bits_per_key); }

  void Add(const Slice& s) { bits_builder_->AddKey(s); }

  // Opens a reader directly over caller-provided filter bytes, e.g. for
  // testing raw or corrupt filter data.
  void OpenRaw(const Slice& s) {
    bits_reader_.reset(policy_->GetFilterBitsReader(s));
  }

  // Finalizes the filter from the added keys and opens a reader over it.
  void Build() {
    Slice filter = bits_builder_->Finish(&buf_);
    bits_reader_.reset(policy_->GetFilterBitsReader(filter));
    filter_size_ = filter.size();
  }

  size_t FilterSize() const { return filter_size_; }

  Slice FilterData() { return Slice(buf_.get(), filter_size_); }

  // Extracts num_probes from the filter's 5 trailing metadata bytes.
  // A raw value of -1 marks the newer Bloom format, which stores the
  // probe count two bytes later; otherwise the raw byte is the count.
  int GetNumProbesFromFilterData() {
    assert(filter_size_ >= 5);
    int8_t raw_num_probes = static_cast<int8_t>(buf_.get()[filter_size_ - 5]);
    if (raw_num_probes == -1) {  // New bloom filter marker
      return static_cast<uint8_t>(buf_.get()[filter_size_ - 3]);
    } else {
      return raw_num_probes;
    }
  }

  // Extracts the Ribbon seed from the trailing metadata, asserting the -2
  // Standard128 Ribbon marker byte is present.
  int GetRibbonSeedFromFilterData() {
    assert(filter_size_ >= 5);
    // Check for ribbon marker
    assert(-2 == static_cast<int8_t>(buf_.get()[filter_size_ - 5]));
    return static_cast<uint8_t>(buf_.get()[filter_size_ - 4]);
  }

  // Lazily Build()s on first query, then probes the filter.
  bool Matches(const Slice& s) {
    if (bits_reader_ == nullptr) {
      Build();
    }
    return bits_reader_->MayMatch(s);
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for reasonably high FP rates: a 64-bit mask of which of the
  // keys 12345..12408 (not added) match.
  uint64_t PackedMatches() {
    char buffer[sizeof(int)];
    uint64_t result = 0;
    for (int i = 0; i < 64; i++) {
      if (Matches(Key(i + 12345, buffer))) {
        result |= uint64_t{1} << i;
      }
    }
    return result;
  }

  // Provides a kind of fingerprint on the Bloom filter's
  // behavior, for lower FP rates: a comma-separated list of the indices of
  // the first `count` false positives among keys 1000000.. (not added).
  std::string FirstFPs(int count) {
    char buffer[sizeof(int)];
    std::string rv;
    int fp_count = 0;
    for (int i = 0; i < 1000000; i++) {
      if (Matches(Key(i + 1000000, buffer))) {
        ++fp_count;
        rv += std::to_string(i);
        if (fp_count == count) {
          break;
        }
        rv += ',';
      }
    }
    return rv;
  }

  // Estimates the false positive rate by probing 10000 keys (offset by
  // 1000000000) that were never added.
  double FalsePositiveRate() {
    char buffer[sizeof(int)];
    int result = 0;
    for (int i = 0; i < 10000; i++) {
      if (Matches(Key(i + 1000000000, buffer))) {
        result++;
      }
    }
    return result / 10000.0;
  }
};
TEST_P(FullBloomTest, FilterSize) {
  // In addition to checking the consistency of space computation, we are
  // checking that denoted and computed doubles are interpreted as expected
  // as bits_per_key values.
  bool some_computed_less_than_denoted = false;
  // Note: to avoid unproductive configurations, bits_per_key < 0.5 is rounded
  // down to 0 (no filter), and 0.5 <= bits_per_key < 1.0 is rounded up to 1
  // bit per key (1000 millibits). Also, enforced maximum is 100 bits per key
  // (100000 millibits).
  for (auto bpk : std::vector<std::pair<double, int> >{{-HUGE_VAL, 0},
                                                       {-INFINITY, 0},
                                                       {0.0, 0},
                                                       {0.499, 0},
                                                       {0.5, 1000},
                                                       {1.234, 1234},
                                                       {3.456, 3456},
                                                       {9.5, 9500},
                                                       {10.0, 10000},
                                                       {10.499, 10499},
                                                       {21.345, 21345},
                                                       {99.999, 99999},
                                                       {1234.0, 100000},
                                                       {HUGE_VAL, 100000},
                                                       {INFINITY, 100000},
                                                       {NAN, 100000}}) {
    // First check the denoted double directly (pair = {input, expected
    // millibits per key}).
    ResetPolicy(bpk.first);
    auto bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
    double computed = bpk.first;
    // This transforms e.g. 9.5 -> 9.499999999999998, which we still
    // round to 10 for whole bits per key.
    computed += 0.5;
    computed /= 1234567.0;
    computed *= 1234567.0;
    computed -= 0.5;
    some_computed_less_than_denoted |= (computed < bpk.first);
    ResetPolicy(computed);
    bfp = GetBloomLikeFilterPolicy();
    EXPECT_EQ(bpk.second, bfp->GetMillibitsPerKey());
    EXPECT_EQ((bpk.second + 500) / 1000, bfp->GetWholeBitsPerKey());
    auto bits_builder = GetBuiltinFilterBitsBuilder();
    if (bpk.second == 0) {
      // bits_per_key rounded down to zero means "no filter": no builder.
      ASSERT_EQ(bits_builder, nullptr);
      continue;
    }
    size_t n = 1;
    size_t space = 0;
    for (; n < 1000000; n += 1 + n / 1000) {
      // Ensure consistency between CalculateSpace and ApproximateNumEntries
      space = bits_builder->CalculateSpace(n);
      size_t n2 = bits_builder->ApproximateNumEntries(space);
      EXPECT_GE(n2, n);
      size_t space2 = bits_builder->CalculateSpace(n2);
      if (n > 12000 && GetParam() == kStandard128Ribbon) {
        // TODO(peterd): better approximation?
        EXPECT_GE(space2, space);
        EXPECT_LE(space2 * 0.998, space * 1.0);
      } else {
        EXPECT_EQ(space2, space);
      }
    }
    // Until size_t overflow
    for (; n < (n + n / 3); n += n / 3) {
      // Ensure space computation is not overflowing; capped is OK
      size_t space2 = bits_builder->CalculateSpace(n);
      EXPECT_GE(space2, space);
      space = space2;
    }
  }
  // Check that the compiler hasn't optimized our computation into nothing
  EXPECT_TRUE(some_computed_less_than_denoted);
  ResetPolicy();
}
  243. TEST_P(FullBloomTest, FullEmptyFilter) {
  244. // Empty filter is not match, at this level
  245. ASSERT_TRUE(!Matches("hello"));
  246. ASSERT_TRUE(!Matches("world"));
  247. }
  248. TEST_P(FullBloomTest, FullSmall) {
  249. Add("hello");
  250. Add("world");
  251. ASSERT_TRUE(Matches("hello"));
  252. ASSERT_TRUE(Matches("world"));
  253. ASSERT_TRUE(!Matches("x"));
  254. ASSERT_TRUE(!Matches("foo"));
  255. }
TEST_P(FullBloomTest, FullVaryingLengths) {
  // Match how this test was originally built
  table_options_.optimize_filters_for_memory = false;
  char buffer[sizeof(int)];
  // Count number of filters that significantly exceed the false positive rate
  int mediocre_filters = 0;
  int good_filters = 0;
  // Sweep key counts roughly logarithmically (see NextLength).
  for (int length = 1; length <= 10000; length = NextLength(length)) {
    Reset();
    for (int i = 0; i < length; i++) {
      Add(Key(i, buffer));
    }
    Build();
    // Size bound: bits-per-key budget plus cache-line and metadata slack.
    EXPECT_LE(FilterSize(), (size_t)((length * FLAGS_bits_per_key / 8) +
                                     CACHE_LINE_SIZE * 2 + 5));
    // All added keys must match
    for (int i = 0; i < length; i++) {
      ASSERT_TRUE(Matches(Key(i, buffer)))
          << "Length " << length << "; key " << i;
    }
    // Check false positive rate
    double rate = FalsePositiveRate();
    if (kVerbose >= 1) {
      fprintf(stderr, "False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
              rate * 100.0, length, static_cast<int>(FilterSize()));
    }
    // FP-rate thresholds below are only calibrated for the default 10
    // bits per key.
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(rate, 0.02);  // Must not be over 2%
      if (rate > 0.0125) {
        mediocre_filters++;  // Allowed, but not too often
      } else {
        good_filters++;
      }
    }
  }
  if (kVerbose >= 1) {
    fprintf(stderr, "Filters: %d good, %d mediocre\n", good_filters,
            mediocre_filters);
  }
  EXPECT_LE(mediocre_filters, good_filters / 5);
}
// Checks aggregate size/memory accounting for optimize_filters_for_memory
// on and off, over 100 filters of pseudo-random sizes.
TEST_P(FullBloomTest, OptimizeForMemory) {
  // Verify default option
  EXPECT_EQ(BlockBasedTableOptions().optimize_filters_for_memory, true);
  char buffer[sizeof(int)];
  for (bool offm : {true, false}) {
    table_options_.optimize_filters_for_memory = offm;
    ResetPolicy();
    Random32 rnd(12345);  // fixed seed for reproducibility
    uint64_t total_size = 0;
    uint64_t total_mem = 0;
    int64_t total_keys = 0;
    double total_fp_rate = 0;
    constexpr int nfilters = 100;
    for (int i = 0; i < nfilters; ++i) {
      int nkeys = static_cast<int>(rnd.Uniformish(10000)) + 100;
      Reset();
      for (int j = 0; j < nkeys; ++j) {
        Add(Key(j, buffer));
      }
      Build();
      size_t size = FilterData().size();
      total_size += size;
      // optimize_filters_for_memory currently only has an effect with
      // malloc_usable_size support, but we run the rest of the test to ensure
      // no bad behavior without it.
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      size = malloc_usable_size(const_cast<char*>(FilterData().data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      total_mem += size;
      total_keys += nkeys;
      total_fp_rate += FalsePositiveRate();
    }
    // Average FP-rate bounds, calibrated for the default 10 bits per key.
    if (FLAGS_bits_per_key == 10) {
      EXPECT_LE(total_fp_rate / double{nfilters}, 0.011);
      EXPECT_GE(total_fp_rate / double{nfilters},
                CACHE_LINE_SIZE >= 256 ? 0.007 : 0.008);
    }
    // Expected minimum total bytes from the bits-per-key budget.
    int64_t ex_min_total_size = int64_t{FLAGS_bits_per_key} * total_keys / 8;
    if (GetParam() == kStandard128Ribbon) {
      // ~ 30% savings vs. Bloom filter
      ex_min_total_size = 7 * ex_min_total_size / 10;
    }
    EXPECT_GE(static_cast<int64_t>(total_size), ex_min_total_size);
    // Per-filter overhead: one cache line plus 5 metadata bytes.
    int64_t blocked_bloom_overhead = nfilters * (CACHE_LINE_SIZE + 5);
    if (GetParam() == kLegacyBloom) {
      // this config can add extra cache line to make odd number
      blocked_bloom_overhead += nfilters * CACHE_LINE_SIZE;
    }
    EXPECT_GE(total_mem, total_size);
    // optimize_filters_for_memory not implemented with legacy Bloom
    if (offm && GetParam() != kLegacyBloom) {
      // This value can include a small extra penalty for kExtraPadding
      fprintf(stderr, "Internal fragmentation (optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // Less than 1% internal fragmentation
      EXPECT_LE(total_mem, total_size * 101 / 100);
      // Up to 2% storage penalty
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size * 102 / 100 + blocked_bloom_overhead);
    } else {
      fprintf(stderr, "Internal fragmentation (not optimized): %g%%\n",
              (total_mem - total_size) * 100.0 / total_size);
      // TODO: add control checks for more allocators?
#ifdef ROCKSDB_JEMALLOC
      fprintf(stderr, "Jemalloc detected? %d\n", HasJemalloc());
      if (HasJemalloc()) {
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
        // More than 5% internal fragmentation
        EXPECT_GE(total_mem, total_size * 105 / 100);
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
      }
#endif  // ROCKSDB_JEMALLOC
      // No storage penalty, just usual overhead
      EXPECT_LE(static_cast<int64_t>(total_size),
                ex_min_total_size + blocked_bloom_overhead);
    }
  }
}
// Fixture for filter-construction cache-charging tests.
class ChargeFilterConstructionTest : public testing::Test {};
TEST_F(ChargeFilterConstructionTest, RibbonFilterFallBackOnLargeBanding) {
  // Cache holds 8 dummy reservation entries; with charging enabled, Ribbon
  // banding for this many keys is expected to exceed the strict capacity
  // and force a fallback to Bloom (checked via the metadata marker below).
  constexpr std::size_t kCacheCapacity =
      8 * CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize();
  constexpr std::size_t num_entries_for_cache_full = kCacheCapacity / 8;
  for (CacheEntryRoleOptions::Decision charge_filter_construction_mem :
       {CacheEntryRoleOptions::Decision::kEnabled,
        CacheEntryRoleOptions::Decision::kDisabled}) {
    bool will_fall_back = charge_filter_construction_mem ==
                          CacheEntryRoleOptions::Decision::kEnabled;
    BlockBasedTableOptions table_options;
    table_options.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kFilterConstruction,
         {/*.charged = */ charge_filter_construction_mem}});
    LRUCacheOptions lo;
    lo.capacity = kCacheCapacity;
    lo.num_shard_bits = 0;  // 2^0 shard
    lo.strict_capacity_limit = true;
    std::shared_ptr<Cache> cache(NewLRUCache(lo));
    table_options.block_cache = cache;
    table_options.filter_policy =
        BloomLikeFilterPolicy::Create(kStandard128Ribbon, FLAGS_bits_per_key);
    FilterBuildingContext ctx(table_options);
    std::unique_ptr<FilterBitsBuilder> filter_bits_builder(
        table_options.filter_policy->GetBuilderWithContext(ctx));
    char key_buffer[sizeof(int)];
    for (std::size_t i = 0; i < num_entries_for_cache_full; ++i) {
      filter_bits_builder->AddKey(Key(static_cast<int>(i), key_buffer));
    }
    std::unique_ptr<const char[]> buf;
    Slice filter = filter_bits_builder->Finish(&buf);
    // To verify Ribbon Filter fallbacks to Bloom Filter properly
    // based on cache charging result
    // See BloomFilterPolicy::GetBloomBitsReader re: metadata
    // -1 = Marker for newer Bloom implementations
    // -2 = Marker for Standard128 Ribbon
    if (will_fall_back) {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-1));
    } else {
      EXPECT_EQ(filter.data()[filter.size() - 5], static_cast<char>(-2));
    }
    if (charge_filter_construction_mem ==
        CacheEntryRoleOptions::Decision::kEnabled) {
      // Final pinned reservation should cover the finished filter, rounded
      // up to a whole number of dummy entries (but no more than one extra).
      const size_t dummy_entry_num = static_cast<std::size_t>(std::ceil(
          filter.size() * 1.0 /
          CacheReservationManagerImpl<
              CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()));
      EXPECT_GE(
          cache->GetPinnedUsage(),
          dummy_entry_num *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
      EXPECT_LT(
          cache->GetPinnedUsage(),
          (dummy_entry_num + 1) *
              CacheReservationManagerImpl<
                  CacheEntryRole::kFilterConstruction>::GetDummyEntrySize());
    } else {
      // Charging disabled: nothing should be reserved in the cache.
      EXPECT_EQ(cache->GetPinnedUsage(), 0);
    }
  }
}
namespace {
// Selects, at preprocessing time, the expected golden value for the
// build's CACHE_LINE_SIZE (64, 128, or 256 bytes). The (void) casts
// silence unused-parameter warnings for the branches compiled out.
inline uint32_t SelectByCacheLineSize(uint32_t for64, uint32_t for128,
                                      uint32_t for256) {
  (void)for64;
  (void)for128;
  (void)for256;
#if CACHE_LINE_SIZE == 64
  return for64;
#elif CACHE_LINE_SIZE == 128
  return for128;
#elif CACHE_LINE_SIZE == 256
  return for256;
#else
#error "CACHE_LINE_SIZE unknown or unrecognized"
#endif
}
}  // namespace
// Ensure the implementation doesn't accidentally change in an
// incompatible way. This test doesn't check the reading side
// (FirstFPs/PackedMatches) for LegacyBloom because it requires the
// ability to read filters generated using other cache line sizes.
// See RawSchema.
TEST_P(FullBloomTest, Schema) {
  // Match how this test was originally built
  table_options_.optimize_filters_for_memory = false;
// Conditional-expectation macros: each asserts only when the current
// TEST_P parameter matches (or excludes) the named implementation.
#define EXPECT_EQ_Bloom(a, b)               \
  {                                         \
    if (GetParam() != kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_Ribbon(a, b)              \
  {                                         \
    if (GetParam() == kStandard128Ribbon) { \
      EXPECT_EQ(a, b);                      \
    }                                       \
  }
#define EXPECT_EQ_FastBloom(a, b)        \
  {                                      \
    if (GetParam() == kFastLocalBloom) { \
      EXPECT_EQ(a, b);                   \
    }                                    \
  }
#define EXPECT_EQ_LegacyBloom(a, b)   \
  {                                   \
    if (GetParam() == kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }
#define EXPECT_EQ_NotLegacy(a, b)     \
  {                                   \
    if (GetParam() != kLegacyBloom) { \
      EXPECT_EQ(a, b);                \
    }                                 \
  }
  char buffer[sizeof(int)];
  // First do a small number of keys, where Ribbon config will fall back on
  // fast Bloom filter and generate the same data
  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 87; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ(GetNumProbesFromFilterData(), 3);
  EXPECT_EQ_NotLegacy(BloomHash(FilterData()), 4130687756U);
  EXPECT_EQ_NotLegacy("31,38,40,43,61,83,86,112,125,131", FirstFPs(10));
  // Now use enough keys so that changing bits / key by 1 is guaranteed to
  // change number of allocated cache lines. So keys > max cache line bits.
  // Note that the first attempted Ribbon seed is determined by the hash
  // of the first key added (for pseudorandomness in practice, determinism in
  // testing)
  ResetPolicy(2);  // num_probes = 1
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 1);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1567096579, 1964771444, 2659542661U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3817481309U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1705851228U);
  EXPECT_EQ_FastBloom("11,13,17,25,29,30,35,37,45,53", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,8,10,17,19,20,23,28,31,32", FirstFPs(10));
  ResetPolicy(3);  // num_probes = 2
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 2);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2707206547U, 2571983456U, 218344685));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2807269961U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1095342358U);
  EXPECT_EQ_FastBloom("4,15,17,24,27,28,29,53,63,70", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,17,20,28,32,33,36,43,49,54", FirstFPs(10));
  ResetPolicy(5);  // num_probes = 3
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 3);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(515748486, 94611728, 2436112214U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 204628445U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3971337699U);
  EXPECT_EQ_FastBloom("15,24,29,39,53,87,89,100,103,104", FirstFPs(10));
  EXPECT_EQ_Ribbon("3,33,36,43,67,70,76,78,84,102", FirstFPs(10));
  ResetPolicy(8);  // num_probes = 5
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 5);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1302145999, 2811644657U, 756553699));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 355564975U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3651449053U);
  EXPECT_EQ_FastBloom("16,60,66,126,220,238,244,256,265,287", FirstFPs(10));
  EXPECT_EQ_Ribbon("33,187,203,296,300,322,411,419,547,582", FirstFPs(10));
  ResetPolicy(9);  // num_probes = 6
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2092755149, 661139132, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2137566013U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1005676675U);
  EXPECT_EQ_FastBloom("156,367,791,872,945,1015,1139,1159,1265", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,187,203,296,411,419,604,612,615,619", FirstFPs(10));
  ResetPolicy(11);  // num_probes = 7
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(3755609649U, 1812694762, 1449142939));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2561502687U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3129900846U);
  EXPECT_EQ_FastBloom("34,74,130,236,643,882,962,1015,1035,1110", FirstFPs(10));
  EXPECT_EQ_Ribbon("411,419,623,665,727,794,955,1052,1323,1330", FirstFPs(10));
  // This used to be 9 probes, but 8 is a better choice for speed,
  // especially with SIMD groups of 8 probes, with essentially no
  // change in FP rate.
  // FP rate @ 9 probes, old Bloom: 0.4321%
  // FP rate @ 9 probes, new Bloom: 0.1846%
  // FP rate @ 8 probes, new Bloom: 0.1843%
  ResetPolicy(14);  // num_probes = 8 (new), 9 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 8);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(178861123, 379087593, 2574136516U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3709876890U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1855638875U);
  EXPECT_EQ_FastBloom("130,240,522,565,989,2002,2526,3147,3543", FirstFPs(9));
  EXPECT_EQ_Ribbon("665,727,1323,1755,3866,4232,4442,4492,4736", FirstFPs(9));
  // This used to be 11 probes, but 9 is a better choice for speed
  // AND accuracy.
  // FP rate @ 11 probes, old Bloom: 0.3571%
  // FP rate @ 11 probes, new Bloom: 0.0884%
  // FP rate @ 9 probes, new Bloom: 0.0843%
  ResetPolicy(16);  // num_probes = 9 (new), 11 (old)
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 11);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 9);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1129406313, 3049154394U, 1727750964));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 1087138490U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 459379967U);
  EXPECT_EQ_FastBloom("3299,3611,3916,6620,7822,8079,8482,8942", FirstFPs(8));
  EXPECT_EQ_Ribbon("727,1323,1755,4442,4736,5386,6974,7154,8222", FirstFPs(9));
  ResetPolicy(10);  // num_probes = 6, but different memory ratio vs. 9
  for (int key = 0; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 61);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(1478976371, 2910591341U, 1182970461));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2498541272U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1273231667U);
  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("296,411,419,612,619,623,630,665,686,727", FirstFPs(10));
  // Changing the first key changes the Ribbon seed (see note above).
  ResetPolicy(10);
  for (int key = /*CHANGED*/ 1; key < 2087; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), /*CHANGED*/ 184);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(4205696321U, 1132081253U, 2385981855U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 2058382345U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 3007790572U);
  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,152,383,497,589,633,737,781,911,990", FirstFPs(10));
  ResetPolicy(10);
  for (int key = 1; key < /*CHANGED*/ 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 23699164U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1942323379U);
  EXPECT_EQ_FastBloom("16,126,133,422,466,472,813,1002,1035", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,911,990,1048,1081,1414", FirstFPs(10));
  // With new fractional bits_per_key, check that we are rounding to
  // whole bits per key for old Bloom filters but fractional for
  // new Bloom filter.
  ResetPolicy(9.5);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_Bloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 3166884174U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 1148258663U);
  EXPECT_EQ_FastBloom("126,156,367,444,458,791,813,976,1015", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,54,95,360,589,693,737,911,990,1048", FirstFPs(10));
  ResetPolicy(10.499);
  for (int key = 1; key < 2088; key++) {
    Add(Key(key, buffer));
  }
  Build();
  EXPECT_EQ_LegacyBloom(GetNumProbesFromFilterData(), 6);
  EXPECT_EQ_FastBloom(GetNumProbesFromFilterData(), 7);
  EXPECT_EQ_Ribbon(GetRibbonSeedFromFilterData(), 184);
  EXPECT_EQ_LegacyBloom(
      BloomHash(FilterData()),
      /*SAME*/ SelectByCacheLineSize(2885052954U, 769447944, 4175124908U));
  EXPECT_EQ_FastBloom(BloomHash(FilterData()), 4098502778U);
  EXPECT_EQ_Ribbon(BloomHash(FilterData()), 792138188U);
  EXPECT_EQ_FastBloom("16,236,240,472,1015,1045,1111,1409,1465", FirstFPs(9));
  EXPECT_EQ_Ribbon("33,95,360,589,737,990,1048,1081,1414,1643", FirstFPs(10));
  ResetPolicy();
}
  710. // A helper class for testing custom or corrupt filter bits as read by
  711. // built-in FilterBitsReaders.
  712. struct RawFilterTester {
  713. // Buffer, from which we always return a tail Slice, so the
  714. // last five bytes are always the metadata bytes.
  715. std::array<char, 3000> data_{};
  716. // Points five bytes from the end
  717. char* metadata_ptr_;
  718. RawFilterTester() : metadata_ptr_(&*(data_.end() - 5)) {}
  719. Slice ResetNoFill(uint32_t len_without_metadata, uint32_t num_lines,
  720. uint32_t num_probes) {
  721. metadata_ptr_[0] = static_cast<char>(num_probes);
  722. EncodeFixed32(metadata_ptr_ + 1, num_lines);
  723. uint32_t len = len_without_metadata + /*metadata*/ 5;
  724. assert(len <= data_.size());
  725. return Slice(metadata_ptr_ - len_without_metadata, len);
  726. }
  727. Slice Reset(uint32_t len_without_metadata, uint32_t num_lines,
  728. uint32_t num_probes, bool fill_ones) {
  729. data_.fill(fill_ones ? 0xff : 0);
  730. return ResetNoFill(len_without_metadata, num_lines, num_probes);
  731. }
  732. Slice ResetWeirdFill(uint32_t len_without_metadata, uint32_t num_lines,
  733. uint32_t num_probes) {
  734. for (uint32_t i = 0; i < data_.size(); ++i) {
  735. data_[i] = static_cast<char>(0x7b7b >> (i % 7));
  736. }
  737. return ResetNoFill(len_without_metadata, num_lines, num_probes);
  738. }
  739. };
  740. TEST_P(FullBloomTest, RawSchema) {
  741. RawFilterTester cft;
  742. // Legacy Bloom configurations
  743. // Two probes, about 3/4 bits set: ~50% "FP" rate
  744. // One 256-byte cache line.
  745. OpenRaw(cft.ResetWeirdFill(256, 1, 2));
  746. EXPECT_EQ(uint64_t{11384799501900898790U}, PackedMatches());
  747. // Two 128-byte cache lines.
  748. OpenRaw(cft.ResetWeirdFill(256, 2, 2));
  749. EXPECT_EQ(uint64_t{10157853359773492589U}, PackedMatches());
  750. // Four 64-byte cache lines.
  751. OpenRaw(cft.ResetWeirdFill(256, 4, 2));
  752. EXPECT_EQ(uint64_t{7123594913907464682U}, PackedMatches());
  753. // Fast local Bloom configurations (marker 255 -> -1)
  754. // Two probes, about 3/4 bits set: ~50% "FP" rate
  755. // Four 64-byte cache lines.
  756. OpenRaw(cft.ResetWeirdFill(256, 2U << 8, 255));
  757. EXPECT_EQ(uint64_t{9957045189927952471U}, PackedMatches());
  758. // Ribbon configurations (marker 254 -> -2)
  759. // Even though the builder never builds configurations this
  760. // small (preferring Bloom), we can test that the configuration
  761. // can be read, for possible future-proofing.
  762. // 256 slots, one result column = 32 bytes (2 blocks, seed 0)
  763. // ~50% FP rate:
  764. // 0b0101010111110101010000110000011011011111100100001110010011101010
  765. OpenRaw(cft.ResetWeirdFill(32, 2U << 8, 254));
  766. EXPECT_EQ(uint64_t{6193930559317665002U}, PackedMatches());
  767. // 256 slots, three-to-four result columns = 112 bytes
  768. // ~ 1 in 10 FP rate:
  769. // 0b0000000000100000000000000000000001000001000000010000101000000000
  770. OpenRaw(cft.ResetWeirdFill(112, 2U << 8, 254));
  771. EXPECT_EQ(uint64_t{9007200345328128U}, PackedMatches());
  772. }
// Checks FilterBitsReader behavior on raw filter bytes covering good,
// trivial, corrupt, and reserved configurations: readable configs behave
// like real filters (match iff the bits say so), structurally bad configs
// fail safe by matching everything, and too-short/empty configs match
// nothing (as if built from zero keys).
TEST_P(FullBloomTest, CorruptFilters) {
  RawFilterTester cft;

  // Run each case with an all-zeros and an all-ones data fill; for a
  // well-formed filter, Matches() must equal the fill value.
  for (bool fill : {false, true}) {
    // Legacy Bloom configurations
    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    OpenRaw(cft.Reset(CACHE_LINE_SIZE * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 256 is unusual but legal cache line size
    OpenRaw(cft.Reset(256 * 3, 3, 6, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 30 should be max num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 30, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Good filter bits - returns same as fill
    // 1 should be min num_probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 1, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Type 1 trivial filter bits - returns true as if FP by zero probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 0, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 0, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as if built from zero keys
    OpenRaw(cft.Reset(0, 37, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Type 2 trivial filter bits - returns false as 0 size trumps 0 probes
    OpenRaw(cft.Reset(0, 0, 0, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Bad filter bits - returns true for safety
    // No solution to 0 * x == CACHE_LINE_SIZE
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 0, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // Can't have 3 * x == 4 for integer x
    OpenRaw(cft.Reset(4, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 97 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(97 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns true for safety
    // 65 bytes is not a power of two, so not a legal cache line size
    OpenRaw(cft.Reset(65 * 3, 3, 6, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Bad filter bits - returns false as if built from zero keys
    // < 5 bytes overall means missing even metadata
    OpenRaw(cft.Reset(static_cast<uint32_t>(-1), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    OpenRaw(cft.Reset(static_cast<uint32_t>(-5), 3, 6, fill));
    ASSERT_FALSE(Matches("hello"));
    ASSERT_FALSE(Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // 31 is not a useful num_probes, nor generated by RocksDB unless directly
    // using filter bits API without BloomFilterPolicy.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 31, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns same as fill (for now)
    // Similar, with 127, largest positive char
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 127, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Dubious filter bits - returns true (for now)
    // num_probes set to 128 / -128, lowest negative char
    // NB: Bug in implementation interprets this as negative and has same
    // effect as zero probes, but effectively reserves negative char values
    // for future use.
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 128, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Dubious filter bits - returns true (for now)
    // Similar, with 253 / -3
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 1, 253, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // #########################################################
    // Fast local Bloom configurations (marker 255 -> -1)
    // Good config with six probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 6U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Becomes bad/reserved config (always true) if any other byte set
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | 1U, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 16), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, (6U << 8) | (1U << 24), 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    // Good config, max 30 probes
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 30U << 8, 255, fill));
    ASSERT_EQ(fill, Matches("hello"));
    ASSERT_EQ(fill, Matches("world"));

    // Bad/reserved config (always true) if more than 30
    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 31U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 33U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 66U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));

    OpenRaw(cft.Reset(CACHE_LINE_SIZE, 130U << 8, 255, fill));
    ASSERT_TRUE(Matches("hello"));
    ASSERT_TRUE(Matches("world"));
  }

  // #########################################################
  // Ribbon configurations (marker 254 -> -2)
  // ("fill" doesn't work to detect good configurations, we just
  // have to rely on TN probability)
  // Good: 2 blocks * 16 bytes / segment * 4 columns = 128 bytes
  // seed = 123
  OpenRaw(cft.Reset(128, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Good: 2 blocks * 16 bytes / segment * 8 columns = 256 bytes
  OpenRaw(cft.Reset(256, (2U << 8) + 123U, 254, false));
  ASSERT_FALSE(Matches("hello"));
  ASSERT_FALSE(Matches("world"));

  // Surprisingly OK: 5000 blocks (640,000 slots) in only 1024 bits
  // -> average close to 0 columns
  OpenRaw(cft.Reset(128, (5000U << 8) + 123U, 254, false));
  // *Almost* all FPs
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
  // Need many queries to find a "true negative"
  for (int i = 0; Matches(std::to_string(i)); ++i) {
    ASSERT_LT(i, 1000);
  }

  // Bad: 1 block not allowed (for implementation detail reasons)
  OpenRaw(cft.Reset(128, (1U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));

  // Bad: 0 blocks not allowed
  OpenRaw(cft.Reset(128, (0U << 8) + 123U, 254, false));
  ASSERT_TRUE(Matches("hello"));
  ASSERT_TRUE(Matches("world"));
}
// Run the FullBloomTest suite against each built-in filter implementation.
INSTANTIATE_TEST_CASE_P(Full, FullBloomTest,
                        testing::Values(kLegacyBloom, kFastLocalBloom,
                                        kStandard128Ribbon));
  937. static double GetEffectiveBitsPerKey(FilterBitsBuilder* builder) {
  938. union {
  939. uint64_t key_value = 0;
  940. char key_bytes[8];
  941. };
  942. const unsigned kNumKeys = 1000;
  943. Slice key_slice{key_bytes, 8};
  944. for (key_value = 0; key_value < kNumKeys; ++key_value) {
  945. builder->AddKey(key_slice);
  946. }
  947. std::unique_ptr<const char[]> buf;
  948. auto filter = builder->Finish(&buf);
  949. return filter.size() * /*bits per byte*/ 8 / (1.0 * kNumKeys);
  950. }
  951. static void SetTestingLevel(int levelish, FilterBuildingContext* ctx) {
  952. if (levelish == -1) {
  953. // Flush is treated as level -1 for this option but actually level 0
  954. ctx->level_at_creation = 0;
  955. ctx->reason = TableFileCreationReason::kFlush;
  956. } else {
  957. ctx->level_at_creation = levelish;
  958. ctx->reason = TableFileCreationReason::kCompaction;
  959. }
  960. }
// Verifies NewRibbonFilterPolicy's bloom_before_level threshold: levels
// below the threshold (including flush, treated as level -1 here) get a
// Bloom filter (~10 bits/key), while levels at/after it get the more
// space-efficient Ribbon filter. INT_MAX forces Bloom everywhere, and
// non-level/non-universal compaction styles ignore the level entirely.
// The filter kind is detected indirectly via effective bits per key.
TEST(RibbonTest, RibbonTestLevelThreshold) {
  BlockBasedTableOptions opts;
  FilterBuildingContext ctx(opts);
  // One policy instance reused across iterations and reconfigured through
  // the ConfigureOption string API, to test that path as well.
  std::shared_ptr<FilterPolicy> reused{NewRibbonFilterPolicy(10)};

  // A few settings
  for (CompactionStyle cs : {kCompactionStyleLevel, kCompactionStyleUniversal,
                             kCompactionStyleFIFO, kCompactionStyleNone}) {
    ctx.compaction_style = cs;
    for (int bloom_before_level : {-1, 0, 1, 10, INT_MAX - 1, INT_MAX}) {
      SCOPED_TRACE("bloom_before_level=" + std::to_string(bloom_before_level));

      std::vector<std::shared_ptr<FilterPolicy> > policies;
      policies.emplace_back(NewRibbonFilterPolicy(10, bloom_before_level));

      if (bloom_before_level == 0) {
        // Also test new API default
        policies.emplace_back(NewRibbonFilterPolicy(10));
      }

      ASSERT_OK(reused->ConfigureOption({}, "bloom_before_level",
                                        std::to_string(bloom_before_level)));

      policies.push_back(reused);

      for (auto& policy : policies) {
        std::unique_ptr<FilterBitsBuilder> builder;
        // At the threshold level itself, Ribbon should be chosen
        // (skipped for INT_MAX, which cannot be a real level).
        if (bloom_before_level < INT_MAX) {
          // Claim to be generating filter for this level
          SetTestingLevel(bloom_before_level, &ctx);

          builder.reset(policy->GetBuilderWithContext(ctx));

          // Must be Ribbon (more space efficient than 10 bits per key)
          ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
        }
        // One level before the threshold, expectation depends on
        // compaction style and the forced-Bloom INT_MAX setting.
        if (bloom_before_level >= 0) {
          // Claim to be generating filter for previous level
          SetTestingLevel(bloom_before_level - 1, &ctx);

          builder.reset(policy->GetBuilderWithContext(ctx));

          if (cs == kCompactionStyleLevel || cs == kCompactionStyleUniversal) {
            // Level is considered.
            // Must be Bloom (~ 10 bits per key)
            ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
          } else if (bloom_before_level == INT_MAX) {
            // Force bloom option
            // Must be Bloom (~ 10 bits per key)
            ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
          } else {
            // Level is ignored under non-traditional compaction styles.
            // Must be Ribbon (more space efficient than 10 bits per key)
            ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
          }
        }

        // Like SST file writer
        ctx.level_at_creation = -1;
        ctx.reason = TableFileCreationReason::kMisc;

        builder.reset(policy->GetBuilderWithContext(ctx));

        if (bloom_before_level < INT_MAX) {
          // Must be Ribbon (more space efficient than 10 bits per key)
          ASSERT_LT(GetEffectiveBitsPerKey(builder.get()), 8);
        } else {
          // Force bloom option
          // Must be Bloom (~ 10 bits per key)
          ASSERT_GT(GetEffectiveBitsPerKey(builder.get()), 9);
        }
      }
    }
  }
}
  1023. } // namespace ROCKSDB_NAMESPACE
int main(int argc, char** argv) {
  // Install handler first so crashes during test setup get stack traces.
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  // InitGoogleTest must run before gflags parsing: it consumes and removes
  // gtest-specific flags from argv, leaving the rest for gflags.
  ::testing::InitGoogleTest(&argc, argv);
  ParseCommandLineFlags(&argc, &argv, true);
  return RUN_ALL_TESTS();
}
  1030. #endif // GFLAGS