db_block_cache_test.cc
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <cstdlib>
#include <functional>
#include <memory>
#include <unordered_set>

#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "cache/lru_cache.h"
#include "cache/typed_cache.h"
#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "env/unique_id_gen.h"
#include "port/stack_trace.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/unique_id_impl.h"
#include "test_util/secondary_cache_test_util.h"
#include "util/compression.h"
#include "util/defer.h"
#include "util/hash.h"
#include "util/math.h"
#include "util/random.h"
#include "utilities/fault_injection_fs.h"

namespace ROCKSDB_NAMESPACE {

class DBBlockCacheTest : public DBTestBase {
 private:
  size_t miss_count_ = 0;
  size_t hit_count_ = 0;
  size_t insert_count_ = 0;
  size_t failure_count_ = 0;
  size_t compression_dict_miss_count_ = 0;
  size_t compression_dict_hit_count_ = 0;
  size_t compression_dict_insert_count_ = 0;

 public:
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;

  DBBlockCacheTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {}
  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    // Set a small enough block size so that each key-value gets its own block.
    table_options.block_size = 1;
    return table_options;
  }
  Options GetOptions(const BlockBasedTableOptions& table_options) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.avoid_flush_during_recovery = false;
    // options.compression = kNoCompression;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    return options;
  }

  void InitTable(const Options& /*options*/) {
    std::string value(kValueSize, 'a');
    for (size_t i = 0; i < kNumBlocks; i++) {
      ASSERT_OK(Put(std::to_string(i), value.c_str()));
    }
  }

  void RecordCacheCounters(const Options& options) {
    miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
  }

  void RecordCacheCountersForCompressionDict(const Options& options) {
    compression_dict_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    compression_dict_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    compression_dict_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
  }

  void CheckCacheCounters(const Options& options, size_t expected_misses,
                          size_t expected_hits, size_t expected_inserts,
                          size_t expected_failures) {
    size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    ASSERT_EQ(miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(failure_count_ + expected_failures, new_failure_count);
    miss_count_ = new_miss_count;
    hit_count_ = new_hit_count;
    insert_count_ = new_insert_count;
    failure_count_ = new_failure_count;
  }

  void CheckCacheCountersForCompressionDict(
      const Options& options, size_t expected_compression_dict_misses,
      size_t expected_compression_dict_hits,
      size_t expected_compression_dict_inserts) {
    size_t new_compression_dict_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    size_t new_compression_dict_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    size_t new_compression_dict_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
    ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses,
              new_compression_dict_miss_count);
    ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits,
              new_compression_dict_hit_count);
    ASSERT_EQ(
        compression_dict_insert_count_ + expected_compression_dict_inserts,
        new_compression_dict_insert_count);
    compression_dict_miss_count_ = new_compression_dict_miss_count;
    compression_dict_hit_count_ = new_compression_dict_hit_count;
    compression_dict_insert_count_ = new_compression_dict_insert_count;
  }

  const std::array<size_t, kNumCacheEntryRoles> GetCacheEntryRoleCountsBg() {
    // Verify in cache entry role stats
    std::array<size_t, kNumCacheEntryRoles> cache_entry_role_counts;
    std::map<std::string, std::string> values;
    EXPECT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
                                    &values));
    for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
      auto role = static_cast<CacheEntryRole>(i);
      cache_entry_role_counts[i] =
          ParseSizeT(values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
    }
    return cache_entry_role_counts;
  }
};

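// Verifies that a zero-capacity block cache holds usage only while an
// iterator pins a block: usage rises after the iterator's Seek and returns
// to zero once the iterator is deleted.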
TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  ASSERT_EQ(0, cache->GetUsage());
  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(0));
  ASSERT_LT(0, cache->GetUsage());
  delete iter;
  iter = nullptr;
  ASSERT_EQ(0, cache->GetUsage());
}

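// Loads one block per iterator, then shrinks the cache capacity to the
// current usage and enables the strict capacity limit, expecting a
// MemoryLimit error for further reads while all blocks remain pinned.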
TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  // Load blocks into cache.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(std::to_string(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 1, 0, 1, 0);
    iterators[i].reset(iter);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  cache->SetCapacity(usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Test with strict capacity limit.
  cache->SetStrictCapacityLimit(true);
  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(kNumBlocks - 1));
  ASSERT_TRUE(iter->status().IsMemoryLimit());
  CheckCacheCounters(options, 1, 0, 0, 1);
  delete iter;
  iter = nullptr;

  // Release iterators and access cache again.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iterators[i].reset();
    CheckCacheCounters(options, 0, 0, 0, 0);
  }
  ASSERT_EQ(0, cache->GetPinnedUsage());
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(std::to_string(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 0, 1, 0, 0);
    iterators[i].reset(iter);
  }
}

#ifdef SNAPPY

namespace {
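// A PersistentCache implementation backed by an in-memory Cache; Insert can
// be disabled via read_only to simulate a read-only persistent cache tier.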
class PersistentCacheFromCache : public PersistentCache {
 public:
  PersistentCacheFromCache(std::shared_ptr<Cache> cache, bool read_only)
      : cache_(cache), read_only_(read_only) {}

  Status Insert(const Slice& key, const char* data,
                const size_t size) override {
    if (read_only_) {
      return Status::NotSupported();
    }
    std::unique_ptr<char[]> copy{new char[size]};
    std::copy_n(data, size, copy.get());
    Status s = cache_.Insert(key, copy.get(), size);
    if (s.ok()) {
      copy.release();
    }
    return s;
  }

  Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
                size_t* size) override {
    auto handle = cache_.Lookup(key);
    if (handle) {
      char* ptr = cache_.Value(handle);
      *size = cache_.get()->GetCharge(handle);
      data->reset(new char[*size]);
      std::copy_n(ptr, *size, data->get());
      cache_.Release(handle);
      return Status::OK();
    } else {
      return Status::NotFound();
    }
  }

  bool IsCompressed() override { return false; }

  StatsType Stats() override { return StatsType(); }

  std::string GetPrintableOptions() const override { return ""; }

  uint64_t NewId() override { return cache_.get()->NewId(); }

 private:
  BasicTypedSharedCacheInterface<char[], CacheEntryRole::kMisc> cache_;
  bool read_only_;
};

class ReadOnlyCacheWrapper : public CacheWrapper {
 public:
  using CacheWrapper::CacheWrapper;

  const char* Name() const override { return "ReadOnlyCacheWrapper"; }

  Status Insert(const Slice& /*key*/, Cache::ObjectPtr /*value*/,
                const CacheItemHelper* /*helper*/, size_t /*charge*/,
                Handle** /*handle*/, Priority /*priority*/,
                const Slice& /*compressed*/,
                CompressionType /*type*/) override {
    return Status::NotSupported();
  }
};

}  // anonymous namespace
#endif  // SNAPPY

// Make sure that when options.block_cache is set, after a new table is
// created its index/filter blocks are added to block cache.
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));

  // index/filter blocks added to block cache right after table creation.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, /* only index/filter were added */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
  uint64_t int_num;
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

  // Make sure filter block is in cache.
  std::string value;
  ReadOptions ropt;
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);

  // Miss count should remain the same.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  // Make sure index block is in cache.
  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 1,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 2,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
}

// With fill_cache = false, fill up the cache, then iterate over the entire
// DB to verify that the dummy entries inserted in
// `BlockBasedTable::NewDataBlockIterator` do not cause heap-use-after-free
// errors in COMPILE_WITH_ASAN=1 runs.
TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  std::shared_ptr<Cache> cache = NewLRUCache(10, 0, true);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("key1", "val1"));
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key3", "val3"));
  ASSERT_OK(Put("key4", "val4"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key5", "val5"));
  ASSERT_OK(Put("key6", "val6"));
  ASSERT_OK(Flush());

  Iterator* iter = nullptr;
  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(0));
  while (iter->Valid()) {
    iter->Next();
  }
  ASSERT_OK(iter->status());
  delete iter;
  iter = nullptr;
}

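// Checks that BLOCK_CACHE_INDEX_BYTES_INSERT / BLOCK_CACHE_FILTER_BYTES_INSERT
// account for cached index/filter blocks, and that they keep growing as older
// entries are evicted when a second table is created at a tight cache
// capacity.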
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  LRUCacheOptions co;
  // 500 bytes are enough to hold the first two blocks
  co.capacity = 500;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "longer_key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  size_t index_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT);
  size_t filter_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT);
  ASSERT_GT(index_bytes_insert, 0);
  ASSERT_GT(filter_bytes_insert, 0);
  ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert);
  // set the cache capacity to the current usage
  cache->SetCapacity(index_bytes_insert + filter_bytes_insert);
  // Note that the second key needs to be no longer than the first one.
  // Otherwise the second index block may not fit in cache.
  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  // cache evicted old index and block entries
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT),
            index_bytes_insert);
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT),
            filter_bytes_insert);
}

#if (defined OS_LINUX || defined OS_WIN)

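// With prepopulate_block_cache = kFlushOnly, data blocks written by flush
// should be added to the block cache so subsequent Gets hit without a miss;
// compaction output is not counted in BLOCK_CACHE_DATA_ADD.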
TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(value, Get(std::to_string(i)));
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
}

// This test caches data, index, and filter blocks during flush.
class DBBlockCacheTest1 : public DBTestBase,
                          public ::testing::WithParamInterface<uint32_t> {
 public:
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;
  DBBlockCacheTest1() : DBTestBase("db_block_cache_test1", true) {}
};

INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1,
                        ::testing::Values(1, 2));

TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);

  uint32_t filter_type = GetParam();
  switch (filter_type) {
    case 1:  // partition_filter
      table_options.partition_filters = true;
      table_options.index_type =
          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      break;
    case 2:  // full filter
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      break;
    default:
      assert(false);
  }

  table_options.cache_index_and_filter_blocks = true;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  // Include a compression dictionary block
  options.compression_opts.max_dict_bytes = 123;
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    if (filter_type == 1) {
      ASSERT_EQ(2 * i,
                options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
      ASSERT_EQ(2 * i,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
    } else {
      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
    }
    ASSERT_EQ(value, Get(std::to_string(i)));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT));
    if (filter_type == 1) {
      ASSERT_EQ(i * 3,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
    } else {
      ASSERT_EQ(i * 2,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
    }
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS));
    // Including compression dict
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_MISS));
  }
  // Verify compaction not counted
  CompactRangeOptions cro;
  // Ensure files are rewritten, not just trivially moved.
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
  // Index and filter blocks are automatically warmed when the new table file
  // is automatically opened at the end of compaction. This is not easily
  // disabled so results in the new index and filter blocks being warmed.
  if (filter_type == 1) {
    EXPECT_EQ(2 * (1 + kNumBlocks),
              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2 * (1 + kNumBlocks),
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
  } else {
    EXPECT_EQ(1 + kNumBlocks,
              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(1 + kNumBlocks,
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
  }
}

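// Verifies that prepopulate_block_cache can be toggled at runtime through
// SetOptions on block_based_table_factory and that flush behavior follows
// the latest setting.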
TEST_F(DBBlockCacheTest, DynamicOptions) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  auto st = options.statistics;

  size_t i = 1;
  ASSERT_OK(Put(std::to_string(i), value));
  ASSERT_OK(Flush());
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(value, Get(std::to_string(i)));
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));

  ++i;
  ASSERT_OK(dbfull()->SetOptions(
      {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}}));
  ASSERT_OK(Put(std::to_string(i), value));
  ASSERT_OK(Flush());
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(value, Get(std::to_string(i)));
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));

  ++i;
  ASSERT_OK(dbfull()->SetOptions({{"block_based_table_factory",
                                   "{prepopulate_block_cache=kFlushOnly;}"}}));
  ASSERT_OK(Put(std::to_string(i), value));
  ASSERT_OK(Flush());
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(value, Get(std::to_string(i)));
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
  ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));

  ++i;
  // NOT YET SUPPORTED
  // FIXME: find a way to make this fail again (until well supported)
  // ASSERT_NOK(dbfull()->SetOptions(
  //     {{"block_based_table_factory", "{block_cache=null;}"}}));
  // ASSERT_OK(Put(std::to_string(i), value));
  // ASSERT_OK(Flush());
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  // ASSERT_EQ(value, Get(std::to_string(i)));
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));

  // ++i;
  // NOT YET SUPPORTED
  // FIXME: find a way to make this fail again (until well supported)
  // ASSERT_NOK(dbfull()->SetOptions(
  //     {{"block_based_table_factory", "{block_cache=1M;}"}}));
  // ASSERT_OK(Put(std::to_string(i), value));
  // ASSERT_OK(Flush());
  // ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  // ASSERT_EQ(value, Get(std::to_string(i)));
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
  // ASSERT_EQ(0, st->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
  // ASSERT_EQ(1, st->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
}
#endif

namespace {

// A mock cache that wraps LRUCache and records how many entries have been
// inserted for each priority.
class MockCache : public LRUCache {
 public:
  static uint32_t high_pri_insert_count;
  static uint32_t low_pri_insert_count;

  MockCache()
      : LRUCache(LRUCacheOptions(
            size_t{1} << 25 /*capacity*/, 0 /*num_shard_bits*/,
            false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/)) {}

  using ShardedCache::Insert;

  Status Insert(const Slice& key, Cache::ObjectPtr value,
                const Cache::CacheItemHelper* helper, size_t charge,
                Handle** handle, Priority priority, const Slice& compressed,
                CompressionType type) override {
    if (priority == Priority::LOW) {
      low_pri_insert_count++;
    } else {
      high_pri_insert_count++;
    }
    return LRUCache::Insert(key, value, helper, charge, handle, priority,
                            compressed, type);
  }
};

uint32_t MockCache::high_pri_insert_count = 0;
uint32_t MockCache::low_pri_insert_count = 0;

}  // anonymous namespace

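// Index/filter blocks should be inserted with the priority configured by
// cache_index_and_filter_blocks_with_high_priority, while data blocks are
// always inserted with low priority.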
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
  for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    table_options.cache_index_and_filter_blocks_with_high_priority =
        priority == Cache::Priority::HIGH ? true : false;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    MockCache::high_pri_insert_count = 0;
    MockCache::low_pri_insert_count = 0;

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // index/filter blocks added to block cache right after table creation.
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(2, /* only index/filter were added */
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(2u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(0u, MockCache::low_pri_insert_count);
    }

    // Access data block.
    ASSERT_EQ("value", Get("foo"));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(3, /*adding data block*/
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

    // Data block should be inserted with low priority.
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(3u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(1u, MockCache::low_pri_insert_count);
    }
  }
}

namespace {

// An LRUCache wrapper that can falsely report "not found" on Lookup.
// This allows us to manipulate BlockBasedTableReader into thinking
// another thread inserted the data in between Lookup and Insert,
// while mostly preserving the LRUCache interface/behavior.
class LookupLiarCache : public CacheWrapper {
  int nth_lookup_not_found_ = 0;

 public:
  explicit LookupLiarCache(std::shared_ptr<Cache> target)
      : CacheWrapper(std::move(target)) {}

  const char* Name() const override { return "LookupLiarCache"; }

  Handle* Lookup(const Slice& key, const CacheItemHelper* helper = nullptr,
                 CreateContext* create_context = nullptr,
                 Priority priority = Priority::LOW,
                 Statistics* stats = nullptr) override {
    if (nth_lookup_not_found_ == 1) {
      nth_lookup_not_found_ = 0;
      return nullptr;
    }
    if (nth_lookup_not_found_ > 1) {
      --nth_lookup_not_found_;
    }
    return CacheWrapper::Lookup(key, helper, create_context, priority, stats);
  }

  // 1 == next lookup, 2 == after next, etc.
  void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; }
};

}  // anonymous namespace

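// With paranoid_file_checks enabled, newly written files are read back
// (populating the block cache); once the option is turned off, creating
// new files should not add further blocks.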
TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.level0_file_num_compaction_trigger = 2;
  options.paranoid_file_checks = true;
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "1_key", "val"));
  ASSERT_OK(Put(1, "9_key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, /* read and cache data block */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_OK(Put(1, "1_key2", "val2"));
  ASSERT_OK(Put(1, "9_key2", "val2"));
  // Create a new SST file. This will further trigger a compaction
  // and generate another file.
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  // After disabling options.paranoid_file_checks, no further block
  // is added after generating a new file.
  ASSERT_OK(
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));

  ASSERT_OK(Put(1, "1_key3", "val3"));
  ASSERT_OK(Put(1, "9_key3", "val3"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "1_key4", "val4"));
  ASSERT_OK(Put(1, "9_key4", "val4"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
}

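// Covers caching of compression dictionary blocks: dictionaries are
// preloaded into the block cache when compaction output files are opened,
// and later reads should hit the cached dictionary rather than miss.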
TEST_F(DBBlockCacheTest, CacheCompressionDict) {
  const int kNumFiles = 4;
  const int kNumEntriesPerFile = 128;
  const int kNumBytesPerEntry = 1024;
  std::vector<CompressionType> dict_compressions =
      GetSupportedDictCompressions();
  Random rnd(301);
  // Format version before and after compression handling changes
  for (int format_version : {6, 7}) {
    // Test all supported compression types because (at least historically)
    // dictionary compression could be enabled and a dictionary block saved
    // but ignored by some compression types. Ensure we at least don't crash
    // or return corruption for those.
    for (auto compression_type : GetSupportedCompressions()) {
      // Extra handling checks only for types actually supporting dictionary
      // compression.
      bool dict_supported =
          std::count(dict_compressions.begin(), dict_compressions.end(),
                     compression_type) > 0;
      Options options = CurrentOptions();
      options.bottommost_compression = compression_type;
      options.bottommost_compression_opts.max_dict_bytes = 4096;
      options.bottommost_compression_opts.enabled = true;
      options.create_if_missing = true;
      options.num_levels = 2;
      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
      options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry;
      BlockBasedTableOptions table_options;
      table_options.cache_index_and_filter_blocks = true;
      table_options.block_cache.reset(new MockCache());
      table_options.format_version = format_version;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      DestroyAndReopen(options);

      RecordCacheCountersForCompressionDict(options);

      for (int i = 0; i < kNumFiles; ++i) {
        ASSERT_EQ(i, NumTableFilesAtLevel(0, 0));
        for (int j = 0; j < kNumEntriesPerFile; ++j) {
          std::string value = rnd.RandomString(kNumBytesPerEntry);
          ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str()));
        }
        ASSERT_OK(Flush());
      }
      ASSERT_OK(dbfull()->TEST_WaitForCompact());
      ASSERT_EQ(0, NumTableFilesAtLevel(0));
      ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1));

      if (dict_supported) {
        // Compression dictionary blocks are preloaded.
        CheckCacheCountersForCompressionDict(
            options, kNumFiles /* expected_compression_dict_misses */,
            0 /* expected_compression_dict_hits */,
            kNumFiles /* expected_compression_dict_inserts */);
      }

      // Seek to a key in a file. It should cause the SST's dictionary
      // meta-block to be read.
      RecordCacheCounters(options);
      RecordCacheCountersForCompressionDict(options);
      ReadOptions read_options;
      ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1)));
      if (dict_supported) {
        // Two block hits: index and dictionary since they are prefetched
        // One block missed/added: data block
        CheckCacheCounters(options, 1 /* expected_misses */,
                           2 /* expected_hits */, 1 /* expected_inserts */,
                           0 /* expected_failures */);
        CheckCacheCountersForCompressionDict(
            options, 0 /* expected_compression_dict_misses */,
            1 /* expected_compression_dict_hits */,
            0 /* expected_compression_dict_inserts */);
      }
    }
  }
}

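// Erases all cache entries except the (kMisc) stats collector entry, so a
// test can start from an empty cache without losing the collector.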
static void ClearCache(Cache* cache) {
  std::deque<std::string> keys;
  Cache::ApplyToAllEntriesOptions opts;
  auto callback = [&](const Slice& key, Cache::ObjectPtr, size_t /*charge*/,
                      const Cache::CacheItemHelper* helper) {
    if (helper && helper->role == CacheEntryRole::kMisc) {
      // Keep the stats collector
      return;
    }
    keys.push_back(key.ToString());
  };
  cache->ApplyToAllEntries(callback, opts);
  for (auto& k : keys) {
    cache->Erase(k);
  }
}

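// Exercises the block cache entry role statistics (kBlockCacheEntryStats /
// kFastBlockCacheEntryStats properties): expected per-role entry counts,
// time-based caching of collected stats, and that no DB mutex is held
// during cache scans.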
TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
  const size_t capacity = size_t{1} << 25;
  int iterations_tested = 0;
  for (bool partition : {false, true}) {
    SCOPED_TRACE("Partition? " + std::to_string(partition));
    for (const std::shared_ptr<Cache>& cache :
         {NewLRUCache(capacity),
          HyperClockCacheOptions(
              capacity,
              BlockBasedTableOptions().block_size /*estimated_value_size*/)
              .MakeSharedCache()}) {
      SCOPED_TRACE(std::string("Cache: ") + cache->Name());
      ++iterations_tested;

      Options options = CurrentOptions();
      SetTimeElapseOnlySleepOnReopen(&options);
      options.create_if_missing = true;
      options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
      options.max_open_files = 13;
      options.table_cache_numshardbits = 0;
      // If this wakes up, it could interfere with test
      options.stats_dump_period_sec = 0;

      BlockBasedTableOptions table_options;
      table_options.block_cache = cache;
      table_options.cache_index_and_filter_blocks = true;
      table_options.filter_policy.reset(NewBloomFilterPolicy(50));
      if (partition) {
        table_options.index_type =
            BlockBasedTableOptions::kTwoLevelIndexSearch;
        table_options.partition_filters = true;
      }
      table_options.metadata_cache_options.top_level_index_pinning =
          PinningTier::kNone;
      table_options.metadata_cache_options.partition_pinning =
          PinningTier::kNone;
      table_options.metadata_cache_options.unpartitioned_pinning =
          PinningTier::kNone;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      DestroyAndReopen(options);

      // Create a new table.
      ASSERT_OK(Put("foo", "value"));
      ASSERT_OK(Put("bar", "value"));
      ASSERT_OK(Flush());
      ASSERT_OK(Put("zfoo", "value"));
      ASSERT_OK(Put("zbar", "value"));
      ASSERT_OK(Flush());
      ASSERT_EQ(2, NumTableFilesAtLevel(0));

      // Fresh cache
      ClearCache(cache.get());

      std::array<size_t, kNumCacheEntryRoles> expected{};
      // For CacheEntryStatsCollector
      expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());

      std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;

      // First access only filters
      ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
      expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
      if (partition) {
        expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
      }
      // Within some time window, we will get cached entry stats
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Not enough to force a miss
      env_->MockSleepForSeconds(45);
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Enough to force a miss
      env_->MockSleepForSeconds(601);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());

      // Now access index and data block
      ASSERT_EQ("value", Get("foo"));
      expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      if (partition) {
        // top-level
        expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      }
      expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
      // Enough to force a miss
      env_->MockSleepForSeconds(601);
      // But inject a simulated long scan so that we need a longer
      // interval to force a miss next time.
      SyncPoint::GetInstance()->SetCallBack(
          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
          [this](void*) {
            // To spend no more than 0.2% of time scanning, we would need
            // interval of at least 10000s
            env_->MockSleepForSeconds(20);
          });
      SyncPoint::GetInstance()->EnableProcessing();
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
      prev_expected = expected;
      SyncPoint::GetInstance()->DisableProcessing();
      SyncPoint::GetInstance()->ClearAllCallBacks();

      // The same for other file
      ASSERT_EQ("value", Get("zfoo"));
      expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      if (partition) {
        // top-level
        expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
      }
      expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
      // Because of the simulated long scan, this is not enough to force
      // a miss
      env_->MockSleepForSeconds(601);
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // But this is enough
      env_->MockSleepForSeconds(10000);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
      prev_expected = expected;

      // Also check the GetProperty interface
      std::map<std::string, std::string> values;
      ASSERT_TRUE(
          db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
      for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
        auto role = static_cast<CacheEntryRole>(i);
        EXPECT_EQ(std::to_string(expected[i]),
                  values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
      }

      // Add one for kWriteBuffer
      {
        WriteBufferManager wbm(size_t{1} << 20, cache);
        wbm.ReserveMem(1024);
        expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]++;
        // Now we check that the GetProperty interface is more aggressive about
        // re-scanning stats, but not totally aggressive.
        // Within some time window, we will get cached entry stats
        env_->MockSleepForSeconds(1);
        EXPECT_EQ(std::to_string(prev_expected[static_cast<size_t>(
                      CacheEntryRole::kWriteBuffer)]),
                  values[BlockCacheEntryStatsMapKeys::EntryCount(
                      CacheEntryRole::kWriteBuffer)]);
        // Not enough for a "background" miss but enough for a "foreground" miss
        env_->MockSleepForSeconds(45);
        ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats,
                                        &values));
        EXPECT_EQ(
            std::to_string(
                expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
            values[BlockCacheEntryStatsMapKeys::EntryCount(
                CacheEntryRole::kWriteBuffer)]);
      }
      prev_expected = expected;

      // With collector pinned in cache, we should be able to hit
      // even if the cache is full
      ClearCache(cache.get());
      Cache::Handle* h = nullptr;
      if (strcmp(cache->Name(), "LRUCache") == 0) {
        ASSERT_OK(cache->Insert("Fill-it-up", nullptr, &kNoopCacheItemHelper,
                                capacity + 1, &h, Cache::Priority::HIGH));
      } else {
        // For ClockCache we use a 16-byte key.
        ASSERT_OK(cache->Insert("Fill-it-up-xxxxx", nullptr,
                                &kNoopCacheItemHelper, capacity + 1, &h,
                                Cache::Priority::HIGH));
      }
      ASSERT_GT(cache->GetUsage(), cache->GetCapacity());

      expected = {};
      // For CacheEntryStatsCollector
      expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
      // For Fill-it-up
      expected[static_cast<size_t>(CacheEntryRole::kMisc)]++;
      // Still able to hit on saved stats
      EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
      // Enough to force a miss
      env_->MockSleepForSeconds(1000);
      EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
      cache->Release(h);

      // Now we test that the DB mutex is not held during scans, for the ways
      // we know how to (possibly) trigger them. Without a better way to
      // check this, we simply inject an acquire & release of the DB mutex
      // deep in the stat collection code. If we were already holding the
      // mutex, that is UB that would at least be found by TSAN.
      int scan_count = 0;
      SyncPoint::GetInstance()->SetCallBack(
          "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
          [this, &scan_count](void*) {
            dbfull()->TEST_LockMutex();
            dbfull()->TEST_UnlockMutex();
            ++scan_count;
          });
      SyncPoint::GetInstance()->EnableProcessing();

      // Different things that might trigger a scan, with mock sleeps to
      // force a miss.
      env_->MockSleepForSeconds(10000);
      dbfull()->DumpStats();
      ASSERT_EQ(scan_count, 1);

      env_->MockSleepForSeconds(60);
      ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
                                      &values));
      ASSERT_EQ(scan_count, 1);
      ASSERT_TRUE(
          db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
      ASSERT_EQ(scan_count, 2);

      env_->MockSleepForSeconds(10000);
      ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
                                      &values));
      ASSERT_EQ(scan_count, 3);

      env_->MockSleepForSeconds(60);
      std::string value_str;
      ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats,
                                   &value_str));
      ASSERT_EQ(scan_count, 3);
      ASSERT_TRUE(
          db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str));
      ASSERT_EQ(scan_count, 4);

      env_->MockSleepForSeconds(10000);
      ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats,
                                   &value_str));
      ASSERT_EQ(scan_count, 5);

      ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str));
      // To match historical speed, querying this property no longer triggers
      // a scan, even if results are old. But periodic dump stats should keep
      // things reasonably updated.
      ASSERT_EQ(scan_count, /*unchanged*/ 5);

      SyncPoint::GetInstance()->DisableProcessing();
      SyncPoint::GetInstance()->ClearAllCallBacks();
    }
    EXPECT_GE(iterations_tested, 1);
  }
}

namespace {

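// Fills the cache to capacity with pinned dummy entries of roughly
// entry_size each, keeping the handles alive in `handles`.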
void DummyFillCache(Cache& cache, size_t entry_size,
                    std::vector<CacheHandleGuard<void>>& handles) {
  // fprintf(stderr, "Entry size: %zu\n", entry_size);
  handles.clear();
  cache.EraseUnRefEntries();
  void* fake_value = &cache;
  size_t capacity = cache.GetCapacity();
  OffsetableCacheKey ck{"abc", "abc", 42};
  for (size_t my_usage = 0; my_usage < capacity;) {
    size_t charge = std::min(entry_size, capacity - my_usage);
    Cache::Handle* handle;
    Status st = cache.Insert(ck.WithOffset(my_usage).AsSlice(), fake_value,
                             &kNoopCacheItemHelper, charge, &handle);
    ASSERT_OK(st);
    handles.emplace_back(&cache, handle);
    my_usage += charge;
  }
}

class CountingLogger : public Logger {
 public:
  ~CountingLogger() override = default;
  using Logger::Logv;
  void Logv(const InfoLogLevel log_level, const char* format,
            va_list /*ap*/) override {
    if (std::strstr(format, "HyperClockCache") == nullptr) {
      // Not a match
      return;
    }
    // static StderrLogger debug;
    // debug.Logv(log_level, format, ap);
    if (log_level == InfoLogLevel::INFO_LEVEL) {
      ++info_count_;
    } else if (log_level == InfoLogLevel::WARN_LEVEL) {
      ++warn_count_;
    } else if (log_level == InfoLogLevel::ERROR_LEVEL) {
      ++error_count_;
    }
  }

  std::array<int, 3> PopCounts() {
    std::array<int, 3> rv{{info_count_, warn_count_, error_count_}};
    info_count_ = warn_count_ = error_count_ = 0;
    return rv;
  }

 private:
  int info_count_{};
  int warn_count_{};
  int error_count_{};
};

}  // namespace

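// HyperClockCache should report degraded performance in the info log when
// the configured estimated_value_size is far from the actual average entry
// size: too-high estimates escalate to WARN/ERROR, too-low estimates to
// INFO/WARN.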
TEST_F(DBBlockCacheTest, HyperClockCacheReportProblems) {
  size_t capacity = 1024 * 1024;
  size_t value_size_est = 8 * 1024;
  HyperClockCacheOptions hcc_opts{capacity, value_size_est};
  hcc_opts.num_shard_bits = 2;  // 4 shards
  hcc_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  hcc_opts.hash_seed = 0;  // deterministic hashing
  std::shared_ptr<Cache> cache = hcc_opts.MakeSharedCache();
  std::shared_ptr<CountingLogger> logger = std::make_shared<CountingLogger>();

  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.info_log = logger;
  // Going to sample more directly
  options.stats_dump_period_sec = 0;
  Reopen(options);

  std::vector<CacheHandleGuard<void>> handles;

  // Clear anything from DB startup
  logger->PopCounts();
  1092. // Fill cache based on expected size and check that when we
  1093. // don't report anything relevant in periodic stats dump
  1094. DummyFillCache(*cache, value_size_est, handles);
  1095. dbfull()->DumpStats();
  1096. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
  1097. // Same, within reasonable bounds
  1098. DummyFillCache(*cache, value_size_est - value_size_est / 4, handles);
  1099. dbfull()->DumpStats();
  1100. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
  1101. DummyFillCache(*cache, value_size_est + value_size_est / 3, handles);
  1102. dbfull()->DumpStats();
  1103. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
  1104. // Estimate too high (value size too low) eventually reports ERROR
  1105. DummyFillCache(*cache, value_size_est / 2, handles);
  1106. dbfull()->DumpStats();
  1107. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
  1108. DummyFillCache(*cache, value_size_est / 3, handles);
  1109. dbfull()->DumpStats();
  1110. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 1}}));
  1111. // Estimate too low (value size too high) starts with INFO
  1112. // and is only WARNING in the worst case
  1113. DummyFillCache(*cache, value_size_est * 2, handles);
  1114. dbfull()->DumpStats();
  1115. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{1, 0, 0}}));
  1116. DummyFillCache(*cache, value_size_est * 3, handles);
  1117. dbfull()->DumpStats();
  1118. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
  1119. DummyFillCache(*cache, value_size_est * 20, handles);
  1120. dbfull()->DumpStats();
  1121. EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
  1122. }
class DBBlockCacheTypeTest
    : public DBBlockCacheTest,
      public secondary_cache_test_util::WithCacheTypeParam {};

INSTANTIATE_TEST_CASE_P(DBBlockCacheTypeTestInstance, DBBlockCacheTypeTest,
                        secondary_cache_test_util::GetTestingCacheTypes());
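
// The parameterized tests below run against each cache implementation
// provided by secondary_cache_test_util::GetTestingCacheTypes() (e.g.
// LRUCache and HyperClockCache variants).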
TEST_P(DBBlockCacheTypeTest, AddRedundantStats) {
  BlockBasedTableOptions table_options;

  const size_t capacity = size_t{1} << 25;
  const int num_shard_bits = 0;  // 1 shard
  estimated_value_size_ = table_options.block_size;
  std::shared_ptr<Cache> base_cache =
      NewCache(capacity, num_shard_bits, /*strict_capacity_limit=*/false);

  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
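
  // LookupLiarCache (defined earlier in this file) wraps the real cache and
  // can pretend that a chosen upcoming lookup misses, forcing a redundant
  // load + insert of the corresponding block.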
  std::shared_ptr<LookupLiarCache> cache =
      std::make_shared<LookupLiarCache>(base_cache);

  table_options.cache_index_and_filter_blocks = true;
  table_options.block_cache = cache;
  table_options.filter_policy.reset(NewBloomFilterPolicy(50));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  // Create a new table.
  ASSERT_OK(Put("foo", "value"));
  ASSERT_OK(Put("bar", "value"));
  ASSERT_OK(Flush());
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // Normal access filter+index+data.
  ASSERT_EQ("value", Get("foo"));

  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
  // --------
  ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
  // --------
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

  // Again access filter+index+data, but force redundant load+insert on index
  cache->SetNthLookupNotFound(2);
  ASSERT_EQ("value", Get("bar"));

  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
  // --------
  ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
  // --------
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

  // Access just filter (with high probability), and force redundant
  // load+insert
  cache->SetNthLookupNotFound(1);
  ASSERT_EQ("NOT_FOUND", Get("this key was not added"));

  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
  // --------
  EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD));

  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
  EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
  // --------
  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

  // Access just data, forcing redundant load+insert
  ReadOptions read_options;
  std::unique_ptr<Iterator> iter{db_->NewIterator(read_options)};
  cache->SetNthLookupNotFound(1);
  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(iter->key(), "bar");

  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
  EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
  // --------
  EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD));

  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
  EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
  // --------
  EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
}
namespace {
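// A second, non-overlapping key space ("altkey" prefix), used below to avoid
// interference with keys generated by Key().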
std::string AltKey(int i) {
  char buf[100];
  snprintf(buf, sizeof(buf), "altkey%06d", i);
  return std::string(buf);
}
}  // namespace
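
// Exercises uncache_aggressiveness: roughly speaking, a non-zero value lets
// RocksDB erase an obsolete file's blocks from the block cache when the file
// is no longer needed (e.g. after compaction) rather than leaving them to age
// out, while ua == 0 disables that behavior.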
TEST_P(DBBlockCacheTypeTest, Uncache) {
  for (bool partitioned : {false, true}) {
    SCOPED_TRACE("partitioned=" + std::to_string(partitioned));
    for (uint32_t ua : {0, 1, 2, 10000}) {
      SCOPED_TRACE("ua=" + std::to_string(ua));

      BlockBasedTableOptions table_options;
      Options options = CurrentOptions();
      options.uncache_aggressiveness = ua;
      options.create_if_missing = true;
      // Don't allow background operations to keep Versions referenced
      options.stats_dump_period_sec = 0;
      options.stats_persist_period_sec = 0;
      auto stats = ROCKSDB_NAMESPACE::CreateDBStatistics();
      options.statistics = stats;

      const size_t capacity = size_t{1} << 25;
      const int num_shard_bits = 0;  // 1 shard
      estimated_value_size_ = table_options.block_size;
      std::shared_ptr<Cache> cache =
          NewCache(capacity, num_shard_bits, /*strict_capacity_limit=*/false);

      table_options.cache_index_and_filter_blocks = true;
      table_options.block_cache = cache;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      table_options.partition_filters = partitioned;
      table_options.index_type =
          partitioned ? BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch
                      : BlockBasedTableOptions::IndexType::kBinarySearch;
      options.table_factory.reset(NewBlockBasedTableFactory(table_options));
      DestroyAndReopen(options);

      size_t kBaselineCount = 1;  // Because of entry stats collector

      ASSERT_EQ(kBaselineCount, cache->GetOccupancyCount());
      ASSERT_EQ(0U, cache->GetUsage());

      constexpr uint8_t kNumDataBlocks = 10;
      constexpr uint8_t kNumFiles = 3;
      for (int i = 0; i < kNumDataBlocks; i++) {
        // Force some overlap with ordering
        ASSERT_OK(Put(Key((i * 7) % kNumDataBlocks),
                      Random::GetTLSInstance()->RandomBinaryString(
                          static_cast<int>(table_options.block_size))));
        if (i >= kNumDataBlocks - kNumFiles) {
          ASSERT_OK(Flush());
        }
      }
      ASSERT_EQ(int{kNumFiles}, NumTableFilesAtLevel(0));

      for (int i = 0; i < kNumDataBlocks; i++) {
        ASSERT_NE(Get(Key(i)), "NOT_FOUND");
      }

      size_t meta_blocks_per_file = /*index & filter*/ 2U * (1U + partitioned);
      ASSERT_EQ(
          cache->GetOccupancyCount(),
          kBaselineCount + kNumDataBlocks + meta_blocks_per_file * kNumFiles);
      ASSERT_GE(cache->GetUsage(), kNumDataBlocks * table_options.block_size);

      // Combine into one file, making the originals obsolete
      ASSERT_OK(db_->CompactRange({}, nullptr, nullptr));
      ASSERT_OK(dbfull()->TEST_WaitForBackgroundWork());
      ASSERT_EQ(1, NumTableFilesAtLevel(1));

      for (int i = 0; i < kNumDataBlocks; i++) {
        ASSERT_NE(Get(Key(i)), "NOT_FOUND");
      }

      if (ua == 0) {
        // Expect to see cache entries for new file and obsolete files
        EXPECT_EQ(cache->GetOccupancyCount(),
                  kBaselineCount + kNumDataBlocks * 2U +
                      meta_blocks_per_file * (kNumFiles + 1));
        EXPECT_GE(cache->GetUsage(),
                  kNumDataBlocks * table_options.block_size * 2U);
      } else {
        // Expect only to see cache entries for new file
        EXPECT_EQ(cache->GetOccupancyCount(),
                  kBaselineCount + kNumDataBlocks + meta_blocks_per_file);
        EXPECT_GE(cache->GetUsage(), kNumDataBlocks * table_options.block_size);
        EXPECT_LT(cache->GetUsage(),
                  kNumDataBlocks * table_options.block_size * 2U);
      }

      size_t alt_baseline_count = cache->GetOccupancyCount();
      size_t alt_baseline_usage = cache->GetUsage();
      ASSERT_OK(stats->Reset());
      // We aren't generally cleaning up cache entries on DB::Close, especially
      // because someone might just re-open the same DB.
      Reopen(options);
      for (int i = 0; i < kNumDataBlocks; i++) {
        ASSERT_NE(Get(Key(i)), "NOT_FOUND");
      }

      EXPECT_EQ(cache->GetOccupancyCount(), alt_baseline_count);
      EXPECT_EQ(cache->GetUsage(), alt_baseline_usage);

      // Check for unnecessary cache churn
      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
      ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);

      // And now do a similar test as above except with trivial moves, making
      // sure that we aren't falsely uncaching in that case, which would cause
      // unnecessary cache misses. Using AltKey instead of Key to avoid
      // interference.
      for (int i = 0; i < kNumDataBlocks; i++) {
        // No overlap
        ASSERT_OK(
            Put(AltKey(i), Random::GetTLSInstance()->RandomBinaryString(
                               static_cast<int>(table_options.block_size))));
        if (i >= kNumDataBlocks - kNumFiles) {
          ASSERT_OK(Flush());
        }
      }
      ASSERT_EQ(int{kNumFiles}, NumTableFilesAtLevel(0));

      for (int i = 0; i < kNumDataBlocks; i++) {
        ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
      }

      ASSERT_EQ(cache->GetOccupancyCount(),
                alt_baseline_count + kNumDataBlocks +
                    meta_blocks_per_file * kNumFiles);
      ASSERT_GE(cache->GetUsage(),
                alt_baseline_usage + kNumDataBlocks * table_options.block_size);

      ASSERT_OK(stats->Reset());

      // Make trivial move
      {
        auto a = AltKey(0);
        auto b = AltKey(kNumDataBlocks);
        Slice slice_a{a};
        Slice slice_b{b};
        ASSERT_OK(db_->CompactRange({}, &slice_a, &slice_b));
      }
      ASSERT_EQ(/*old*/ 1 + /*new*/ int{kNumFiles}, NumTableFilesAtLevel(1));

      for (int i = 0; i < kNumDataBlocks; i++) {
        ASSERT_NE(Get(AltKey(i)), "NOT_FOUND");
      }

      // Should be the same if trivial move
      ASSERT_EQ(cache->GetOccupancyCount(),
                alt_baseline_count + kNumDataBlocks +
                    meta_blocks_per_file * kNumFiles);

      // Check for unnecessary cache churn
      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_ADD), 0U);
      ASSERT_EQ(stats->getTickerCount(BLOCK_CACHE_MISS), 0U);
      ASSERT_GT(stats->getTickerCount(BLOCK_CACHE_HIT), 0U);
    }
  }
}
class DBBlockCacheKeyTest
    : public DBTestBase,
      public testing::WithParamInterface<std::tuple<bool, bool>> {
 public:
  DBBlockCacheKeyTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}

  void SetUp() override {
    use_compressed_cache_ = std::get<0>(GetParam());
    exclude_file_numbers_ = std::get<1>(GetParam());
  }

  bool use_compressed_cache_;
  bool exclude_file_numbers_;
};
// Disable LinkFile so that we can physically copy a DB using Checkpoint.
// Disable file GetUniqueId to enable stable cache keys.
class StableCacheKeyTestFS : public FaultInjectionTestFS {
 public:
  explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
      : FaultInjectionTestFS(base) {
    SetFailGetUniqueId(true);
  }

  ~StableCacheKeyTestFS() override = default;

  IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
                    IODebugContext*) override {
    return IOStatus::NotSupported("Disabled");
  }
};
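
// Verifies that block cache keys stay stable across DB re-open, a full
// physical copy via Checkpoint, and export/re-import of column families, so
// blocks cached before those operations can still be hit afterwards.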
TEST_P(DBBlockCacheKeyTest, StableCacheKeys) {
  std::shared_ptr<StableCacheKeyTestFS> test_fs{
      new StableCacheKeyTestFS(env_->GetFileSystem())};
  std::unique_ptr<CompositeEnvWrapper> test_env{
      new CompositeEnvWrapper(env_, test_fs)};

  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.env = test_env.get();

  // Corrupting the table properties corrupts the unique id.
  // Ignore the unique id recorded in the manifest.
  options.verify_sst_unique_id_in_manifest = false;

  BlockBasedTableOptions table_options;

  int key_count = 0;
  uint64_t expected_stat = 0;

  std::function<void()> verify_stats;
  table_options.cache_index_and_filter_blocks = true;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  verify_stats = [&options, &expected_stat] {
    ASSERT_EQ(expected_stat,
              options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(expected_stat,
              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(expected_stat,
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
  };

  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"koko"}, options);

  if (exclude_file_numbers_) {
    // Simulate something like old behavior without file numbers in properties.
    // This is a "control" side of the test that also ensures safely degraded
    // behavior on old files.
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey",
        [&](void* arg) {
          TableProperties* props = static_cast<TableProperties*>(arg);
          props->orig_file_number = 0;
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  }

  std::function<void()> perform_gets = [&key_count, &expected_stat, this]() {
    if (exclude_file_numbers_) {
      // No cache key reuse should happen, because we can't rely on current
      // file number being stable
      expected_stat += key_count;
    } else {
      // Cache keys should be stable
      expected_stat = key_count;
    }
    for (int i = 0; i < key_count; ++i) {
      ASSERT_EQ(Get(1, Key(i)), "abc");
    }
  };

  // Ordinary SST files with same session id
  const std::string something_compressible(500U, 'x');

  for (int i = 0; i < 2; ++i) {
    ASSERT_OK(Put(1, Key(key_count), "abc"));
    ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible));
    ASSERT_OK(Flush(1));
    ++key_count;
  }

  // Save an export of those ordinary SST files for later
  std::string export_files_dir = dbname_ + "/exported";
  ExportImportFilesMetaData* metadata_ptr_ = nullptr;
  Checkpoint* checkpoint;
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir,
                                           &metadata_ptr_));
  ASSERT_NE(metadata_ptr_, nullptr);
  delete checkpoint;
  checkpoint = nullptr;

  // External SST files with same session id
  SstFileWriter sst_file_writer(EnvOptions(), options);
  std::vector<std::string> external;
  for (int i = 0; i < 2; ++i) {
    std::string f = dbname_ + "/external" + std::to_string(i) + ".sst";
    external.push_back(f);
    ASSERT_OK(sst_file_writer.Open(f));
    ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
    ASSERT_OK(
        sst_file_writer.Put(Key(key_count) + "a", something_compressible));
    ++key_count;
    ExternalSstFileInfo external_info;
    ASSERT_OK(sst_file_writer.Finish(&external_info));
    IngestExternalFileOptions ingest_opts;
    ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts));
  }

  perform_gets();
  verify_stats();

  // Make sure we can cache hit after re-open
  ReopenWithColumnFamilies({"default", "koko"}, options);
  perform_gets();
  verify_stats();

  // Make sure we can cache hit even on a full copy of the DB. Using
  // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link.
  // (Checkpoint not available in LITE mode to test this.)
  auto db_copy_name = dbname_ + "-copy";
  ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
  ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
  delete checkpoint;

  Close();
  Destroy(options);

  // Switch to the DB copy
  SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
  ReopenWithColumnFamilies({"default", "koko"}, options);

  perform_gets();
  verify_stats();

  // And ensure that re-importing + ingesting the same files into a
  // different DB uses same cache keys
  DestroyAndReopen(options);

  ColumnFamilyHandle* cfh = nullptr;
  ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
                                              ImportColumnFamilyOptions(),
                                              *metadata_ptr_, &cfh));
  ASSERT_NE(cfh, nullptr);
  delete cfh;
  cfh = nullptr;
  delete metadata_ptr_;
  metadata_ptr_ = nullptr;

  ASSERT_OK(DestroyDB(export_files_dir, options));

  ReopenWithColumnFamilies({"default", "yoyo"}, options);

  IngestExternalFileOptions ingest_opts;
  ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts));

  perform_gets();
  verify_stats();

  Close();
  Destroy(options);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
}
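
// Directly exercises cache key construction by simulating table properties
// (db id, session id, original file number) rather than going through a DB.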
class CacheKeyTest : public testing::Test {
 public:
  CacheKey GetBaseCacheKey() {
    CacheKey rv = GetOffsetableCacheKey(0, /*min file_number*/ 1).WithOffset(0);
    // Correct for file_number_ == 1
    *reinterpret_cast<uint64_t*>(&rv) ^= ReverseBits(uint64_t{1});
    return rv;
  }
  CacheKey GetCacheKey(uint64_t session_counter, uint64_t file_number,
                       uint64_t offset) {
    OffsetableCacheKey offsetable =
        GetOffsetableCacheKey(session_counter, file_number);
    // * 4 to counteract optimization that strips lower 2 bits in encoding
    // the offset in BlockBasedTable::GetCacheKey (which we prefer to include
    // in unit tests to maximize functional coverage).
    EXPECT_GE(offset * 4, offset);  // no overflow
    return BlockBasedTable::GetCacheKey(offsetable,
                                        BlockHandle(offset * 4, /*size*/ 5));
  }

 protected:
  OffsetableCacheKey GetOffsetableCacheKey(uint64_t session_counter,
                                           uint64_t file_number) {
    // Like SemiStructuredUniqueIdGen::GenerateNext
    tp_.db_session_id = EncodeSessionId(base_session_upper_,
                                        base_session_lower_ ^ session_counter);
    tp_.db_id = std::to_string(db_id_);
    tp_.orig_file_number = file_number;
    bool is_stable;
    std::string cur_session_id;     // ignored
    uint64_t cur_file_number = 42;  // ignored
    OffsetableCacheKey rv;
    BlockBasedTable::SetupBaseCacheKey(&tp_, cur_session_id, cur_file_number,
                                       &rv, &is_stable);
    EXPECT_TRUE(is_stable);
    EXPECT_TRUE(!rv.IsEmpty());
    // BEGIN some assertions in relation to SST unique IDs
    std::string external_unique_id_str;
    EXPECT_OK(GetUniqueIdFromTableProperties(tp_, &external_unique_id_str));
    UniqueId64x2 sst_unique_id = {};
    EXPECT_OK(DecodeUniqueIdBytes(external_unique_id_str, &sst_unique_id));
    ExternalUniqueIdToInternal(&sst_unique_id);
    OffsetableCacheKey ock =
        OffsetableCacheKey::FromInternalUniqueId(&sst_unique_id);
    EXPECT_EQ(rv.WithOffset(0).AsSlice(), ock.WithOffset(0).AsSlice());
    EXPECT_EQ(ock.ToInternalUniqueId(), sst_unique_id);
    // END some assertions in relation to SST unique IDs
    return rv;
  }

  TableProperties tp_;
  uint64_t base_session_upper_ = 0;
  uint64_t base_session_lower_ = 0;
  uint64_t db_id_ = 0;
};
TEST_F(CacheKeyTest, DBImplSessionIdStructure) {
  // We have to generate our own session IDs for simulation purposes in other
  // tests. Here we verify that the DBImpl implementation seems to match
  // our construction here, by using lowest XORed-in bits for "session
  // counter."
  std::string session_id1 = DBImpl::GenerateDbSessionId(/*env*/ nullptr);
  std::string session_id2 = DBImpl::GenerateDbSessionId(/*env*/ nullptr);
  uint64_t upper1, upper2, lower1, lower2;
  ASSERT_OK(DecodeSessionId(session_id1, &upper1, &lower1));
  ASSERT_OK(DecodeSessionId(session_id2, &upper2, &lower2));
  // Because generated in same process
  ASSERT_EQ(upper1, upper2);
  // Unless we generate > 4 billion session IDs in this process...
  ASSERT_EQ(Upper32of64(lower1), Upper32of64(lower2));
  // But they must be different somewhere
  ASSERT_NE(Lower32of64(lower1), Lower32of64(lower2));
}
namespace {
// Deconstruct cache key, based on knowledge of implementation details.
void DeconstructNonemptyCacheKey(const CacheKey& key, uint64_t* file_num_etc64,
                                 uint64_t* offset_etc64) {
  *file_num_etc64 = *reinterpret_cast<const uint64_t*>(key.AsSlice().data());
  *offset_etc64 = *reinterpret_cast<const uint64_t*>(key.AsSlice().data() + 8);
  assert(*file_num_etc64 != 0);
  if (*offset_etc64 == 0) {
    std::swap(*file_num_etc64, *offset_etc64);
  }
  assert(*offset_etc64 != 0);
}

// Make a bit mask of 0 to 64 bits
uint64_t MakeMask64(int bits) {
  if (bits >= 64) {
    return uint64_t{0} - 1;
  } else {
    return (uint64_t{1} << bits) - 1;
  }
}

// See CacheKeyTest::Encodings
struct CacheKeyDecoder {
  // Inputs
  uint64_t base_file_num_etc64, base_offset_etc64;
  int session_counter_bits, file_number_bits, offset_bits;

  // Derived
  uint64_t session_counter_mask, file_number_mask, offset_mask;

  // Outputs
  uint64_t decoded_session_counter, decoded_file_num, decoded_offset;

  void SetBaseCacheKey(const CacheKey& base) {
    DeconstructNonemptyCacheKey(base, &base_file_num_etc64,
                                &base_offset_etc64);
  }

  void SetRanges(int _session_counter_bits, int _file_number_bits,
                 int _offset_bits) {
    session_counter_bits = _session_counter_bits;
    session_counter_mask = MakeMask64(session_counter_bits);
    file_number_bits = _file_number_bits;
    file_number_mask = MakeMask64(file_number_bits);
    offset_bits = _offset_bits;
    offset_mask = MakeMask64(offset_bits);
  }

  void Decode(const CacheKey& key) {
    uint64_t file_num_etc64, offset_etc64;
    DeconstructNonemptyCacheKey(key, &file_num_etc64, &offset_etc64);

    // First decode session counter
    if (offset_bits + session_counter_bits <= 64) {
      // fully recoverable from offset_etc64
      decoded_session_counter =
          ReverseBits((offset_etc64 ^ base_offset_etc64)) &
          session_counter_mask;
    } else if (file_number_bits + session_counter_bits <= 64) {
      // fully recoverable from file_num_etc64
      decoded_session_counter = DownwardInvolution(
          (file_num_etc64 ^ base_file_num_etc64) & session_counter_mask);
    } else {
      // Need to combine parts from each word.
      // Piece1 will contain some correct prefix of the bottom bits of
      // session counter.
      uint64_t piece1 =
          ReverseBits((offset_etc64 ^ base_offset_etc64) & ~offset_mask);
      int piece1_bits = 64 - offset_bits;
      // Piece2 will contain involuted bits that we can combine with piece1
      // to infer rest of session counter
      int piece2_bits = std::min(64 - file_number_bits, 64 - piece1_bits);
      ASSERT_LT(piece2_bits, 64);
      uint64_t piece2_mask = MakeMask64(piece2_bits);
      uint64_t piece2 = (file_num_etc64 ^ base_file_num_etc64) & piece2_mask;

      // Cancel out the part of piece2 that we can infer from piece1
      // (DownwardInvolution distributes over xor)
      piece2 ^= DownwardInvolution(piece1) & piece2_mask;

      // Now we need to solve for the unknown original bits in higher
      // positions than piece1 provides. We use Gaussian elimination
      // because we know that a piece2_bits X piece2_bits submatrix of
      // the matrix underlying DownwardInvolution times the vector of
      // unknown original bits equals piece2.
      //
      // Build an augmented row matrix for that submatrix, built column by
      // column.
      std::array<uint64_t, 64> aug_rows{};
      for (int i = 0; i < piece2_bits; ++i) {  // over columns
        uint64_t col_i = DownwardInvolution(uint64_t{1} << piece1_bits << i);
        ASSERT_NE(col_i & 1U, 0);
        for (int j = 0; j < piece2_bits; ++j) {  // over rows
          aug_rows[j] |= (col_i & 1U) << i;
          col_i >>= 1;
        }
      }
      // Augment with right hand side
      for (int j = 0; j < piece2_bits; ++j) {  // over rows
        aug_rows[j] |= (piece2 & 1U) << piece2_bits;
        piece2 >>= 1;
      }
      // Run Gaussian elimination
      for (int i = 0; i < piece2_bits; ++i) {  // over columns
        // Find a row that can be used to cancel others
        uint64_t canceller = 0;
        // Note: Rows 0 through i-1 contain 1s in columns already eliminated
        for (int j = i; j < piece2_bits; ++j) {  // over rows
          if (aug_rows[j] & (uint64_t{1} << i)) {
            // Swap into appropriate row
            std::swap(aug_rows[i], aug_rows[j]);
            // Keep a handy copy for row reductions
            canceller = aug_rows[i];
            break;
          }
        }
        ASSERT_NE(canceller, 0);
        for (int j = 0; j < piece2_bits; ++j) {  // over rows
          if (i != j && ((aug_rows[j] >> i) & 1) != 0) {
            // Row reduction
            aug_rows[j] ^= canceller;
          }
        }
      }
      // Extract result
      decoded_session_counter = piece1;
      for (int j = 0; j < piece2_bits; ++j) {  // over rows
        ASSERT_EQ(aug_rows[j] & piece2_mask, uint64_t{1} << j);
        decoded_session_counter |= aug_rows[j] >> piece2_bits << piece1_bits
                                   << j;
      }
    }

    decoded_offset = offset_etc64 ^ base_offset_etc64 ^
                     ReverseBits(decoded_session_counter);

    decoded_file_num = ReverseBits(file_num_etc64 ^ base_file_num_etc64 ^
                                   DownwardInvolution(decoded_session_counter));
  }
};
}  // anonymous namespace
TEST_F(CacheKeyTest, Encodings) {
  // This test primarily verifies this claim from cache_key.cc:
  // // In fact, if DB ids were not involved, we would be guaranteed unique
  // // cache keys for files generated in a single process until total bits for
  // // biggest session_id_counter, orig_file_number, and offset_in_file
  // // reach 128 bits.
  //
  // To demonstrate this, CacheKeyDecoder can reconstruct the structured inputs
  // to the cache key when provided an output cache key, the unstructured
  // inputs, and bounds on the structured inputs.
  //
  // See OffsetableCacheKey comments in cache_key.cc.

  // We are going to randomly initialize some values that *should* not affect
  // result
  Random64 r{std::random_device{}()};

  CacheKeyDecoder decoder;
  db_id_ = r.Next();
  base_session_upper_ = r.Next();
  base_session_lower_ = r.Next();
  if (base_session_lower_ == 0) {
    base_session_lower_ = 1;
  }

  decoder.SetBaseCacheKey(GetBaseCacheKey());

  // Loop over configurations and test those
  for (int session_counter_bits = 0; session_counter_bits <= 64;
       ++session_counter_bits) {
    for (int file_number_bits = 1; file_number_bits <= 64; ++file_number_bits) {
      // 62 bits max because unoptimized offset will be 64 bits in that case
      for (int offset_bits = 0; offset_bits <= 62; ++offset_bits) {
        if (session_counter_bits + file_number_bits + offset_bits > 128) {
          break;
        }

        decoder.SetRanges(session_counter_bits, file_number_bits, offset_bits);

        uint64_t session_counter = r.Next() & decoder.session_counter_mask;
        uint64_t file_number = r.Next() & decoder.file_number_mask;
        if (file_number == 0) {
          // Minimum
          file_number = 1;
        }
        uint64_t offset = r.Next() & decoder.offset_mask;

        decoder.Decode(GetCacheKey(session_counter, file_number, offset));

        EXPECT_EQ(decoder.decoded_session_counter, session_counter);
        EXPECT_EQ(decoder.decoded_file_num, file_number);
        EXPECT_EQ(decoder.decoded_offset, offset);
      }
    }
  }
}
INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest,
                        ::testing::Combine(::testing::Bool(),
                                           ::testing::Bool()));
class DBBlockCachePinningTest
    : public DBTestBase,
      public testing::WithParamInterface<
          std::tuple<bool, PinningTier, PinningTier, PinningTier>> {
 public:
  DBBlockCachePinningTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}

  void SetUp() override {
    partition_index_and_filters_ = std::get<0>(GetParam());
    top_level_index_pinning_ = std::get<1>(GetParam());
    partition_pinning_ = std::get<2>(GetParam());
    unpartitioned_pinning_ = std::get<3>(GetParam());
  }

  bool partition_index_and_filters_;
  PinningTier top_level_index_pinning_;
  PinningTier partition_pinning_;
  PinningTier unpartitioned_pinning_;
};
#ifdef LZ4
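// Note on PinningTier (roughly): kAll pins applicable blocks for files at all
// levels, kFlushedAndSimilar pins them only for flushed/L0-like files (so L1
// reads can still miss), and kNone pins nothing.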
TEST_P(DBBlockCachePinningTest, TwoLevelDB) {
  // Creates one file in L0 and one file in L1. Both files have enough data that
  // their index and filter blocks are partitioned. The L1 file will also have
  // a compression dictionary (those are trained only during compaction), which
  // must be unpartitioned.
  const int kKeySize = 32;
  const int kBlockSize = 128;
  const int kNumBlocksPerFile = 128;
  const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize;

  Options options = CurrentOptions();
  options.compression = kLZ4Compression;
  options.compression_opts.max_dict_bytes = 4 << 10;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 20 /* capacity */);
  table_options.block_size = kBlockSize;
  table_options.metadata_block_size = kBlockSize;
  table_options.cache_index_and_filter_blocks = true;
  table_options.metadata_cache_options.top_level_index_pinning =
      top_level_index_pinning_;
  table_options.metadata_cache_options.partition_pinning = partition_pinning_;
  table_options.metadata_cache_options.unpartitioned_pinning =
      unpartitioned_pinning_;
  table_options.filter_policy.reset(
      NewBloomFilterPolicy(10 /* bits_per_key */));
  if (partition_index_and_filters_) {
    table_options.index_type =
        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
    table_options.partition_filters = true;
  }
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);

  Random rnd(301);
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < kNumKeysPerFile; ++j) {
      ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize)));
    }
    ASSERT_OK(Flush());
    if (i == 0) {
      // Prevent trivial move so file will be rewritten with dictionary and
      // reopened with L1's pinning settings.
      CompactRangeOptions cro;
      cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
      ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
    }
  }

  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
  // when reading a key from a file.
  table_options.block_cache->EraseUnRefEntries();

  // Get base cache values
  uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
  uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
  uint64_t compression_dict_misses =
      TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);

  // Read a key from the L0 file
  Get(Key(kNumKeysPerFile));
  uint64_t expected_filter_misses = filter_misses;
  uint64_t expected_index_misses = index_misses;
  uint64_t expected_compression_dict_misses = compression_dict_misses;
  if (partition_index_and_filters_) {
    if (top_level_index_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
    if (partition_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  } else {
    if (unpartitioned_pinning_ == PinningTier::kNone) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  }
  if (unpartitioned_pinning_ == PinningTier::kNone) {
    ++expected_compression_dict_misses;
  }
  ASSERT_EQ(expected_filter_misses,
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(expected_index_misses,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(expected_compression_dict_misses,
            TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));

  // Clear all unpinned blocks so unpinned blocks will show up as cache misses
  // when reading a key from a file.
  table_options.block_cache->EraseUnRefEntries();

  // Read a key from the L1 file
  Get(Key(0));
  if (partition_index_and_filters_) {
    if (top_level_index_pinning_ == PinningTier::kNone ||
        top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
    if (partition_pinning_ == PinningTier::kNone ||
        partition_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  } else {
    if (unpartitioned_pinning_ == PinningTier::kNone ||
        unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
      ++expected_filter_misses;
      ++expected_index_misses;
    }
  }
  if (unpartitioned_pinning_ == PinningTier::kNone ||
      unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
    ++expected_compression_dict_misses;
  }
  ASSERT_EQ(expected_filter_misses,
            TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(expected_index_misses,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(expected_compression_dict_misses,
            TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
}
#endif
INSTANTIATE_TEST_CASE_P(
    DBBlockCachePinningTest, DBBlockCachePinningTest,
    ::testing::Combine(
        ::testing::Bool(),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll),
        ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
                          PinningTier::kAll)));

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}