block_based_table_reader_test.cc 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include "table/block_based/block_based_table_reader.h"
  6. #include <cmath>
  7. #include <memory>
  8. #include <string>
  9. #include "cache/cache_reservation_manager.h"
  10. #include "db/db_test_util.h"
  11. #include "db/table_properties_collector.h"
  12. #include "file/file_util.h"
  13. #include "options/options_helper.h"
  14. #include "port/port.h"
  15. #include "port/stack_trace.h"
  16. #include "rocksdb/compression_type.h"
  17. #include "rocksdb/db.h"
  18. #include "rocksdb/file_system.h"
  19. #include "rocksdb/options.h"
  20. #include "table/block_based/block_based_table_builder.h"
  21. #include "table/block_based/block_based_table_factory.h"
  22. #include "table/block_based/block_based_table_iterator.h"
  23. #include "table/block_based/partitioned_index_iterator.h"
  24. #include "table/format.h"
  25. #include "test_util/testharness.h"
  26. #include "test_util/testutil.h"
  27. #include "util/random.h"
  28. // Enable io_uring support for this test
  29. extern "C" bool RocksDbIOUringEnable() { return true; }
  30. namespace ROCKSDB_NAMESPACE {
  31. class BlockBasedTableReaderBaseTest : public testing::Test {
  32. public:
  33. static constexpr int kBytesPerEntry = 256;
  34. // 16 = (default block size) 4 * 1024 / kBytesPerEntry
  35. static constexpr int kEntriesPerBlock = 16;
  36. protected:
  37. // Prepare key-value pairs to occupy multiple blocks.
  38. // Each (key, value) pair is `kBytesPerEntry` byte, every kEntriesPerBlock
  39. // pairs constitute 1 block.
  40. // If mixed_with_human_readable_string_value == true,
  41. // then adjacent blocks contain values with different compression
  42. // complexity: human readable strings are easier to compress than random
  43. // strings. key is an internal key.
  44. // When ts_sz > 0 and `same_key_diff_ts` is true, this
  45. // function generate keys with the same user provided key, with different
  46. // user defined timestamps and different sequence number to differentiate them
  47. static std::vector<std::pair<std::string, std::string>> GenerateKVMap(
  48. int num_block = 2, bool mixed_with_human_readable_string_value = false,
  49. size_t ts_sz = 0, bool same_key_diff_ts = false,
  50. const Comparator* comparator = BytewiseComparator()) {
  51. std::vector<std::pair<std::string, std::string>> kv;
  52. SequenceNumber seq_no = 0;
  53. uint64_t current_udt = 0;
  54. if (same_key_diff_ts) {
  55. // These numbers are based on the number of keys to create + an arbitrary
  56. // buffer number (100) to avoid overflow.
  57. current_udt = kEntriesPerBlock * num_block + 100;
  58. seq_no = kEntriesPerBlock * num_block + 100;
  59. }
  60. Random rnd(101);
  61. uint32_t key = 0;
  62. // To make each (key, value) pair occupy exactly kBytesPerEntry bytes.
  63. int value_size = kBytesPerEntry - (8 + static_cast<int>(ts_sz) +
  64. static_cast<int>(kNumInternalBytes));
  65. for (int block = 0; block < num_block; block++) {
  66. for (int i = 0; i < kEntriesPerBlock; i++) {
  67. char k[9] = {0};
  68. // Internal key is constructed directly from this key,
  69. // and internal key size is required to be >= 8 bytes,
  70. // so use %08u as the format string.
  71. snprintf(k, sizeof(k), "%08u", key);
  72. std::string v;
  73. if (mixed_with_human_readable_string_value) {
  74. v = (block % 2) ? rnd.HumanReadableString(value_size)
  75. : rnd.RandomString(value_size);
  76. } else {
  77. v = rnd.RandomString(value_size);
  78. }
  79. std::string user_key = std::string(k);
  80. if (ts_sz > 0) {
  81. if (same_key_diff_ts) {
  82. PutFixed64(&user_key, current_udt);
  83. current_udt -= 1;
  84. } else {
  85. PutFixed64(&user_key, 0);
  86. }
  87. }
  88. InternalKey internal_key(user_key, seq_no, ValueType::kTypeValue);
  89. kv.emplace_back(internal_key.Encode().ToString(), v);
  90. if (same_key_diff_ts) {
  91. seq_no -= 1;
  92. } else {
  93. key++;
  94. }
  95. }
  96. }
  97. auto comparator_name = std::string(comparator->Name());
  98. if (comparator_name.find("Reverse") != std::string::npos) {
  99. std::reverse(kv.begin(), kv.end());
  100. }
  101. return kv;
  102. }
  103. void SetUp() override {
  104. SetupSyncPointsToMockDirectIO();
  105. test_dir_ = test::PerThreadDBPath("block_based_table_reader_test");
  106. env_ = Env::Default();
  107. fs_ = FileSystem::Default();
  108. ASSERT_OK(fs_->CreateDir(test_dir_, IOOptions(), nullptr));
  109. ConfigureTableFactory();
  110. }
  111. virtual void ConfigureTableFactory() = 0;
  112. void TearDown() override { EXPECT_OK(DestroyDir(env_, test_dir_)); }
  113. // Creates a table with the specificied key value pairs (kv).
  114. void CreateTable(const std::string& table_name,
  115. const ImmutableOptions& ioptions,
  116. const CompressionType& compression_type,
  117. const std::vector<std::pair<std::string, std::string>>& kv,
  118. uint32_t compression_parallel_threads = 1,
  119. uint32_t compression_dict_bytes = 0) {
  120. std::unique_ptr<WritableFileWriter> writer;
  121. NewFileWriter(table_name, &writer);
  122. InternalKeyComparator comparator(ioptions.user_comparator);
  123. ColumnFamilyOptions cf_options;
  124. cf_options.comparator = ioptions.user_comparator;
  125. cf_options.prefix_extractor = options_.prefix_extractor;
  126. MutableCFOptions moptions(cf_options);
  127. CompressionOptions compression_opts;
  128. compression_opts.parallel_threads = compression_parallel_threads;
  129. // Enable compression dictionary and set a buffering limit that is the same
  130. // as each block's size.
  131. compression_opts.max_dict_bytes = compression_dict_bytes;
  132. compression_opts.max_dict_buffer_bytes = compression_dict_bytes;
  133. InternalTblPropCollFactories factories;
  134. const ReadOptions read_options;
  135. const WriteOptions write_options;
  136. std::unique_ptr<TableBuilder> table_builder(
  137. options_.table_factory->NewTableBuilder(
  138. TableBuilderOptions(ioptions, moptions, read_options, write_options,
  139. comparator, &factories, compression_type,
  140. compression_opts, 0 /* column_family_id */,
  141. kDefaultColumnFamilyName, -1 /* level */,
  142. kUnknownNewestKeyTime),
  143. writer.get()));
  144. // Build table.
  145. for (auto it = kv.begin(); it != kv.end(); it++) {
  146. std::string v = it->second;
  147. table_builder->Add(it->first, v);
  148. }
  149. ASSERT_OK(table_builder->Finish());
  150. }
  151. void NewBlockBasedTableReader(const FileOptions& foptions,
  152. const ImmutableOptions& ioptions,
  153. const InternalKeyComparator& comparator,
  154. const std::string& table_name,
  155. std::unique_ptr<BlockBasedTable>* table,
  156. bool prefetch_index_and_filter_in_cache = true,
  157. Status* status = nullptr,
  158. bool user_defined_timestamps_persisted = true) {
  159. const MutableCFOptions moptions(options_);
  160. TableReaderOptions table_reader_options = TableReaderOptions(
  161. ioptions, moptions.prefix_extractor, moptions.compression_manager.get(),
  162. foptions, comparator, 0 /* block_protection_bytes_per_key */,
  163. false /* _skip_filters */, false /* _immortal */,
  164. false /* _force_direct_prefetch */, -1 /* _level */,
  165. nullptr /* _block_cache_tracer */,
  166. 0 /* _max_file_size_for_l0_meta_pin */, "" /* _cur_db_session_id */,
  167. table_num_++ /* _cur_file_num */, {} /* _unique_id */,
  168. 0 /* _largest_seqno */, 0 /* _tail_size */,
  169. user_defined_timestamps_persisted);
  170. std::unique_ptr<RandomAccessFileReader> file;
  171. NewFileReader(table_name, foptions, &file, ioptions.statistics.get());
  172. uint64_t file_size = 0;
  173. ASSERT_OK(env_->GetFileSize(Path(table_name), &file_size));
  174. ReadOptions read_opts;
  175. read_opts.verify_checksums = true;
  176. std::unique_ptr<TableReader> general_table;
  177. Status s = options_.table_factory->NewTableReader(
  178. read_opts, table_reader_options, std::move(file), file_size,
  179. &general_table, prefetch_index_and_filter_in_cache);
  180. if (s.ok()) {
  181. table->reset(static_cast<BlockBasedTable*>(general_table.release()));
  182. }
  183. if (status) {
  184. *status = s;
  185. } else {
  186. ASSERT_OK(s);
  187. }
  188. }
  189. std::string Path(const std::string& fname) { return test_dir_ + "/" + fname; }
  190. std::string test_dir_;
  191. Env* env_;
  192. std::shared_ptr<FileSystem> fs_;
  193. Options options_;
  194. uint64_t table_num_{0};
  195. private:
  196. void WriteToFile(const std::string& content, const std::string& filename) {
  197. std::unique_ptr<FSWritableFile> f;
  198. ASSERT_OK(fs_->NewWritableFile(Path(filename), FileOptions(), &f, nullptr));
  199. ASSERT_OK(f->Append(content, IOOptions(), nullptr));
  200. ASSERT_OK(f->Close(IOOptions(), nullptr));
  201. }
  202. void NewFileWriter(const std::string& filename,
  203. std::unique_ptr<WritableFileWriter>* writer) {
  204. std::string path = Path(filename);
  205. EnvOptions env_options;
  206. FileOptions foptions;
  207. std::unique_ptr<FSWritableFile> file;
  208. ASSERT_OK(fs_->NewWritableFile(path, foptions, &file, nullptr));
  209. writer->reset(new WritableFileWriter(std::move(file), path, env_options));
  210. }
  211. void NewFileReader(const std::string& filename, const FileOptions& opt,
  212. std::unique_ptr<RandomAccessFileReader>* reader,
  213. Statistics* stats = nullptr) {
  214. std::string path = Path(filename);
  215. std::unique_ptr<FSRandomAccessFile> f;
  216. ASSERT_OK(fs_->NewRandomAccessFile(path, opt, &f, nullptr));
  217. reader->reset(new RandomAccessFileReader(std::move(f), path,
  218. env_->GetSystemClock().get(),
  219. /*io_tracer=*/nullptr,
  220. /*stats=*/stats));
  221. }
  222. };
  223. struct BlockBasedTableReaderTestParam {
  224. BlockBasedTableReaderTestParam(
  225. CompressionType _compression_type, bool _use_direct_reads,
  226. BlockBasedTableOptions::IndexType _index_type, bool _no_block_cache,
  227. test::UserDefinedTimestampTestMode _udt_test_mode,
  228. uint32_t _compression_parallel_threads, uint32_t _compression_dict_bytes,
  229. bool _same_key_diff_ts, const Comparator* _comparator, bool _fill_cache,
  230. bool _use_async_io, bool _block_align, size_t _super_block_alignment_size,
  231. size_t _super_block_alignment_space_overhead_ratio)
  232. : compression_type(_compression_type),
  233. use_direct_reads(_use_direct_reads),
  234. index_type(_index_type),
  235. no_block_cache(_no_block_cache),
  236. udt_test_mode(_udt_test_mode),
  237. compression_parallel_threads(_compression_parallel_threads),
  238. compression_dict_bytes(_compression_dict_bytes),
  239. same_key_diff_ts(_same_key_diff_ts),
  240. comparator(_comparator),
  241. fill_cache(_fill_cache),
  242. use_async_io(_use_async_io),
  243. block_align(_block_align),
  244. super_block_alignment_size(_super_block_alignment_size),
  245. super_block_alignment_space_overhead_ratio(
  246. _super_block_alignment_space_overhead_ratio) {}
  247. CompressionType compression_type;
  248. bool use_direct_reads;
  249. BlockBasedTableOptions::IndexType index_type;
  250. bool no_block_cache;
  251. test::UserDefinedTimestampTestMode udt_test_mode;
  252. uint32_t compression_parallel_threads;
  253. uint32_t compression_dict_bytes;
  254. bool same_key_diff_ts;
  255. const Comparator* comparator;
  256. bool fill_cache;
  257. bool use_async_io;
  258. bool block_align;
  259. size_t super_block_alignment_size;
  260. size_t super_block_alignment_space_overhead_ratio;
  261. };
  262. // Define operator<< for SpotLockManagerTestParam to stop valgrind from
  263. // complaining uinitialized value when printing SpotLockManagerTestParam.
  264. std::ostream& operator<<(std::ostream& os,
  265. const BlockBasedTableReaderTestParam& param) {
  266. os << "compression_type: " << CompressionTypeToString(param.compression_type)
  267. << " use_direct_reads: " << param.use_direct_reads
  268. << " index_type: " << static_cast<int>(param.index_type)
  269. << " no_block_cache: " << param.no_block_cache
  270. << " udt_test_mode: " << static_cast<int>(param.udt_test_mode)
  271. << " compression_parallel_threads: " << param.compression_parallel_threads
  272. << " compression_dict_bytes: " << param.compression_dict_bytes
  273. << " same_key_diff_ts: " << param.same_key_diff_ts
  274. << " comparator: " << param.comparator->Name()
  275. << " fill_cache: " << param.fill_cache
  276. << " use_async_io: " << param.use_async_io
  277. << " block_align: " << param.block_align
  278. << " super_block_alignment_size: " << param.super_block_alignment_size
  279. << " super_block_alignment_space_overhead_ratio: "
  280. << param.super_block_alignment_space_overhead_ratio;
  281. return os;
  282. }
// Param 1:  compression type
// Param 2:  whether to use direct reads
// Param 3:  Block Based Table Index type
// Param 4:  BBTO no_block_cache option
// Param 5:  test mode for the user-defined timestamp feature
// Param 6:  number of parallel compression threads
// Param 7:  CompressionOptions.max_dict_bytes and
//           CompressionOptions.max_dict_buffer_bytes to enable/disable
//           compression dictionary.
// Param 8:  test mode to specify the pattern for generating key / value. When
//           true, generate keys with the same user provided key, different
//           user-defined timestamps (if udt enabled), different sequence
//           numbers. This test mode is used for testing `Get`. When false,
//           generate keys with different user provided key, same user-defined
//           timestamps (if udt enabled), same sequence number. This test mode
//           is used for testing `Get`, `MultiGet`, and `NewIterator`.
// Param 9:  test both the default comparator and a reverse comparator.
// Param 10: ReadOptions fill_cache option
// Param 11: whether to use async IO
// Param 12: BBTO block_align option
// Param 13: BBTO super_block_alignment_size option
// Param 14: BBTO super_block_alignment_space_overhead_ratio option
  300. class BlockBasedTableReaderTest
  301. : public BlockBasedTableReaderBaseTest,
  302. public testing::WithParamInterface<BlockBasedTableReaderTestParam> {
  303. protected:
  304. void SetUp() override {
  305. auto param = GetParam();
  306. compression_type_ = param.compression_type;
  307. use_direct_reads_ = param.use_direct_reads;
  308. test::UserDefinedTimestampTestMode udt_test_mode = param.udt_test_mode;
  309. udt_enabled_ = test::IsUDTEnabled(udt_test_mode);
  310. persist_udt_ = test::ShouldPersistUDT(udt_test_mode);
  311. compression_parallel_threads_ = param.compression_parallel_threads;
  312. compression_dict_bytes_ = param.compression_dict_bytes;
  313. same_key_diff_ts_ = param.same_key_diff_ts;
  314. comparator_ = param.comparator;
  315. BlockBasedTableReaderBaseTest::SetUp();
  316. }
  317. void ConfigureTableFactory() override {
  318. BlockBasedTableOptions opts;
  319. auto param = GetParam();
  320. opts.index_type = param.index_type;
  321. opts.no_block_cache = param.no_block_cache;
  322. opts.super_block_alignment_size = param.super_block_alignment_size;
  323. opts.super_block_alignment_space_overhead_ratio =
  324. param.super_block_alignment_space_overhead_ratio;
  325. opts.filter_policy.reset(NewBloomFilterPolicy(10, false));
  326. opts.partition_filters =
  327. opts.index_type ==
  328. BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
  329. opts.metadata_cache_options.partition_pinning = PinningTier::kAll;
  330. options_.table_factory.reset(
  331. static_cast<BlockBasedTableFactory*>(NewBlockBasedTableFactory(opts)));
  332. options_.prefix_extractor =
  333. std::shared_ptr<const SliceTransform>(NewFixedPrefixTransform(3));
  334. }
  335. CompressionType compression_type_;
  336. bool use_direct_reads_;
  337. bool udt_enabled_;
  338. bool persist_udt_;
  339. uint32_t compression_parallel_threads_;
  340. uint32_t compression_dict_bytes_;
  341. bool same_key_diff_ts_;
  342. const Comparator* comparator_{};
  343. };
  344. class BlockBasedTableReaderGetTest : public BlockBasedTableReaderTest {};
  345. TEST_P(BlockBasedTableReaderGetTest, Get) {
  346. Options options;
  347. if (udt_enabled_) {
  348. options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  349. }
  350. options.persist_user_defined_timestamps = persist_udt_;
  351. size_t ts_sz = options.comparator->timestamp_size();
  352. std::vector<std::pair<std::string, std::string>> kv =
  353. BlockBasedTableReaderBaseTest::GenerateKVMap(
  354. 100 /* num_block */,
  355. true /* mixed_with_human_readable_string_value */, ts_sz,
  356. same_key_diff_ts_);
  357. std::string table_name = "BlockBasedTableReaderGetTest_Get" +
  358. CompressionTypeToString(compression_type_);
  359. ImmutableOptions ioptions(options);
  360. CreateTable(table_name, ioptions, compression_type_, kv,
  361. compression_parallel_threads_, compression_dict_bytes_);
  362. std::unique_ptr<BlockBasedTable> table;
  363. FileOptions foptions;
  364. foptions.use_direct_reads = use_direct_reads_;
  365. InternalKeyComparator comparator(options.comparator);
  366. NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
  367. true /* prefetch_index_and_filter_in_cache */,
  368. nullptr /* status */, persist_udt_);
  369. ReadOptions read_opts;
  370. ASSERT_OK(
  371. table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum));
  372. for (size_t i = 0; i < kv.size(); i += 1) {
  373. Slice key = kv[i].first;
  374. Slice lkey = key;
  375. std::string lookup_ikey;
  376. if (udt_enabled_ && !persist_udt_) {
  377. // When user-defined timestamps are collapsed to be the minimum timestamp,
  378. // we also read with the minimum timestamp to be able to retrieve each
  379. // value.
  380. ReplaceInternalKeyWithMinTimestamp(&lookup_ikey, key, ts_sz);
  381. lkey = lookup_ikey;
  382. }
  383. // Reading the first entry in a block caches the whole block.
  384. if (i % kEntriesPerBlock == 0) {
  385. ASSERT_FALSE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
  386. } else {
  387. ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
  388. }
  389. PinnableSlice value;
  390. GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
  391. GetContext::kNotFound, ExtractUserKey(key), &value,
  392. nullptr, nullptr, nullptr, nullptr,
  393. true /* do_merge */, nullptr, nullptr, nullptr,
  394. nullptr, nullptr, nullptr);
  395. ASSERT_OK(table->Get(read_opts, lkey, &get_context, nullptr));
  396. ASSERT_EQ(value.ToString(), kv[i].second);
  397. ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
  398. }
  399. }
  400. // Tests MultiGet in both direct IO and non-direct IO mode.
  401. // The keys should be in cache after MultiGet.
  402. TEST_P(BlockBasedTableReaderTest, MultiGet) {
  403. Options options;
  404. ReadOptions read_opts;
  405. std::string dummy_ts(sizeof(uint64_t), '\0');
  406. Slice read_timestamp = dummy_ts;
  407. if (udt_enabled_) {
  408. options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  409. read_opts.timestamp = &read_timestamp;
  410. }
  411. options.persist_user_defined_timestamps = persist_udt_;
  412. size_t ts_sz = options.comparator->timestamp_size();
  413. std::vector<std::pair<std::string, std::string>> kv =
  414. BlockBasedTableReaderBaseTest::GenerateKVMap(
  415. 100 /* num_block */,
  416. true /* mixed_with_human_readable_string_value */, ts_sz);
  417. // Prepare keys, values, and statuses for MultiGet.
  418. autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> keys;
  419. autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> keys_without_timestamps;
  420. autovector<PinnableSlice, MultiGetContext::MAX_BATCH_SIZE> values;
  421. autovector<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
  422. autovector<const std::string*, MultiGetContext::MAX_BATCH_SIZE>
  423. expected_values;
  424. {
  425. const int step =
  426. static_cast<int>(kv.size()) / MultiGetContext::MAX_BATCH_SIZE;
  427. auto it = kv.begin();
  428. for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) {
  429. keys.emplace_back(it->first);
  430. if (ts_sz > 0) {
  431. Slice ukey_without_ts =
  432. ExtractUserKeyAndStripTimestamp(it->first, ts_sz);
  433. keys_without_timestamps.push_back(ukey_without_ts);
  434. } else {
  435. keys_without_timestamps.emplace_back(ExtractUserKey(it->first));
  436. }
  437. values.emplace_back();
  438. statuses.emplace_back();
  439. expected_values.push_back(&(it->second));
  440. std::advance(it, step);
  441. }
  442. }
  443. std::string table_name = "BlockBasedTableReaderTest_MultiGet" +
  444. CompressionTypeToString(compression_type_);
  445. ImmutableOptions ioptions(options);
  446. CreateTable(table_name, ioptions, compression_type_, kv,
  447. compression_parallel_threads_, compression_dict_bytes_);
  448. std::unique_ptr<BlockBasedTable> table;
  449. FileOptions foptions;
  450. foptions.use_direct_reads = use_direct_reads_;
  451. InternalKeyComparator comparator(options.comparator);
  452. NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
  453. true /* bool prefetch_index_and_filter_in_cache */,
  454. nullptr /* status */, persist_udt_);
  455. ASSERT_OK(
  456. table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum));
  457. // Ensure that keys are not in cache before MultiGet.
  458. for (auto& key : keys) {
  459. ASSERT_FALSE(table->TEST_KeyInCache(read_opts, key.ToString()));
  460. }
  461. // Prepare MultiGetContext.
  462. autovector<GetContext, MultiGetContext::MAX_BATCH_SIZE> get_context;
  463. autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
  464. autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
  465. for (size_t i = 0; i < keys.size(); ++i) {
  466. get_context.emplace_back(options.comparator, nullptr, nullptr, nullptr,
  467. GetContext::kNotFound, ExtractUserKey(keys[i]),
  468. &values[i], nullptr, nullptr, nullptr, nullptr,
  469. true /* do_merge */, nullptr, nullptr, nullptr,
  470. nullptr, nullptr, nullptr);
  471. key_context.emplace_back(nullptr, keys_without_timestamps[i], &values[i],
  472. nullptr, nullptr, &statuses.back());
  473. key_context.back().get_context = &get_context.back();
  474. }
  475. for (auto& key_ctx : key_context) {
  476. sorted_keys.emplace_back(&key_ctx);
  477. }
  478. MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, read_opts,
  479. fs_.get(), nullptr);
  480. // Execute MultiGet.
  481. MultiGetContext::Range range = ctx.GetMultiGetRange();
  482. PerfContext* perf_ctx = get_perf_context();
  483. perf_ctx->Reset();
  484. table->MultiGet(read_opts, &range, nullptr);
  485. ASSERT_GE(perf_ctx->block_read_count - perf_ctx->index_block_read_count -
  486. perf_ctx->filter_block_read_count -
  487. perf_ctx->compression_dict_block_read_count,
  488. 1);
  489. ASSERT_GE(perf_ctx->block_read_byte, 1);
  490. for (const Status& status : statuses) {
  491. ASSERT_OK(status);
  492. }
  493. // Check that keys are in cache after MultiGet.
  494. for (size_t i = 0; i < keys.size(); i++) {
  495. ASSERT_TRUE(table->TEST_KeyInCache(read_opts, keys[i]));
  496. ASSERT_EQ(values[i].ToString(), *expected_values[i]);
  497. }
  498. }
// Builds a 100-block table and verifies that a table-level iterator returns
// every key/value pair in order via a full forward scan (SeekToFirst/Next)
// and a full backward scan (SeekToLast/Prev).
TEST_P(BlockBasedTableReaderTest, NewIterator) {
  Options options;
  ReadOptions read_opts;
  // Dummy 8-byte (u64) timestamp used as the read timestamp when
  // user-defined timestamps are enabled for this parameterization.
  std::string dummy_ts(sizeof(uint64_t), '\0');
  Slice read_timestamp = dummy_ts;
  if (udt_enabled_) {
    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
    read_opts.timestamp = &read_timestamp;
  }
  options.persist_user_defined_timestamps = persist_udt_;
  size_t ts_sz = options.comparator->timestamp_size();
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          100 /* num_block */,
          true /* mixed_with_human_readable_string_value */, ts_sz);
  // Table name embeds the compression type so parameterized runs don't
  // collide on the same file.
  std::string table_name = "BlockBasedTableReaderTest_NewIterator" +
                           CompressionTypeToString(compression_type_);
  ImmutableOptions ioptions(options);
  CreateTable(table_name, ioptions, compression_type_, kv,
              compression_parallel_threads_, compression_dict_bytes_);
  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = use_direct_reads_;
  InternalKeyComparator comparator(options.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);
  ASSERT_OK(
      table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum));
  std::unique_ptr<InternalIterator> iter;
  // NOTE(review): the prefix extractor is taken from the fixture's options_
  // while the table was built from the local `options` — presumably fine
  // because no prefix extractor is relied on here; confirm if that changes.
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  // Test forward scan.
  ASSERT_TRUE(!iter->Valid());
  iter->SeekToFirst();
  ASSERT_OK(iter->status());
  for (auto kv_iter = kv.begin(); kv_iter != kv.end(); kv_iter++) {
    ASSERT_EQ(iter->key().ToString(), kv_iter->first);
    ASSERT_EQ(iter->value().ToString(), kv_iter->second);
    iter->Next();
    ASSERT_OK(iter->status());
  }
  // Iterator must be exhausted (invalid but not in error) after the scan.
  ASSERT_TRUE(!iter->Valid());
  ASSERT_OK(iter->status());
  // Test backward scan.
  iter->SeekToLast();
  ASSERT_OK(iter->status());
  for (auto kv_iter = kv.rbegin(); kv_iter != kv.rend(); kv_iter++) {
    ASSERT_EQ(iter->key().ToString(), kv_iter->first);
    ASSERT_EQ(iter->value().ToString(), kv_iter->second);
    iter->Prev();
    ASSERT_OK(iter->status());
  }
  ASSERT_TRUE(!iter->Valid());
  ASSERT_OK(iter->status());
}
// Fixture parameterized on whether table-reader memory is charged to the
// block cache (CacheEntryRoleOptions::Decision). Provides a helper to
// estimate how many table readers fit under the cache's capacity limit.
class ChargeTableReaderTest
    : public BlockBasedTableReaderBaseTest,
      public testing::WithParamInterface<
          CacheEntryRoleOptions::Decision /* charge_table_reader_mem */> {
 protected:
  // Returns how many table readers of roughly `approx_table_reader_mem`
  // bytes each can be reserved against a cache of `cache_capacity` bytes
  // before the reservation hits the capacity limit.
  static std::size_t CalculateMaxTableReaderNumBeforeCacheFull(
      std::size_t cache_capacity, std::size_t approx_table_reader_mem) {
    // To make calculation easier for testing
    assert(cache_capacity % CacheReservationManagerImpl<
                                CacheEntryRole::kBlockBasedTableReader>::
                                GetDummyEntrySize() ==
               0 &&
           cache_capacity >= 2 * CacheReservationManagerImpl<
                                     CacheEntryRole::kBlockBasedTableReader>::
                                     GetDummyEntrySize());
    // We need to subtract 1 for max_num_dummy_entry to account for dummy
    // entries' overhead, assumed the overhead is no greater than 1 dummy entry
    // size
    std::size_t max_num_dummy_entry =
        (size_t)std::floor((
            1.0 * cache_capacity /
            CacheReservationManagerImpl<
                CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize())) -
        1;
    std::size_t cache_capacity_rounded_to_dummy_entry_multiples =
        max_num_dummy_entry *
        CacheReservationManagerImpl<
            CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize();
    std::size_t max_table_reader_num_capped = static_cast<std::size_t>(
        std::floor(1.0 * cache_capacity_rounded_to_dummy_entry_multiples /
                   approx_table_reader_mem));
    return max_table_reader_num_capped;
  }

  void SetUp() override {
    // To cache and re-use the same kv map and compression type in the test
    // suite for eliminating variance caused by these two factors
    kv_ = BlockBasedTableReaderBaseTest::GenerateKVMap();
    compression_type_ = CompressionType::kNoCompression;
    // Cache sized to exactly 4 dummy entries, with a strict capacity limit
    // so over-reservation fails rather than evicts.
    table_reader_charge_tracking_cache_ = std::make_shared<
        TargetCacheChargeTrackingCache<
            CacheEntryRole::kBlockBasedTableReader>>((NewLRUCache(
        4 * CacheReservationManagerImpl<
                CacheEntryRole::kBlockBasedTableReader>::GetDummyEntrySize(),
        0 /* num_shard_bits */, true /* strict_capacity_limit */)));
    // To ApproximateTableReaderMem() without being affected by
    // the feature of charging its memory, we turn off the feature
    charge_table_reader_ = CacheEntryRoleOptions::Decision::kDisabled;
    BlockBasedTableReaderBaseTest::SetUp();
    approx_table_reader_mem_ = ApproximateTableReaderMem();
    // Now we conditionally turn on the feature to test
    charge_table_reader_ = GetParam();
    ConfigureTableFactory();
  }

  // Rebuilds the table factory so the current charge_table_reader_ decision
  // and the tracking block cache take effect for subsequently created tables.
  void ConfigureTableFactory() override {
    BlockBasedTableOptions table_options;
    table_options.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kBlockBasedTableReader,
         {/*.charged = */ charge_table_reader_}});
    table_options.block_cache = table_reader_charge_tracking_cache_;
    table_options.cache_index_and_filter_blocks = false;
    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
    table_options.partition_filters = true;
    table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
    options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
  }

  CacheEntryRoleOptions::Decision charge_table_reader_;
  std::shared_ptr<
      TargetCacheChargeTrackingCache<CacheEntryRole::kBlockBasedTableReader>>
      table_reader_charge_tracking_cache_;
  std::size_t approx_table_reader_mem_;
  std::vector<std::pair<std::string, std::string>> kv_;
  CompressionType compression_type_;

 private:
  // Measures one table reader's memory footprint with charging disabled,
  // used as the per-reader cost estimate in the test below.
  std::size_t ApproximateTableReaderMem() {
    std::size_t approx_table_reader_mem = 0;
    std::string table_name = "table_for_approx_table_reader_mem";
    ImmutableOptions ioptions(options_);
    CreateTable(table_name, ioptions, compression_type_, kv_);
    std::unique_ptr<BlockBasedTable> table;
    Status s;
    NewBlockBasedTableReader(
        FileOptions(), ImmutableOptions(options_),
        InternalKeyComparator(options_.comparator), table_name, &table,
        false /* prefetch_index_and_filter_in_cache */, &s);
    assert(s.ok());
    approx_table_reader_mem = table->ApproximateMemoryUsage();
    assert(approx_table_reader_mem > 0);
    return approx_table_reader_mem;
  }
};
// Run ChargeTableReaderTest with table-reader charging both enabled and
// disabled.
INSTANTIATE_TEST_CASE_P(
    ChargeTableReaderTest, ChargeTableReaderTest,
    ::testing::Values(CacheEntryRoleOptions::Decision::kEnabled,
                      CacheEntryRoleOptions::Decision::kDisabled));
// Opens table readers until either the cache-charged memory limit is hit
// (charging enabled) or an uncapped target count is reached (charging
// disabled), and verifies the observed limits and final cache charge.
TEST_P(ChargeTableReaderTest, Basic) {
  const std::size_t max_table_reader_num_capped =
      ChargeTableReaderTest::CalculateMaxTableReaderNumBeforeCacheFull(
          table_reader_charge_tracking_cache_->GetCapacity(),
          approx_table_reader_mem_);

  // Acceptable estimation errors coming from
  // 1. overestimate max_table_reader_num_capped due to # dummy entries is high
  // and results in metadata charge overhead greater than 1 dummy entry size
  // (violating our assumption in calculating max_table_reader_num_capped)
  // 2. overestimate/underestimate max_table_reader_num_capped due to the gap
  // between ApproximateTableReaderMem() and actual table reader mem
  std::size_t max_table_reader_num_capped_upper_bound =
      (std::size_t)(max_table_reader_num_capped * 1.05);
  std::size_t max_table_reader_num_capped_lower_bound =
      (std::size_t)(max_table_reader_num_capped * 0.95);
  std::size_t max_table_reader_num_uncapped =
      (std::size_t)(max_table_reader_num_capped * 1.1);
  ASSERT_GT(max_table_reader_num_uncapped,
            max_table_reader_num_capped_upper_bound)
      << "We need `max_table_reader_num_uncapped` > "
         "`max_table_reader_num_capped_upper_bound` to differentiate cases "
         "between "
         "charge_table_reader_ == kDisabled and == kEnabled)";

  Status s = Status::OK();
  std::size_t opened_table_reader_num = 0;
  std::string table_name;
  std::vector<std::unique_ptr<BlockBasedTable>> tables;
  ImmutableOptions ioptions(options_);
  // Keep creating BlockBasedTableReader till hitting the memory limit based on
  // cache capacity and creation fails (when charge_table_reader_ ==
  // kEnabled) or reaching a specified big number of table readers (when
  // charge_table_reader_ == kDisabled)
  while (s.ok() && opened_table_reader_num < max_table_reader_num_uncapped) {
    table_name = "table_" + std::to_string(opened_table_reader_num);
    CreateTable(table_name, ioptions, compression_type_, kv_);
    tables.push_back(std::unique_ptr<BlockBasedTable>());
    NewBlockBasedTableReader(
        FileOptions(), ImmutableOptions(options_),
        InternalKeyComparator(options_.comparator), table_name, &tables.back(),
        false /* prefetch_index_and_filter_in_cache */, &s);
    if (s.ok()) {
      ++opened_table_reader_num;
    }
  }

  if (charge_table_reader_ == CacheEntryRoleOptions::Decision::kEnabled) {
    // Creation must have stopped with a memory-limit error that names the
    // charged cache entry role.
    EXPECT_TRUE(s.IsMemoryLimit()) << "s: " << s.ToString();
    EXPECT_TRUE(s.ToString().find(
                    kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
                        CacheEntryRole::kBlockBasedTableReader)]) !=
                std::string::npos);
    EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") !=
                std::string::npos);

    EXPECT_GE(opened_table_reader_num, max_table_reader_num_capped_lower_bound);
    EXPECT_LE(opened_table_reader_num, max_table_reader_num_capped_upper_bound);

    std::size_t updated_max_table_reader_num_capped =
        ChargeTableReaderTest::CalculateMaxTableReaderNumBeforeCacheFull(
            table_reader_charge_tracking_cache_->GetCapacity() / 2,
            approx_table_reader_mem_);

    // Keep deleting BlockBasedTableReader to lower down memory usage from the
    // memory limit to make the next creation succeed
    while (opened_table_reader_num >= updated_max_table_reader_num_capped) {
      tables.pop_back();
      --opened_table_reader_num;
    }
    table_name = "table_for_successful_table_reader_open";
    CreateTable(table_name, ioptions, compression_type_, kv_);
    tables.push_back(std::unique_ptr<BlockBasedTable>());
    NewBlockBasedTableReader(
        FileOptions(), ImmutableOptions(options_),
        InternalKeyComparator(options_.comparator), table_name, &tables.back(),
        false /* prefetch_index_and_filter_in_cache */, &s);
    EXPECT_TRUE(s.ok()) << s.ToString();

    // Destroying all readers must release every reservation.
    tables.clear();
    EXPECT_EQ(table_reader_charge_tracking_cache_->GetCacheCharge(), 0);
  } else {
    // Charging disabled: all creations succeed and nothing is charged.
    EXPECT_TRUE(s.ok() &&
                opened_table_reader_num == max_table_reader_num_uncapped)
        << "s: " << s.ToString() << " opened_table_reader_num: "
        << std::to_string(opened_table_reader_num);
    EXPECT_EQ(table_reader_charge_tracking_cache_->GetCacheCharge(), 0);
  }
}
  732. class StrictCapacityLimitReaderTest : public BlockBasedTableReaderTest {
  733. public:
  734. StrictCapacityLimitReaderTest() : BlockBasedTableReaderTest() {}
  735. protected:
  736. void ConfigureTableFactory() override {
  737. BlockBasedTableOptions table_options;
  738. table_options.block_cache = std::make_shared<
  739. TargetCacheChargeTrackingCache<CacheEntryRole::kBlockBasedTableReader>>(
  740. (NewLRUCache(4 * 1024, 0 /* num_shard_bits */,
  741. true /* strict_capacity_limit */)));
  742. table_options.cache_index_and_filter_blocks = false;
  743. table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
  744. table_options.partition_filters = true;
  745. table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
  746. options_.table_factory.reset(NewBlockBasedTableFactory(table_options));
  747. }
  748. };
// Point-lookup every key of a 2-block table through a strict-capacity block
// cache and require that at least one Get() fails with MemoryLimit when the
// cache cannot hold another block.
TEST_P(StrictCapacityLimitReaderTest, Get) {
  // Test that we get error status when we exceed
  // the strict_capacity_limit
  Options options;
  size_t ts_sz = options.comparator->timestamp_size();
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          2 /* num_block */, true /* mixed_with_human_readable_string_value */,
          ts_sz, false);
  std::string table_name = "StrictCapacityLimitReaderTest_Get" +
                           CompressionTypeToString(compression_type_);
  ImmutableOptions ioptions(options);
  CreateTable(table_name, ioptions, compression_type_, kv);
  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = true;
  InternalKeyComparator comparator(options.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* prefetch_index_and_filter_in_cache */,
                           nullptr /* status */);
  ReadOptions read_opts;
  ASSERT_OK(
      table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum));
  bool hit_memory_limit = false;
  for (size_t i = 0; i < kv.size(); i += 1) {
    Slice key = kv[i].first;
    Slice lkey = key;
    // NOTE(review): lookup_ikey appears unused in this loop.
    std::string lookup_ikey;
    // Reading the first entry in a block caches the whole block.
    if (i % kEntriesPerBlock == 0) {
      ASSERT_FALSE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
    } else if (!hit_memory_limit) {
      // Subsequent entries of a block should hit the cached block — unless a
      // prior insertion already failed due to the capacity limit.
      ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
    }
    PinnableSlice value;
    GetContext get_context(options.comparator, nullptr, nullptr, nullptr,
                           GetContext::kNotFound, ExtractUserKey(key), &value,
                           nullptr, nullptr, nullptr, nullptr,
                           true /* do_merge */, nullptr, nullptr, nullptr,
                           nullptr, nullptr, nullptr);
    Status s = table->Get(read_opts, lkey, &get_context, nullptr);
    if (!s.ok()) {
      // A failed Get must be a memory-limit failure from the strict cache.
      EXPECT_TRUE(s.IsMemoryLimit());
      EXPECT_TRUE(s.ToString().find("Memory limit reached: Insert failed due "
                                    "to LRU cache being full") !=
                  std::string::npos);
      hit_memory_limit = true;
    } else {
      ASSERT_EQ(value.ToString(), kv[i].second);
      ASSERT_TRUE(table->TEST_KeyInCache(read_opts, lkey.ToString()));
    }
  }
  // The 4 KB cache must be too small for the whole table.
  ASSERT_TRUE(hit_memory_limit);
}
  803. TEST_P(StrictCapacityLimitReaderTest, MultiGet) {
  804. // Test that we get error status when we exceed
  805. // the strict_capacity_limit
  806. Options options;
  807. ReadOptions read_opts;
  808. std::string dummy_ts(sizeof(uint64_t), '\0');
  809. Slice read_timestamp = dummy_ts;
  810. if (udt_enabled_) {
  811. options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  812. read_opts.timestamp = &read_timestamp;
  813. }
  814. options.persist_user_defined_timestamps = persist_udt_;
  815. size_t ts_sz = options.comparator->timestamp_size();
  816. std::vector<std::pair<std::string, std::string>> kv =
  817. BlockBasedTableReaderBaseTest::GenerateKVMap(
  818. 2 /* num_block */, true /* mixed_with_human_readable_string_value */,
  819. ts_sz);
  820. // Prepare keys, values, and statuses for MultiGet.
  821. autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> keys;
  822. autovector<Slice, MultiGetContext::MAX_BATCH_SIZE> keys_without_timestamps;
  823. autovector<PinnableSlice, MultiGetContext::MAX_BATCH_SIZE> values;
  824. autovector<Status, MultiGetContext::MAX_BATCH_SIZE> statuses;
  825. autovector<const std::string*, MultiGetContext::MAX_BATCH_SIZE>
  826. expected_values;
  827. {
  828. const int step =
  829. static_cast<int>(kv.size()) / MultiGetContext::MAX_BATCH_SIZE;
  830. auto it = kv.begin();
  831. for (int i = 0; i < MultiGetContext::MAX_BATCH_SIZE; i++) {
  832. keys.emplace_back(it->first);
  833. if (ts_sz > 0) {
  834. Slice ukey_without_ts =
  835. ExtractUserKeyAndStripTimestamp(it->first, ts_sz);
  836. keys_without_timestamps.push_back(ukey_without_ts);
  837. } else {
  838. keys_without_timestamps.emplace_back(ExtractUserKey(it->first));
  839. }
  840. values.emplace_back();
  841. statuses.emplace_back();
  842. expected_values.push_back(&(it->second));
  843. std::advance(it, step);
  844. }
  845. }
  846. std::string table_name = "StrictCapacityLimitReaderTest_MultiGet" +
  847. CompressionTypeToString(compression_type_);
  848. ImmutableOptions ioptions(options);
  849. CreateTable(table_name, ioptions, compression_type_, kv,
  850. compression_parallel_threads_, compression_dict_bytes_);
  851. std::unique_ptr<BlockBasedTable> table;
  852. FileOptions foptions;
  853. foptions.use_direct_reads = use_direct_reads_;
  854. InternalKeyComparator comparator(options.comparator);
  855. NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
  856. true /* bool prefetch_index_and_filter_in_cache */,
  857. nullptr /* status */, persist_udt_);
  858. ASSERT_OK(
  859. table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum));
  860. // Ensure that keys are not in cache before MultiGet.
  861. for (auto& key : keys) {
  862. ASSERT_FALSE(table->TEST_KeyInCache(read_opts, key.ToString()));
  863. }
  864. // Prepare MultiGetContext.
  865. autovector<GetContext, MultiGetContext::MAX_BATCH_SIZE> get_context;
  866. autovector<KeyContext, MultiGetContext::MAX_BATCH_SIZE> key_context;
  867. autovector<KeyContext*, MultiGetContext::MAX_BATCH_SIZE> sorted_keys;
  868. for (size_t i = 0; i < keys.size(); ++i) {
  869. get_context.emplace_back(options.comparator, nullptr, nullptr, nullptr,
  870. GetContext::kNotFound, ExtractUserKey(keys[i]),
  871. &values[i], nullptr, nullptr, nullptr, nullptr,
  872. true /* do_merge */, nullptr, nullptr, nullptr,
  873. nullptr, nullptr, nullptr);
  874. key_context.emplace_back(nullptr, keys_without_timestamps[i], &values[i],
  875. nullptr, nullptr, &statuses.back());
  876. key_context.back().get_context = &get_context.back();
  877. }
  878. for (auto& key_ctx : key_context) {
  879. sorted_keys.emplace_back(&key_ctx);
  880. }
  881. MultiGetContext ctx(&sorted_keys, 0, sorted_keys.size(), 0, read_opts,
  882. fs_.get(), nullptr);
  883. // Execute MultiGet.
  884. MultiGetContext::Range range = ctx.GetMultiGetRange();
  885. PerfContext* perf_ctx = get_perf_context();
  886. perf_ctx->Reset();
  887. table->MultiGet(read_opts, &range, nullptr);
  888. ASSERT_GE(perf_ctx->block_read_count - perf_ctx->index_block_read_count -
  889. perf_ctx->filter_block_read_count -
  890. perf_ctx->compression_dict_block_read_count,
  891. 1);
  892. ASSERT_GE(perf_ctx->block_read_byte, 1);
  893. bool hit_memory_limit = false;
  894. for (const Status& status : statuses) {
  895. if (!status.ok()) {
  896. EXPECT_TRUE(status.IsMemoryLimit());
  897. hit_memory_limit = true;
  898. }
  899. }
  900. ASSERT_TRUE(hit_memory_limit);
  901. }
  902. class BlockBasedTableReaderTestVerifyChecksum
  903. : public BlockBasedTableReaderTest {
  904. public:
  905. BlockBasedTableReaderTestVerifyChecksum() : BlockBasedTableReaderTest() {}
  906. };
// Corrupts the first 2nd-level index partition of an 800-block table on disk
// and verifies that VerifyChecksum() reports Corruption and bumps the
// BLOCK_CHECKSUM_MISMATCH_COUNT ticker exactly once.
TEST_P(BlockBasedTableReaderTestVerifyChecksum, ChecksumMismatch) {
  Options options;
  ReadOptions read_opts;
  // Dummy 8-byte (u64) timestamp used when user-defined timestamps are on.
  std::string dummy_ts(sizeof(uint64_t), '\0');
  Slice read_timestamp = dummy_ts;
  if (udt_enabled_) {
    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
    read_opts.timestamp = &read_timestamp;
  }
  options.persist_user_defined_timestamps = persist_udt_;
  size_t ts_sz = options.comparator->timestamp_size();
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          800 /* num_block */,
          false /* mixed_with_human_readable_string_value=*/, ts_sz);

  options.statistics = CreateDBStatistics();
  ImmutableOptions ioptions(options);
  std::string table_name =
      "BlockBasedTableReaderTest" + CompressionTypeToString(compression_type_);
  CreateTable(table_name, ioptions, compression_type_, kv,
              compression_parallel_threads_, compression_dict_bytes_);

  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = use_direct_reads_;
  InternalKeyComparator comparator(options.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);

  // Use the top level iterator to find the offset/size of the first
  // 2nd level index block and corrupt the block
  IndexBlockIter iiter_on_stack;
  BlockCacheLookupContext context{TableReaderCaller::kUserVerifyChecksum};
  InternalIteratorBase<IndexValue>* iiter = table->NewIndexIterator(
      read_opts, /*need_upper_bound_check=*/false, &iiter_on_stack,
      /*get_context=*/nullptr, &context);
  std::unique_ptr<InternalIteratorBase<IndexValue>> iiter_unique_ptr;
  if (iiter != &iiter_on_stack) {
    // Heap-allocated iterator: take ownership so it is freed on exit.
    iiter_unique_ptr = std::unique_ptr<InternalIteratorBase<IndexValue>>(iiter);
  }
  ASSERT_OK(iiter->status());
  iiter->SeekToFirst();
  BlockHandle handle = static_cast<PartitionedIndexIterator*>(iiter)
                           ->index_iter_->value()
                           .handle;
  // Close the reader before modifying the file on disk.
  table.reset();

  // Corrupt the block pointed to by handle
  ASSERT_OK(test::CorruptFile(options.env, Path(table_name),
                              static_cast<int>(handle.offset()), 128));

  // Reopen against the corrupted file; the mismatch must be detected by
  // VerifyChecksum, not by the (prefetching) open itself.
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);
  ASSERT_EQ(0,
            options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
  Status s =
      table->VerifyChecksum(read_opts, TableReaderCaller::kUserVerifyChecksum);
  ASSERT_EQ(1,
            options.statistics->getTickerCount(BLOCK_CHECKSUM_MISMATCH_COUNT));
  ASSERT_EQ(s.code(), Status::kCorruption);
}
// Fixture for multi-scan tests.
class BlockBasedTableReaderMultiScanTest : public BlockBasedTableReaderTest {
 public:
  void SetUp() override {
    BlockBasedTableReaderTest::SetUp();
    // Applied after the base SetUp() so it takes precedence over whatever
    // comparator the base fixture configured. comparator_ is a fixture
    // member defined outside this chunk.
    options_.comparator = comparator_;
  }
};
// Separate suite name so multi-scan tests exercising async-IO parameters can
// be instantiated independently (instantiation not shown in this chunk).
class BlockBasedTableReaderMultiScanAsyncIOTest
    : public BlockBasedTableReaderMultiScanTest {};
  975. // TODO: test no block cache case
// Exercises MultiScan Prepare(): I/O coalescing across prepared ranges,
// skipping of already-cached blocks, seek-key/start-key mismatches, the
// prefetch-size limit, and randomized forward-only seeks. Blocks 0-19 and
// 80-99 of the generated kv map are deliberately NOT written to the table.
TEST_P(BlockBasedTableReaderMultiScanAsyncIOTest, MultiScanPrepare) {
  auto param = GetParam();
  auto fill_cache = param.fill_cache;
  auto use_async_io = param.use_async_io;
  options_.statistics = CreateDBStatistics();
  std::shared_ptr<FileSystem> fs = options_.env->GetFileSystem();
  ReadOptions read_opts;
  read_opts.fill_cache = fill_cache;
  size_t ts_sz = options_.comparator->timestamp_size();
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          100 /* num_block */,
          true /* mixed_with_human_readable_string_value */, ts_sz,
          same_key_diff_ts_, comparator_);
  std::string table_name = "BlockBasedTableReaderTest_NewIterator" +
                           CompressionTypeToString(compression_type_) +
                           "_async" + std::to_string(use_async_io);
  ImmutableOptions ioptions(options_);
  // Only insert 60 out of 100 blocks
  CreateTable(table_name, ioptions, compression_type_,
              std::vector<std::pair<std::string, std::string>>{
                  kv.begin() + 20 * kEntriesPerBlock,
                  kv.begin() + 80 * kEntriesPerBlock},
              compression_parallel_threads_, compression_dict_bytes_);
  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = use_direct_reads_;
  InternalKeyComparator comparator(options_.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);

  // 1. Should coalesce into a single I/O
  // Two adjacent ranges (blocks 30-31 and 32-33) are close enough to be
  // fetched in one read; verified via NON_LAST_LEVEL_READ_COUNT delta.
  std::unique_ptr<InternalIterator> iter;
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  MultiScanArgs scan_options(comparator_);
  scan_options.use_async_io = use_async_io;
  scan_options.insert(ExtractUserKey(kv[30 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[31 * kEntriesPerBlock].first));
  scan_options.insert(ExtractUserKey(kv[32 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[33 * kEntriesPerBlock].first));
  auto read_count_before =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  iter->Prepare(&scan_options);
  iter->Seek(kv[30 * kEntriesPerBlock].first);
  for (size_t i = 30 * kEntriesPerBlock; i <= 31 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->status().ok()) << iter->status().ToString();
    ASSERT_TRUE(iter->Valid()) << i;
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  // Iter may still be valid after scan range. Upper layer (DBIter) handles
  // exact upper bound checking. So we don't check !iter->Valid() here.
  ASSERT_OK(iter->status());
  iter->Seek(kv[32 * kEntriesPerBlock].first);
  for (size_t i = 32 * kEntriesPerBlock; i < 33 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  ASSERT_OK(iter->status());
  auto read_count_after =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  ASSERT_EQ(read_count_before + 1, read_count_after);

  // 2. No IO coalesce, should do MultiRead/ReadAsync with 2 read requests.
  // NOTE(review): unlike the other cases, use_async_io is not set on this
  // fresh MultiScanArgs, so this case always runs synchronously — confirm
  // whether that is intentional.
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  scan_options = MultiScanArgs(comparator_);
  scan_options.insert(ExtractUserKey(kv[40 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[45 * kEntriesPerBlock].first));
  scan_options.insert(ExtractUserKey(kv[70 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[75 * kEntriesPerBlock].first));
  read_count_before =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  iter->Prepare(&scan_options);
  iter->Seek(kv[40 * kEntriesPerBlock].first);
  for (size_t i = 40 * kEntriesPerBlock; i < 45 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  ASSERT_OK(iter->status());
  iter->Seek(kv[70 * kEntriesPerBlock].first);
  for (size_t i = 70 * kEntriesPerBlock; i < 75 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  ASSERT_OK(iter->status());
  read_count_after =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  ASSERT_EQ(read_count_before + 2, read_count_after);

  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  // 3. Tests I/O excludes blocks already in cache.
  // Reading blocks from 40-79
  // From reads above, blocks 40-44 and 70-74 already in cache
  // So we should read 45-69, 75-79 in two I/Os.
  // If fill_cache is false, then we'll do one giant I/O.
  scan_options = MultiScanArgs(comparator_);
  scan_options.use_async_io = use_async_io;
  scan_options.insert(ExtractUserKey(kv[40 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[80 * kEntriesPerBlock].first));
  read_count_before =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  iter->Prepare(&scan_options);
  read_count_after =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  if (!use_async_io) {
    if (!fill_cache) {
      ASSERT_EQ(read_count_before + 1, read_count_after);
    } else {
      ASSERT_EQ(read_count_before + 2, read_count_after);
    }
  } else {
    // stat is recorded in async callback which happens in Poll(), and
    // Poll() happens during scanning.
    ASSERT_EQ(read_count_before, read_count_after);
  }
  iter->Seek(kv[40 * kEntriesPerBlock].first);
  for (size_t i = 40 * kEntriesPerBlock; i < 80 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  // Block 80 onward was never written to the table, so the iterator is
  // exhausted exactly at the end of block 79.
  ASSERT_FALSE(iter->Valid());
  ASSERT_OK(iter->status());
  read_count_after =
      options_.statistics->getTickerCount(NON_LAST_LEVEL_READ_COUNT);
  if (!fill_cache) {
    ASSERT_EQ(read_count_before + 1, read_count_after);
  } else {
    ASSERT_EQ(read_count_before + 2, read_count_after);
  }

  // 4. Check cases when Seek key does not match start key in ScanOptions
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  scan_options = MultiScanArgs(comparator_);
  scan_options.use_async_io = use_async_io;
  scan_options.insert(ExtractUserKey(kv[30 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[40 * kEntriesPerBlock].first));
  scan_options.insert(ExtractUserKey(kv[50 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[60 * kEntriesPerBlock].first));
  iter->Prepare(&scan_options);
  // Match start key
  iter->Seek(kv[30 * kEntriesPerBlock].first);
  for (size_t i = 30 * kEntriesPerBlock; i < 40 * kEntriesPerBlock; ++i) {
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key().ToString(), kv[i].first);
    iter->Next();
  }
  ASSERT_OK(iter->status());
  // Seek a key that is larger than next start key is allowed, as long as it is
  // larger than the previous key
  iter->Seek(kv[50 * kEntriesPerBlock + 1].first);
  ASSERT_OK(iter->status());

  // Check seek key going backward
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  scan_options = MultiScanArgs(comparator_);
  scan_options.use_async_io = use_async_io;
  scan_options.insert(ExtractUserKey(kv[30 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[31 * kEntriesPerBlock].first));
  scan_options.insert(ExtractUserKey(kv[32 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[33 * kEntriesPerBlock].first));
  iter->Prepare(&scan_options);
  iter->Seek(kv[32 * kEntriesPerBlock].first);
  auto key = iter->key();
  ASSERT_OK(iter->status());
  iter->Seek(kv[30 * kEntriesPerBlock].first);
  // When seek key goes backward, it is adjusted to the last seeked position.
  // Assert the key read is same as before.
  ASSERT_EQ(key, iter->key());
  ASSERT_OK(iter->status());

  // Test prefetch limit reached.
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  scan_options = MultiScanArgs(comparator_);
  scan_options.use_async_io = use_async_io;
  scan_options.max_prefetch_size = 1024;  // less than block size
  scan_options.insert(ExtractUserKey(kv[30 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[40 * kEntriesPerBlock].first));
  iter->Prepare(&scan_options);
  // Seeking past what could be prefetched under the 1024-byte budget must
  // surface Incomplete rather than silently reading.
  iter->Seek(kv[31 * kEntriesPerBlock].first);
  ASSERT_TRUE(iter->status().IsIncomplete());

  // Randomly seek keys on the file, as long as the key is moving forward, it
  // is allowed
  if (use_async_io) {
    // Skip following test when async io is enabled. There is some issue with
    // IO_uring that I am still trying to root cause.
    // TODO : enable the test again with async IO
    return;
  }
  for (int i = 0; i < 100; i++) {
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    scan_options = MultiScanArgs(comparator_);
    scan_options.use_async_io = use_async_io;
    // Ranges deliberately include keys below block 20 and above block 79,
    // i.e. outside what was written to the table.
    scan_options.insert(ExtractUserKey(kv[5 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[10 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[25 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[35 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[35 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[40 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[45 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[50 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[75 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[85 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[85 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[95 * kEntriesPerBlock].first));
    iter->Prepare(&scan_options);
    auto random_seed = static_cast<uint32_t>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::system_clock::now().time_since_epoch())
            .count());
    Random rnd(random_seed);
    // NOTE(review): bare stdout print of the seed; SCOPED_TRACE below already
    // reports it on failure — presumably leftover debug output.
    std::cout << random_seed << std::endl;
    SCOPED_TRACE("Random seed " + std::to_string(random_seed));
    // Search key always start from the start key of first prepared range.
    int last_read_key_index = rnd.Uniform(100) + 5 * kEntriesPerBlock;
    while (last_read_key_index < 100 * kEntriesPerBlock) {
      iter->Seek(kv[last_read_key_index].first);
      EXPECT_OK(iter->status());
      // iterate for a few keys
      while (iter->Valid()) {
        iter->Next();
        last_read_key_index++;
        EXPECT_OK(iter->status());
      }
      // Jump forward a random distance; seeks must only move forward.
      last_read_key_index += rnd.Uniform(100);
    }
  }
}
// Verifies that MultiScanArgs::max_prefetch_size caps how much data
// Prepare() prefetches for the requested scan ranges, and that iteration
// surfaces Status::PrefetchLimitReached() once a scan runs past the
// prefetched blocks. Cases covered: budget smaller than one block, budget
// covering only a prefix of a range, interaction with blocks already in the
// block cache, multiple scan ranges sharing one budget, and a budget large
// enough that the limit is never hit.
TEST_P(BlockBasedTableReaderMultiScanTest, MultiScanPrefetchSizeLimit) {
  if (compression_type_ != kNoCompression) {
    // This test relies on block sizes to be close to what's set in option.
    ROCKSDB_GTEST_BYPASS("This test assumes no compression.");
    return;
  }
  ReadOptions read_opts;
  size_t ts_sz = options_.comparator->timestamp_size();
  // Generate data that spans multiple blocks
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          20 /* num_block */, true /* mixed_with_human_readable_string_value */,
          ts_sz, same_key_diff_ts_, comparator_);
  std::string table_name = "BlockBasedTableReaderTest_PrefetchSizeLimit" +
                           CompressionTypeToString(compression_type_);
  ImmutableOptions ioptions(options_);
  CreateTable(table_name, ioptions, compression_type_, kv,
              compression_parallel_threads_, compression_dict_bytes_);
  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = use_direct_reads_;
  InternalKeyComparator comparator(options_.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);

  // Default block size is 4KB
  //
  // Case 1: budget (1KB) is below even a single 4KB block, so no data block
  // can be prefetched and the very first Seek reports the limit.
  {
    std::unique_ptr<InternalIterator> iter;
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    MultiScanArgs scan_options(comparator_);
    scan_options.max_prefetch_size = 1024;  // less than block size
    scan_options.insert(ExtractUserKey(kv[0].first),
                        ExtractUserKey(kv[5].first));
    iter->Prepare(&scan_options);
    iter->Seek(kv[0].first);
    ASSERT_FALSE(iter->Valid());
    ASSERT_TRUE(iter->status().IsPrefetchLimitReached());
  }
  // Case 2: budget allows only a prefix of the requested range (2 blocks out
  // of 7), so the scan yields exactly 2 blocks of keys then hits the limit.
  {
    std::unique_ptr<InternalIterator> iter;
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    MultiScanArgs scan_options(comparator_);
    scan_options.max_prefetch_size = 9 * 1024;  // 9KB - 2 blocks with buffer
    scan_options.insert(ExtractUserKey(kv[1 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[8 * kEntriesPerBlock].first));
    iter->Prepare(&scan_options);
    iter->Seek(kv[1 * kEntriesPerBlock].first);
    size_t scanned_keys = 0;
    // Should be able to scan up to 2 blocks worth of data
    while (iter->Valid()) {
      ASSERT_EQ(iter->key().ToString(),
                kv[scanned_keys + 1 * kEntriesPerBlock].first);
      iter->Next();
      scanned_keys++;
    }
    ASSERT_TRUE(iter->status().IsPrefetchLimitReached());
    ASSERT_EQ(scanned_keys, 2 * kEntriesPerBlock);
  }
  // Case 3: some blocks loaded in cache already.
  // Blocks 1 and 2 are already in cache by the above test.
  // Here we try blocks 0 - 5, with prefetch limit to 3 blocks, and expect to
  // read 3 blocks.
  {
    std::unique_ptr<InternalIterator> iter;
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    MultiScanArgs scan_options(comparator_);
    scan_options.max_prefetch_size = 3 * 4 * 1024 + 1024;  // 3 blocks + 1KB
    scan_options.insert(ExtractUserKey(kv[0].first),
                        ExtractUserKey(kv[5 * kEntriesPerBlock].first));
    iter->Prepare(&scan_options);
    iter->Seek(kv[0].first);
    size_t scanned_keys = 0;
    // Should only read 3 blocks (blocks 0, 1, 2)
    // already cached.
    while (iter->Valid()) {
      ASSERT_EQ(iter->key().ToString(), kv[scanned_keys].first);
      iter->Next();
      scanned_keys++;
    }
    ASSERT_TRUE(iter->status().IsPrefetchLimitReached());
    ASSERT_EQ(scanned_keys, 3 * kEntriesPerBlock);
  }
  // Case 4: multiple scan ranges sharing one prefetch budget.
  {
    std::unique_ptr<InternalIterator> iter;
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    MultiScanArgs scan_options(comparator_);
    scan_options.max_prefetch_size = 5 * 4 * 1024 + 1024;  // 5 blocks + 1KB
    // Will read 5 entries from first scan range, and 4 blocks from the second
    // scan range
    scan_options.insert(ExtractUserKey(kv[0].first),
                        ExtractUserKey(kv[5].first));
    scan_options.insert(ExtractUserKey(kv[12 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[17 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[18 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[19 * kEntriesPerBlock].first));
    iter->Prepare(&scan_options);
    iter->Seek(kv[0].first);
    size_t scanned_keys = 0;
    size_t key_idx = 0;
    while (iter->Valid()) {
      ASSERT_EQ(iter->key().ToString(), kv[key_idx].first);
      iter->Next();
      scanned_keys++;
      key_idx++;
      if (key_idx == 5) {
        // First range exhausted; advance to the start of the second range.
        iter->Seek(kv[12 * kEntriesPerBlock].first);
        key_idx = 12 * kEntriesPerBlock;
      }
    }
    // Budget runs out inside the second range; the third range is never
    // reached.
    ASSERT_EQ(scanned_keys, 5 + 4 * kEntriesPerBlock);
    ASSERT_TRUE(iter->status().IsPrefetchLimitReached());
  }
  // Case 5: prefetch limit is big enough for all scan ranges.
  {
    std::unique_ptr<InternalIterator> iter;
    iter.reset(table->NewIterator(
        read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
        /*skip_filters=*/false, TableReaderCaller::kUncategorized));
    MultiScanArgs scan_options(comparator_);
    scan_options.max_prefetch_size = 10 * 1024 * 1024;  // 10MB
    scan_options.insert(ExtractUserKey(kv[0].first),
                        ExtractUserKey(kv[5].first));
    scan_options.insert(ExtractUserKey(kv[8 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[12 * kEntriesPerBlock].first));
    scan_options.insert(ExtractUserKey(kv[18 * kEntriesPerBlock].first),
                        ExtractUserKey(kv[19 * kEntriesPerBlock].first));
    iter->Prepare(&scan_options);
    iter->Seek(kv[0].first);
    size_t scanned_keys = 0;
    size_t key_idx = 0;
    // Scan first range
    while (iter->Valid() && key_idx < 5) {
      ASSERT_EQ(iter->key().ToString(), kv[key_idx].first);
      iter->Next();
      scanned_keys++;
      key_idx++;
    }
    // Move to second range
    iter->Seek(kv[8 * kEntriesPerBlock].first);
    key_idx = 8 * kEntriesPerBlock;
    while (iter->Valid() && key_idx < 12 * kEntriesPerBlock) {
      ASSERT_EQ(iter->key().ToString(), kv[key_idx].first);
      iter->Next();
      scanned_keys++;
      key_idx++;
    }
    // Move to third range
    iter->Seek(kv[18 * kEntriesPerBlock].first);
    key_idx = 18 * kEntriesPerBlock;
    while (iter->Valid() && key_idx < 19 * kEntriesPerBlock) {
      ASSERT_EQ(iter->key().ToString(), kv[key_idx].first);
      iter->Next();
      scanned_keys++;
      key_idx++;
    }
    // Should not hit prefetch limit
    ASSERT_OK(iter->status());
    ASSERT_EQ(scanned_keys, 5 + 4 * kEntriesPerBlock + 1 * kEntriesPerBlock);
  }
}
// Verifies that as a multi-scan iterator advances from one prepared scan
// range to the next, data blocks pinned for earlier ranges are unpinned,
// while a block shared between adjacent ranges, and blocks belonging to
// later ranges, remain pinned.
TEST_P(BlockBasedTableReaderMultiScanTest, MultiScanUnpinPreviousBlocks) {
  std::vector<std::pair<std::string, std::string>> kv =
      BlockBasedTableReaderBaseTest::GenerateKVMap(
          30 /* num_block */, true /* mixed_with_human_readable_string_value */,
          comparator_->timestamp_size(), same_key_diff_ts_, comparator_);
  std::string table_name = "BlockBasedTableReaderTest_UnpinPreviousBlocks" +
                           CompressionTypeToString(compression_type_);
  ImmutableOptions ioptions(options_);
  CreateTable(table_name, ioptions, compression_type_, kv,
              compression_parallel_threads_, compression_dict_bytes_);
  std::unique_ptr<BlockBasedTable> table;
  FileOptions foptions;
  foptions.use_direct_reads = use_direct_reads_;
  InternalKeyComparator comparator(options_.comparator);
  NewBlockBasedTableReader(foptions, ioptions, comparator, table_name, &table,
                           true /* bool prefetch_index_and_filter_in_cache */,
                           nullptr /* status */, persist_udt_);
  ReadOptions read_opts;
  std::unique_ptr<InternalIterator> iter;
  iter.reset(table->NewIterator(
      read_opts, options_.prefix_extractor.get(), /*arena=*/nullptr,
      /*skip_filters=*/false, TableReaderCaller::kUncategorized));
  // NOTE(review): the suite is parameterized over comparators (including
  // ReverseBytewiseComparator), but the scan args here hard-code
  // BytewiseComparator() instead of comparator_ — confirm this is intentional.
  MultiScanArgs scan_options(BytewiseComparator());
  // Range 1: block 0-4, Range 2: block 4-4, Range 3: block 5-15
  scan_options.insert(ExtractUserKey(kv[0 * kEntriesPerBlock].first),
                      ExtractUserKey(kv[5 * kEntriesPerBlock - 5].first));
  scan_options.insert(ExtractUserKey(kv[5 * kEntriesPerBlock - 4].first),
                      ExtractUserKey(kv[5 * kEntriesPerBlock - 3].first));
  scan_options.insert(ExtractUserKey(kv[5 * kEntriesPerBlock - 2].first),
                      ExtractUserKey(kv[15 * kEntriesPerBlock - 1].first));
  iter->Prepare(&scan_options);
  auto* bbiter = dynamic_cast<BlockBasedTableIterator*>(iter.get());
  ASSERT_TRUE(bbiter);
  // After Prepare(), every block covered by the three ranges is pinned.
  for (int block = 0; block < 15; ++block) {
    ASSERT_TRUE(bbiter->TEST_IsBlockPinnedByMultiScan(block)) << block;
  }
  // MultiScan require seeks to be called in scan_option order
  iter->Seek(kv[0 * kEntriesPerBlock].first);
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  // Seek to second range - should unpin blocks from first range
  iter->Seek(kv[5 * kEntriesPerBlock - 4].first);
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ(iter->key(), kv[5 * kEntriesPerBlock - 4].first);
  ASSERT_EQ(iter->value(), kv[5 * kEntriesPerBlock - 4].second);
  // The last block (block 4) is shared with the second range, so
  // it's not unpinned yet.
  for (int block = 0; block < 4; ++block) {
    ASSERT_FALSE(bbiter->TEST_IsBlockPinnedByMultiScan(block)) << block;
  }
  // Blocks from second range still in cache.
  // We skip block 4 here since it's ownership is moved to the actual data
  // block iter.
  for (int block = 5; block < 15; ++block) {
    ASSERT_TRUE(bbiter->TEST_IsBlockPinnedByMultiScan(block)) << block;
  }
  // Seek into the third range; its blocks must remain pinned.
  iter->Seek(kv[5 * kEntriesPerBlock - 2].first);
  ASSERT_TRUE(iter->Valid());
  ASSERT_OK(iter->status());
  ASSERT_EQ(iter->key(), kv[5 * kEntriesPerBlock - 2].first);
  ASSERT_EQ(iter->value(), kv[5 * kEntriesPerBlock - 2].second);
  // Still pinned
  for (int block = 5; block < 15; ++block) {
    ASSERT_TRUE(bbiter->TEST_IsBlockPinnedByMultiScan(block)) << block;
  }
}
// Test that fs_prefetch_support flag is correctly initialized during table
// construction based on filesystem capabilities
TEST_P(BlockBasedTableReaderTest, FSPrefetchSupportInitializedCorrectly) {
  // FileSystem wrapper whose prefetch capability can be toggled, so both
  // branches of the table reader's capability check can be exercised.
  class ConfigurablePrefetchFS : public FileSystemWrapper {
   public:
    ConfigurablePrefetchFS(const std::shared_ptr<FileSystem>& target,
                           bool support_prefetch)
        : FileSystemWrapper(target), support_prefetch_(support_prefetch) {}

    static const char* kClassName() { return "ConfigurablePrefetchFS"; }
    const char* Name() const override { return kClassName(); }

    // Reports the wrapped filesystem's ops, with the prefetch bit cleared
    // when this wrapper was configured without prefetch support.
    void SupportedOps(int64_t& supported_ops) override {
      target()->SupportedOps(supported_ops);
      if (!support_prefetch_) {  // Disable prefetch support if requested
        supported_ops &= ~(1 << FSSupportedOps::kFSPrefetch);
      }
    }

   private:
    bool support_prefetch_;  // whether to advertise kFSPrefetch
  };

  // Prepare test table
  Options options;
  options.persist_user_defined_timestamps = persist_udt_;
  if (udt_enabled_) {
    options.comparator = test::BytewiseComparatorWithU64TsWrapper();
  }
  size_t ts_sz = options.comparator->timestamp_size();
  auto kv = BlockBasedTableReaderBaseTest::GenerateKVMap(5, true, ts_sz);
  std::string table_name = "BlockBasedTableReaderTest_BlockPrefetcherTest" +
                           CompressionTypeToString(compression_type_);
  ImmutableOptions ioptions(options);
  CreateTable(table_name, ioptions, compression_type_, kv,
              compression_parallel_threads_, compression_dict_bytes_);

  // Test Case 1: Filesystem supports prefetch, fs_prefetch_support should be
  // true
  {
    auto fs_with_prefetch = std::make_shared<ConfigurablePrefetchFS>(
        env_->GetFileSystem(), true /* support_prefetch */);
    // The wrapper Env must outlive the table reader built from it.
    std::unique_ptr<Env> env_wrapper(
        new CompositeEnvWrapper(env_, fs_with_prefetch));
    options.env = env_wrapper.get();

    FileOptions fopts;
    fopts.use_direct_reads = use_direct_reads_;
    InternalKeyComparator cmp(options.comparator);
    ImmutableOptions iopts(options);
    std::unique_ptr<BlockBasedTable> table;
    NewBlockBasedTableReader(fopts, iopts, cmp, table_name, &table,
                             false /* prefetch_index_and_filter_in_cache */,
                             nullptr, persist_udt_);
    ASSERT_TRUE(table->get_rep()->fs_prefetch_support);
    ASSERT_TRUE(CheckFSFeatureSupport(fs_with_prefetch.get(),
                                      FSSupportedOps::kFSPrefetch));
  }

  // Test Case 2: Filesystem doesn't support prefetch, fs_prefetch_support
  // should be false
  {
    auto fs_without_prefetch = std::make_shared<ConfigurablePrefetchFS>(
        env_->GetFileSystem(), false /* support_prefetch */);
    std::unique_ptr<Env> env_wrapper(
        new CompositeEnvWrapper(env_, fs_without_prefetch));
    options.env = env_wrapper.get();

    FileOptions fopts;
    fopts.use_direct_reads = use_direct_reads_;
    InternalKeyComparator cmp(options.comparator);
    ImmutableOptions iopts(options);
    std::unique_ptr<BlockBasedTable> table;
    NewBlockBasedTableReader(fopts, iopts, cmp, table_name, &table,
                             false /* prefetch_index_and_filter_in_cache */,
                             nullptr, persist_udt_);
    ASSERT_FALSE(table->get_rep()->fs_prefetch_support);
    ASSERT_FALSE(CheckFSFeatureSupport(fs_without_prefetch.get(),
                                       FSSupportedOps::kFSPrefetch));
  }
}
  1529. std::vector<BlockBasedTableReaderTestParam> GenerateCombinedParameters(
  1530. const std::vector<CompressionType>& compression_types,
  1531. const std::vector<bool>& use_direct_read_flags,
  1532. const std::vector<BlockBasedTableOptions::IndexType>& index_types,
  1533. const std::vector<bool>& no_block_cache_flags,
  1534. const std::vector<test::UserDefinedTimestampTestMode>& udt_test_modes,
  1535. const std::vector<int>& parallel_compression_thread_counts,
  1536. const std::vector<uint32_t>& compression_dict_byte_counts,
  1537. const std::vector<bool>& same_key_diff_ts_flags,
  1538. const std::vector<const Comparator*>& comparators,
  1539. const std::vector<bool>& fill_cache_flags,
  1540. const std::vector<bool>& use_async_io_flags,
  1541. const std::vector<bool>& block_align_flags,
  1542. const std::vector<size_t>& super_block_alignment_sizes,
  1543. const std::vector<size_t>& super_block_alignment_space_overhead_ratios) {
  1544. std::vector<BlockBasedTableReaderTestParam> params;
  1545. for (const auto& compression_type : compression_types) {
  1546. for (auto use_direct_read : use_direct_read_flags) {
  1547. for (const auto& index_type : index_types) {
  1548. for (auto no_block_cache : no_block_cache_flags) {
  1549. for (const auto& udt_test_mode : udt_test_modes) {
  1550. for (auto parallel_compression_thread_count :
  1551. parallel_compression_thread_counts) {
  1552. for (auto compression_dict_byte_count :
  1553. compression_dict_byte_counts) {
  1554. for (auto same_key_diff_ts_flag : same_key_diff_ts_flags) {
  1555. for (const auto& comparator : comparators) {
  1556. for (auto fill_cache : fill_cache_flags) {
  1557. for (auto use_async_io : use_async_io_flags) {
  1558. for (auto block_align : block_align_flags) {
  1559. for (auto super_block_alignment_size :
  1560. super_block_alignment_sizes) {
  1561. for (
  1562. auto
  1563. super_block_alignment_space_overhead_ratio :
  1564. super_block_alignment_space_overhead_ratios) {
  1565. if (super_block_alignment_size == 0) {
  1566. // Override padding size to 0 if alignment size
  1567. // is 0, which means no super block alignment
  1568. super_block_alignment_space_overhead_ratio = 0;
  1569. }
  1570. params.emplace_back(
  1571. compression_type, use_direct_read, index_type,
  1572. no_block_cache, udt_test_mode,
  1573. parallel_compression_thread_count,
  1574. compression_dict_byte_count,
  1575. same_key_diff_ts_flag, comparator, fill_cache,
  1576. use_async_io, block_align,
  1577. super_block_alignment_size,
  1578. super_block_alignment_space_overhead_ratio);
  1579. }
  1580. }
  1581. }
  1582. }
  1583. }
  1584. }
  1585. }
  1586. }
  1587. }
  1588. }
  1589. }
  1590. }
  1591. }
  1592. }
  1593. return params;
  1594. }
  1595. std::vector<bool> Bool() { return {true, false}; }
// Fluent builder over the parameter axes accepted by
// GenerateCombinedParameters(). The constructor seeds every axis with a
// default; each WithX() method overrides one axis and returns *this so calls
// can be chained; build() expands the cartesian product into the final
// parameter list.
struct BlockBasedTableReaderTestParamBuilder {
  BlockBasedTableReaderTestParamBuilder() {
    // Default values
    compression_types = GetSupportedCompressions();
    use_direct_read_flags = Bool();
    index_types = {
        BlockBasedTableOptions::IndexType::kBinarySearch,
        BlockBasedTableOptions::IndexType::kHashSearch,
        BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch,
        BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey};
    no_block_cache_flags = {false};
    udt_test_modes = {
        test::UserDefinedTimestampTestMode::kStripUserDefinedTimestamp};
    parallel_compression_thread_counts = {1, 2};
    compression_dict_byte_counts = {0, 4096};
    same_key_diff_ts_flags = {false};
    comparators = {BytewiseComparator()};
    fill_cache_flags = {true};
    use_async_io_flags = {false};
    block_align_flags = {false};
    // 0 means super block alignment disabled by default.
    super_block_alignment_sizes = {0};
    super_block_alignment_space_overhead_ratios = {128};
  }

  // Builder methods, one per axis. Each replaces the axis wholesale.
  BlockBasedTableReaderTestParamBuilder& WithCompressionTypes(
      const std::vector<CompressionType>& _compression_types) {
    compression_types = _compression_types;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithUseDirectReadFlags(
      const std::vector<bool>& _use_direct_read_flags) {
    use_direct_read_flags = _use_direct_read_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithIndexTypes(
      const std::vector<BlockBasedTableOptions::IndexType>& _index_types) {
    index_types = _index_types;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithNoBlockCacheFlags(
      const std::vector<bool>& _no_block_cache_flags) {
    no_block_cache_flags = _no_block_cache_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithUDTTestModes(
      const std::vector<test::UserDefinedTimestampTestMode>& _udt_test_modes) {
    udt_test_modes = _udt_test_modes;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithParallelCompressionThreadCounts(
      const std::vector<int>& _parallel_compression_thread_counts) {
    parallel_compression_thread_counts = _parallel_compression_thread_counts;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithCompressionDictByteCounts(
      const std::vector<uint32_t>& _compression_dict_byte_counts) {
    compression_dict_byte_counts = _compression_dict_byte_counts;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithSameKeyDiffTsFlags(
      const std::vector<bool>& _same_key_diff_ts_flags) {
    same_key_diff_ts_flags = _same_key_diff_ts_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithComparators(
      const std::vector<const Comparator*>& _comparators) {
    comparators = _comparators;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithFillCacheFlags(
      const std::vector<bool>& _fill_cache_flags) {
    fill_cache_flags = _fill_cache_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithUseAsyncIoFlags(
      const std::vector<bool>& _use_async_io_flags) {
    use_async_io_flags = _use_async_io_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithBlockAlignFlags(
      const std::vector<bool>& _block_align_flags) {
    block_align_flags = _block_align_flags;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder& WithSuperBlockAlignmentSizes(
      const std::vector<size_t>& _super_block_alignment_sizes) {
    super_block_alignment_sizes = _super_block_alignment_sizes;
    return *this;
  }
  BlockBasedTableReaderTestParamBuilder&
  WithSuperBlockAlignmentSpaceOverheadRatios(
      const std::vector<size_t>& _super_block_alignment_space_overhead_ratios) {
    super_block_alignment_space_overhead_ratios =
        _super_block_alignment_space_overhead_ratios;
    return *this;
  }

  // Expands the configured axes into the cartesian product of parameters.
  std::vector<BlockBasedTableReaderTestParam> build() {
    return GenerateCombinedParameters(
        compression_types, use_direct_read_flags, index_types,
        no_block_cache_flags, udt_test_modes,
        parallel_compression_thread_counts, compression_dict_byte_counts,
        same_key_diff_ts_flags, comparators, fill_cache_flags,
        use_async_io_flags, block_align_flags, super_block_alignment_sizes,
        super_block_alignment_space_overhead_ratios);
  }

  // Parameter axes (public so tests could tweak them directly if needed).
  std::vector<CompressionType> compression_types;
  std::vector<bool> use_direct_read_flags;
  std::vector<BlockBasedTableOptions::IndexType> index_types;
  std::vector<bool> no_block_cache_flags;
  std::vector<test::UserDefinedTimestampTestMode> udt_test_modes;
  std::vector<int> parallel_compression_thread_counts;
  std::vector<uint32_t> compression_dict_byte_counts;
  std::vector<bool> same_key_diff_ts_flags;
  std::vector<const Comparator*> comparators;
  std::vector<bool> fill_cache_flags;
  std::vector<bool> use_async_io_flags;
  std::vector<bool> block_align_flags;
  std::vector<size_t> super_block_alignment_sizes;
  std::vector<size_t> super_block_alignment_space_overhead_ratios;
};
  1716. std::vector<bool> IOUringFlags() {
  1717. #ifdef ROCKSDB_IOURING_PRESENT
  1718. return {false, true};
  1719. #else
  1720. return {false};
  1721. #endif
  1722. }
// Test suite instantiations. Each builder call below starts from the default
// axes and overrides only what that suite exercises.
// NOTE(review): INSTANTIATE_TEST_CASE_P is the legacy gtest macro name;
// newer gtest prefers INSTANTIATE_TEST_SUITE_P — confirm the pinned gtest
// version before renaming.

// General reader tests, swept across all user-defined-timestamp modes.
INSTANTIATE_TEST_CASE_P(
    BlockBasedTableReaderTest, BlockBasedTableReaderTest,
    ::testing::ValuesIn(BlockBasedTableReaderTestParamBuilder()
                            .WithUDTTestModes(test::GetUDTTestModes())
                            .build()));

// MultiScan with async IO: both comparators, both fill_cache values, and
// io_uring on/off (when compiled in).
INSTANTIATE_TEST_CASE_P(
    BlockBasedTableReaderMultiScanAsyncIOTest,
    BlockBasedTableReaderMultiScanAsyncIOTest,
    ::testing::ValuesIn(BlockBasedTableReaderTestParamBuilder()
                            .WithComparators({BytewiseComparator(),
                                              ReverseBytewiseComparator()})
                            .WithFillCacheFlags(Bool())
                            .WithUseAsyncIoFlags(IOUringFlags())
                            .build()));

// MultiScan (synchronous) with both comparator orders.
INSTANTIATE_TEST_CASE_P(
    BlockBasedTableReaderMultiScanTest, BlockBasedTableReaderMultiScanTest,
    ::testing::ValuesIn(BlockBasedTableReaderTestParamBuilder()
                            .WithComparators({BytewiseComparator(),
                                              ReverseBytewiseComparator()})
                            .build()));

// Get() path: all UDT modes, same-key-different-timestamp on/off, both
// comparators, without filling the block cache.
INSTANTIATE_TEST_CASE_P(
    BlockBasedTableReaderGetTest, BlockBasedTableReaderGetTest,
    ::testing::ValuesIn(BlockBasedTableReaderTestParamBuilder()
                            .WithUDTTestModes(test::GetUDTTestModes())
                            .WithSameKeyDiffTsFlags(Bool())
                            .WithComparators({BytewiseComparator(),
                                              ReverseBytewiseComparator()})
                            .WithFillCacheFlags({false})
                            .build()));

// Second instantiation of the Get test suite, focused on block alignment and
// super-block alignment combinations.
INSTANTIATE_TEST_CASE_P(
    BlockBasedTableReaderSuperBlockAlignTest, BlockBasedTableReaderGetTest,
    ::testing::ValuesIn(
        BlockBasedTableReaderTestParamBuilder()
            .WithIndexTypes(
                {BlockBasedTableOptions::IndexType::kBinarySearch,
                 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch})
            .WithFillCacheFlags({false})
            .WithBlockAlignFlags(Bool())
            .WithSuperBlockAlignmentSizes({0, 32 * 1024, 16 * 1024})
            .WithSuperBlockAlignmentSpaceOverheadRatios({0, 4, 256})
            .build()));

// Strict-capacity-limit cache behavior with a two-level index.
INSTANTIATE_TEST_CASE_P(
    StrictCapacityLimitReaderTest, StrictCapacityLimitReaderTest,
    ::testing::ValuesIn(
        BlockBasedTableReaderTestParamBuilder()
            .WithIndexTypes(
                {BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch})
            .WithUDTTestModes(test::GetUDTTestModes())
            .WithCompressionDictByteCounts({0})
            .WithSameKeyDiffTsFlags(Bool())
            .WithFillCacheFlags({false})
            .build()));

// Checksum verification: buffered reads only, two-level index, no block
// cache, all UDT modes.
INSTANTIATE_TEST_CASE_P(
    VerifyChecksum, BlockBasedTableReaderTestVerifyChecksum,
    ::testing::ValuesIn(
        BlockBasedTableReaderTestParamBuilder()
            .WithUseDirectReadFlags({false})
            .WithIndexTypes(
                {BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch})
            .WithNoBlockCacheFlags({true})
            .WithUDTTestModes(test::GetUDTTestModes())
            .WithCompressionDictByteCounts({0})
            .WithFillCacheFlags({false})
            .build()));
  1787. } // namespace ROCKSDB_NAMESPACE
// Test entry point: install RocksDB's stack trace handler so a crash in any
// test prints a useful backtrace, then run all registered gtest cases.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}