db_range_del_test.cc 64 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511
6611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660
  1. // Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include "db/db_test_util.h"
  6. #include "port/stack_trace.h"
  7. #include "rocksdb/utilities/write_batch_with_index.h"
  8. #include "test_util/testutil.h"
  9. #include "utilities/merge_operators.h"
  10. namespace ROCKSDB_NAMESPACE {
  11. class DBRangeDelTest : public DBTestBase {
  12. public:
  13. DBRangeDelTest() : DBTestBase("/db_range_del_test") {}
  14. std::string GetNumericStr(int key) {
  15. uint64_t uint64_key = static_cast<uint64_t>(key);
  16. std::string str;
  17. str.resize(8);
  18. memcpy(&str[0], static_cast<void*>(&uint64_key), 8);
  19. return str;
  20. }
  21. };
  22. // PlainTableFactory, WriteBatchWithIndex, and NumTableFilesAtLevel() are not
  23. // supported in ROCKSDB_LITE
  24. #ifndef ROCKSDB_LITE
  25. TEST_F(DBRangeDelTest, NonBlockBasedTableNotSupported) {
  26. // TODO: figure out why MmapReads trips the iterator pinning assertion in
  27. // RangeDelAggregator. Ideally it would be supported; otherwise it should at
  28. // least be explicitly unsupported.
  29. for (auto config : {kPlainTableAllBytesPrefix, /* kWalDirAndMmapReads */}) {
  30. option_config_ = config;
  31. DestroyAndReopen(CurrentOptions());
  32. ASSERT_TRUE(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  33. "dr1", "dr1")
  34. .IsNotSupported());
  35. }
  36. }
  37. TEST_F(DBRangeDelTest, WriteBatchWithIndexNotSupported) {
  38. WriteBatchWithIndex indexedBatch{};
  39. ASSERT_TRUE(indexedBatch.DeleteRange(db_->DefaultColumnFamily(), "dr1", "dr1")
  40. .IsNotSupported());
  41. ASSERT_TRUE(indexedBatch.DeleteRange("dr1", "dr1").IsNotSupported());
  42. }
  43. TEST_F(DBRangeDelTest, FlushOutputHasOnlyRangeTombstones) {
  44. do {
  45. DestroyAndReopen(CurrentOptions());
  46. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  47. "dr1", "dr2"));
  48. ASSERT_OK(db_->Flush(FlushOptions()));
  49. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  50. } while (ChangeOptions(kRangeDelSkipConfigs));
  51. }
  52. TEST_F(DBRangeDelTest, CompactionOutputHasOnlyRangeTombstone) {
  53. do {
  54. Options opts = CurrentOptions();
  55. opts.disable_auto_compactions = true;
  56. opts.statistics = CreateDBStatistics();
  57. DestroyAndReopen(opts);
  58. // snapshot protects range tombstone from dropping due to becoming obsolete.
  59. const Snapshot* snapshot = db_->GetSnapshot();
  60. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z");
  61. db_->Flush(FlushOptions());
  62. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  63. ASSERT_EQ(0, NumTableFilesAtLevel(1));
  64. dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
  65. true /* disallow_trivial_move */);
  66. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  67. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  68. ASSERT_EQ(0, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
  69. db_->ReleaseSnapshot(snapshot);
  70. // Skip cuckoo memtables, which do not support snapshots. Skip non-leveled
  71. // compactions as the above assertions about the number of files in a level
  72. // do not hold true.
  73. } while (ChangeOptions(kRangeDelSkipConfigs | kSkipUniversalCompaction |
  74. kSkipFIFOCompaction));
  75. }
  76. TEST_F(DBRangeDelTest, CompactionOutputFilesExactlyFilled) {
  77. // regression test for exactly filled compaction output files. Previously
  78. // another file would be generated containing all range deletions, which
  79. // could invalidate the non-overlapping file boundary invariant.
  80. const int kNumPerFile = 4, kNumFiles = 2, kFileBytes = 9 << 10;
  81. Options options = CurrentOptions();
  82. options.disable_auto_compactions = true;
  83. options.level0_file_num_compaction_trigger = kNumFiles;
  84. options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  85. options.num_levels = 2;
  86. options.target_file_size_base = kFileBytes;
  87. BlockBasedTableOptions table_options;
  88. table_options.block_size_deviation = 50; // each block holds two keys
  89. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  90. Reopen(options);
  91. // snapshot protects range tombstone from dropping due to becoming obsolete.
  92. const Snapshot* snapshot = db_->GetSnapshot();
  93. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0), Key(1));
  94. Random rnd(301);
  95. for (int i = 0; i < kNumFiles; ++i) {
  96. std::vector<std::string> values;
  97. // Write 12K (4 values, each 3K)
  98. for (int j = 0; j < kNumPerFile; j++) {
  99. values.push_back(RandomString(&rnd, 3 << 10));
  100. ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
  101. if (j == 0 && i > 0) {
  102. dbfull()->TEST_WaitForFlushMemTable();
  103. }
  104. }
  105. }
  106. // put extra key to trigger final flush
  107. ASSERT_OK(Put("", ""));
  108. dbfull()->TEST_WaitForFlushMemTable();
  109. ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  110. ASSERT_EQ(0, NumTableFilesAtLevel(1));
  111. dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
  112. true /* disallow_trivial_move */);
  113. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  114. ASSERT_EQ(2, NumTableFilesAtLevel(1));
  115. db_->ReleaseSnapshot(snapshot);
  116. }
  117. TEST_F(DBRangeDelTest, MaxCompactionBytesCutsOutputFiles) {
  118. // Ensures range deletion spanning multiple compaction output files that are
  119. // cut by max_compaction_bytes will have non-overlapping key-ranges.
  120. // https://github.com/facebook/rocksdb/issues/1778
  121. const int kNumFiles = 2, kNumPerFile = 1 << 8, kBytesPerVal = 1 << 12;
  122. Options opts = CurrentOptions();
  123. opts.comparator = test::Uint64Comparator();
  124. opts.disable_auto_compactions = true;
  125. opts.level0_file_num_compaction_trigger = kNumFiles;
  126. opts.max_compaction_bytes = kNumPerFile * kBytesPerVal;
  127. opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  128. // Want max_compaction_bytes to trigger the end of compaction output file, not
  129. // target_file_size_base, so make the latter much bigger
  130. opts.target_file_size_base = 100 * opts.max_compaction_bytes;
  131. Reopen(opts);
  132. // snapshot protects range tombstone from dropping due to becoming obsolete.
  133. const Snapshot* snapshot = db_->GetSnapshot();
  134. // It spans the whole key-range, thus will be included in all output files
  135. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  136. GetNumericStr(0),
  137. GetNumericStr(kNumFiles * kNumPerFile - 1)));
  138. Random rnd(301);
  139. for (int i = 0; i < kNumFiles; ++i) {
  140. std::vector<std::string> values;
  141. // Write 1MB (256 values, each 4K)
  142. for (int j = 0; j < kNumPerFile; j++) {
  143. values.push_back(RandomString(&rnd, kBytesPerVal));
  144. ASSERT_OK(Put(GetNumericStr(kNumPerFile * i + j), values[j]));
  145. }
  146. // extra entry to trigger SpecialSkipListFactory's flush
  147. ASSERT_OK(Put(GetNumericStr(kNumPerFile), ""));
  148. dbfull()->TEST_WaitForFlushMemTable();
  149. ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  150. }
  151. dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
  152. true /* disallow_trivial_move */);
  153. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  154. ASSERT_GE(NumTableFilesAtLevel(1), 2);
  155. std::vector<std::vector<FileMetaData>> files;
  156. dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  157. for (size_t i = 0; i < files[1].size() - 1; ++i) {
  158. ASSERT_TRUE(InternalKeyComparator(opts.comparator)
  159. .Compare(files[1][i].largest, files[1][i + 1].smallest) <
  160. 0);
  161. }
  162. db_->ReleaseSnapshot(snapshot);
  163. }
  164. TEST_F(DBRangeDelTest, SentinelsOmittedFromOutputFile) {
  165. // Regression test for bug where sentinel range deletions (i.e., ones with
  166. // sequence number of zero) were included in output files.
  167. // snapshot protects range tombstone from dropping due to becoming obsolete.
  168. const Snapshot* snapshot = db_->GetSnapshot();
  169. // gaps between ranges creates sentinels in our internal representation
  170. std::vector<std::pair<std::string, std::string>> range_dels = {{"a", "b"}, {"c", "d"}, {"e", "f"}};
  171. for (const auto& range_del : range_dels) {
  172. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  173. range_del.first, range_del.second));
  174. }
  175. ASSERT_OK(db_->Flush(FlushOptions()));
  176. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  177. std::vector<std::vector<FileMetaData>> files;
  178. dbfull()->TEST_GetFilesMetaData(db_->DefaultColumnFamily(), &files);
  179. ASSERT_GT(files[0][0].fd.smallest_seqno, 0);
  180. db_->ReleaseSnapshot(snapshot);
  181. }
  182. TEST_F(DBRangeDelTest, FlushRangeDelsSameStartKey) {
  183. db_->Put(WriteOptions(), "b1", "val");
  184. ASSERT_OK(
  185. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  186. db_->Put(WriteOptions(), "b2", "val");
  187. ASSERT_OK(
  188. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  189. // first iteration verifies query correctness in memtable, second verifies
  190. // query correctness for a single SST file
  191. for (int i = 0; i < 2; ++i) {
  192. if (i > 0) {
  193. ASSERT_OK(db_->Flush(FlushOptions()));
  194. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  195. }
  196. std::string value;
  197. ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
  198. ASSERT_OK(db_->Get(ReadOptions(), "b2", &value));
  199. }
  200. }
  201. TEST_F(DBRangeDelTest, CompactRangeDelsSameStartKey) {
  202. db_->Put(WriteOptions(), "unused", "val"); // prevents empty after compaction
  203. db_->Put(WriteOptions(), "b1", "val");
  204. ASSERT_OK(db_->Flush(FlushOptions()));
  205. ASSERT_OK(
  206. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "c"));
  207. ASSERT_OK(db_->Flush(FlushOptions()));
  208. ASSERT_OK(
  209. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "b"));
  210. ASSERT_OK(db_->Flush(FlushOptions()));
  211. ASSERT_EQ(3, NumTableFilesAtLevel(0));
  212. for (int i = 0; i < 2; ++i) {
  213. if (i > 0) {
  214. dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
  215. true /* disallow_trivial_move */);
  216. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  217. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  218. }
  219. std::string value;
  220. ASSERT_TRUE(db_->Get(ReadOptions(), "b1", &value).IsNotFound());
  221. }
  222. }
  223. #endif // ROCKSDB_LITE
  224. TEST_F(DBRangeDelTest, FlushRemovesCoveredKeys) {
  225. const int kNum = 300, kRangeBegin = 50, kRangeEnd = 250;
  226. Options opts = CurrentOptions();
  227. opts.comparator = test::Uint64Comparator();
  228. Reopen(opts);
  229. // Write a third before snapshot, a third between snapshot and tombstone, and
  230. // a third after the tombstone. Keys older than snapshot or newer than the
  231. // tombstone should be preserved.
  232. const Snapshot* snapshot = nullptr;
  233. for (int i = 0; i < kNum; ++i) {
  234. if (i == kNum / 3) {
  235. snapshot = db_->GetSnapshot();
  236. } else if (i == 2 * kNum / 3) {
  237. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  238. GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
  239. }
  240. db_->Put(WriteOptions(), GetNumericStr(i), "val");
  241. }
  242. db_->Flush(FlushOptions());
  243. for (int i = 0; i < kNum; ++i) {
  244. ReadOptions read_opts;
  245. read_opts.ignore_range_deletions = true;
  246. std::string value;
  247. if (i < kRangeBegin || i > kRangeEnd || i < kNum / 3 || i >= 2 * kNum / 3) {
  248. ASSERT_OK(db_->Get(read_opts, GetNumericStr(i), &value));
  249. } else {
  250. ASSERT_TRUE(db_->Get(read_opts, GetNumericStr(i), &value).IsNotFound());
  251. }
  252. }
  253. db_->ReleaseSnapshot(snapshot);
  254. }
  255. // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
  256. #ifndef ROCKSDB_LITE
  257. TEST_F(DBRangeDelTest, CompactionRemovesCoveredKeys) {
  258. const int kNumPerFile = 100, kNumFiles = 4;
  259. Options opts = CurrentOptions();
  260. opts.comparator = test::Uint64Comparator();
  261. opts.disable_auto_compactions = true;
  262. opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  263. opts.num_levels = 2;
  264. opts.statistics = CreateDBStatistics();
  265. Reopen(opts);
  266. for (int i = 0; i < kNumFiles; ++i) {
  267. if (i > 0) {
  268. // range tombstone covers first half of the previous file
  269. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  270. GetNumericStr((i - 1) * kNumPerFile),
  271. GetNumericStr((i - 1) * kNumPerFile + kNumPerFile / 2));
  272. }
  273. // Make sure a given key appears in each file so compaction won't be able to
  274. // use trivial move, which would happen if the ranges were non-overlapping.
  275. // Also, we need an extra element since flush is only triggered when the
  276. // number of keys is one greater than SpecialSkipListFactory's limit.
  277. // We choose a key outside the key-range used by the test to avoid conflict.
  278. db_->Put(WriteOptions(), GetNumericStr(kNumPerFile * kNumFiles), "val");
  279. for (int j = 0; j < kNumPerFile; ++j) {
  280. db_->Put(WriteOptions(), GetNumericStr(i * kNumPerFile + j), "val");
  281. }
  282. dbfull()->TEST_WaitForFlushMemTable();
  283. ASSERT_EQ(i + 1, NumTableFilesAtLevel(0));
  284. }
  285. db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  286. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  287. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  288. ASSERT_EQ((kNumFiles - 1) * kNumPerFile / 2,
  289. TestGetTickerCount(opts, COMPACTION_KEY_DROP_RANGE_DEL));
  290. for (int i = 0; i < kNumFiles; ++i) {
  291. for (int j = 0; j < kNumPerFile; ++j) {
  292. ReadOptions read_opts;
  293. read_opts.ignore_range_deletions = true;
  294. std::string value;
  295. if (i == kNumFiles - 1 || j >= kNumPerFile / 2) {
  296. ASSERT_OK(
  297. db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value));
  298. } else {
  299. ASSERT_TRUE(
  300. db_->Get(read_opts, GetNumericStr(i * kNumPerFile + j), &value)
  301. .IsNotFound());
  302. }
  303. }
  304. }
  305. }
  306. TEST_F(DBRangeDelTest, ValidLevelSubcompactionBoundaries) {
  307. const int kNumPerFile = 100, kNumFiles = 4, kFileBytes = 100 << 10;
  308. Options options = CurrentOptions();
  309. options.disable_auto_compactions = true;
  310. options.level0_file_num_compaction_trigger = kNumFiles;
  311. options.max_bytes_for_level_base = 2 * kFileBytes;
  312. options.max_subcompactions = 4;
  313. options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  314. options.num_levels = 3;
  315. options.target_file_size_base = kFileBytes;
  316. options.target_file_size_multiplier = 1;
  317. Reopen(options);
  318. Random rnd(301);
  319. for (int i = 0; i < 2; ++i) {
  320. for (int j = 0; j < kNumFiles; ++j) {
  321. if (i > 0) {
  322. // delete [95,105) in two files, [295,305) in next two
  323. int mid = (j + (1 - j % 2)) * kNumPerFile;
  324. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  325. Key(mid - 5), Key(mid + 5));
  326. }
  327. std::vector<std::string> values;
  328. // Write 100KB (100 values, each 1K)
  329. for (int k = 0; k < kNumPerFile; k++) {
  330. values.push_back(RandomString(&rnd, 990));
  331. ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
  332. }
  333. // put extra key to trigger flush
  334. ASSERT_OK(Put("", ""));
  335. dbfull()->TEST_WaitForFlushMemTable();
  336. if (j < kNumFiles - 1) {
  337. // background compaction may happen early for kNumFiles'th file
  338. ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
  339. }
  340. if (j == options.level0_file_num_compaction_trigger - 1) {
  341. // When i == 1, compaction will output some files to L1, at which point
  342. // L1 is not bottommost so range deletions cannot be compacted away. The
  343. // new L1 files must be generated with non-overlapping key ranges even
  344. // though multiple subcompactions see the same ranges deleted, else an
  345. // assertion will fail.
  346. //
  347. // Only enable auto-compactions when we're ready; otherwise, the
  348. // oversized L0 (relative to base_level) causes the compaction to run
  349. // earlier.
  350. ASSERT_OK(db_->EnableAutoCompaction({db_->DefaultColumnFamily()}));
  351. dbfull()->TEST_WaitForCompact();
  352. ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
  353. {{"disable_auto_compactions", "true"}}));
  354. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  355. ASSERT_GT(NumTableFilesAtLevel(1), 0);
  356. ASSERT_GT(NumTableFilesAtLevel(2), 0);
  357. }
  358. }
  359. }
  360. }
  361. TEST_F(DBRangeDelTest, ValidUniversalSubcompactionBoundaries) {
  362. const int kNumPerFile = 100, kFilesPerLevel = 4, kNumLevels = 4;
  363. Options options = CurrentOptions();
  364. options.compaction_options_universal.min_merge_width = kFilesPerLevel;
  365. options.compaction_options_universal.max_merge_width = kFilesPerLevel;
  366. options.compaction_options_universal.size_ratio = 10;
  367. options.compaction_style = kCompactionStyleUniversal;
  368. options.level0_file_num_compaction_trigger = kFilesPerLevel;
  369. options.max_subcompactions = 4;
  370. options.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  371. options.num_levels = kNumLevels;
  372. options.target_file_size_base = kNumPerFile << 10;
  373. options.target_file_size_multiplier = 1;
  374. Reopen(options);
  375. Random rnd(301);
  376. for (int i = 0; i < kNumLevels - 1; ++i) {
  377. for (int j = 0; j < kFilesPerLevel; ++j) {
  378. if (i == kNumLevels - 2) {
  379. // insert range deletions [95,105) in two files, [295,305) in next two
  380. // to prepare L1 for later manual compaction.
  381. int mid = (j + (1 - j % 2)) * kNumPerFile;
  382. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  383. Key(mid - 5), Key(mid + 5));
  384. }
  385. std::vector<std::string> values;
  386. // Write 100KB (100 values, each 1K)
  387. for (int k = 0; k < kNumPerFile; k++) {
  388. values.push_back(RandomString(&rnd, 990));
  389. ASSERT_OK(Put(Key(j * kNumPerFile + k), values[k]));
  390. }
  391. // put extra key to trigger flush
  392. ASSERT_OK(Put("", ""));
  393. dbfull()->TEST_WaitForFlushMemTable();
  394. if (j < kFilesPerLevel - 1) {
  395. // background compaction may happen early for kFilesPerLevel'th file
  396. ASSERT_EQ(NumTableFilesAtLevel(0), j + 1);
  397. }
  398. }
  399. dbfull()->TEST_WaitForCompact();
  400. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  401. ASSERT_GT(NumTableFilesAtLevel(kNumLevels - 1 - i), kFilesPerLevel - 1);
  402. }
  403. // Now L1-L3 are full, when we compact L1->L2 we should see (1) subcompactions
  404. // happen since input level > 0; (2) range deletions are not dropped since
  405. // output level is not bottommost. If no file boundary assertion fails, that
  406. // probably means universal compaction + subcompaction + range deletion are
  407. // compatible.
  408. ASSERT_OK(dbfull()->RunManualCompaction(
  409. reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
  410. ->cfd(),
  411. 1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
  412. nullptr /* begin */, nullptr /* end */, true /* exclusive */,
  413. true /* disallow_trivial_move */,
  414. port::kMaxUint64 /* max_file_num_to_ignore */));
  415. }
  416. #endif // ROCKSDB_LITE
  417. TEST_F(DBRangeDelTest, CompactionRemovesCoveredMergeOperands) {
  418. const int kNumPerFile = 3, kNumFiles = 3;
  419. Options opts = CurrentOptions();
  420. opts.disable_auto_compactions = true;
  421. opts.memtable_factory.reset(new SpecialSkipListFactory(2 * kNumPerFile));
  422. opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  423. opts.num_levels = 2;
  424. Reopen(opts);
  425. // Iterates kNumFiles * kNumPerFile + 1 times since flushing the last file
  426. // requires an extra entry.
  427. for (int i = 0; i <= kNumFiles * kNumPerFile; ++i) {
  428. if (i % kNumPerFile == 0 && i / kNumPerFile == kNumFiles - 1) {
  429. // Delete merge operands from all but the last file
  430. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
  431. "key_");
  432. }
  433. std::string val;
  434. PutFixed64(&val, i);
  435. db_->Merge(WriteOptions(), "key", val);
  436. // we need to prevent trivial move using Puts so compaction will actually
  437. // process the merge operands.
  438. db_->Put(WriteOptions(), "prevent_trivial_move", "");
  439. if (i > 0 && i % kNumPerFile == 0) {
  440. dbfull()->TEST_WaitForFlushMemTable();
  441. }
  442. }
  443. ReadOptions read_opts;
  444. read_opts.ignore_range_deletions = true;
  445. std::string expected, actual;
  446. ASSERT_OK(db_->Get(read_opts, "key", &actual));
  447. PutFixed64(&expected, 45); // 1+2+...+9
  448. ASSERT_EQ(expected, actual);
  449. db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  450. expected.clear();
  451. ASSERT_OK(db_->Get(read_opts, "key", &actual));
  452. uint64_t tmp;
  453. Slice tmp2(actual);
  454. GetFixed64(&tmp2, &tmp);
  455. PutFixed64(&expected, 30); // 6+7+8+9 (earlier operands covered by tombstone)
  456. ASSERT_EQ(expected, actual);
  457. }
  458. TEST_F(DBRangeDelTest, PutDeleteRangeMergeFlush) {
  459. // Test the sequence of operations: (1) Put, (2) DeleteRange, (3) Merge, (4)
  460. // Flush. The `CompactionIterator` previously had a bug where we forgot to
  461. // check for covering range tombstones when processing the (1) Put, causing
  462. // it to reappear after the flush.
  463. Options opts = CurrentOptions();
  464. opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  465. Reopen(opts);
  466. std::string val;
  467. PutFixed64(&val, 1);
  468. ASSERT_OK(db_->Put(WriteOptions(), "key", val));
  469. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  470. "key", "key_"));
  471. ASSERT_OK(db_->Merge(WriteOptions(), "key", val));
  472. ASSERT_OK(db_->Flush(FlushOptions()));
  473. ReadOptions read_opts;
  474. std::string expected, actual;
  475. ASSERT_OK(db_->Get(read_opts, "key", &actual));
  476. PutFixed64(&expected, 1);
  477. ASSERT_EQ(expected, actual);
  478. }
  479. // NumTableFilesAtLevel() is not supported in ROCKSDB_LITE
  480. #ifndef ROCKSDB_LITE
  481. TEST_F(DBRangeDelTest, ObsoleteTombstoneCleanup) {
  482. // During compaction to bottommost level, verify range tombstones older than
  483. // the oldest snapshot are removed, while others are preserved.
  484. Options opts = CurrentOptions();
  485. opts.disable_auto_compactions = true;
  486. opts.num_levels = 2;
  487. opts.statistics = CreateDBStatistics();
  488. Reopen(opts);
  489. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr1",
  490. "dr10"); // obsolete after compaction
  491. db_->Put(WriteOptions(), "key", "val");
  492. db_->Flush(FlushOptions());
  493. const Snapshot* snapshot = db_->GetSnapshot();
  494. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "dr2",
  495. "dr20"); // protected by snapshot
  496. db_->Put(WriteOptions(), "key", "val");
  497. db_->Flush(FlushOptions());
  498. ASSERT_EQ(2, NumTableFilesAtLevel(0));
  499. ASSERT_EQ(0, NumTableFilesAtLevel(1));
  500. db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  501. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  502. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  503. ASSERT_EQ(1, TestGetTickerCount(opts, COMPACTION_RANGE_DEL_DROP_OBSOLETE));
  504. db_->ReleaseSnapshot(snapshot);
  505. }
  506. TEST_F(DBRangeDelTest, TableEvictedDuringScan) {
  507. // The RangeDelAggregator holds pointers into range deletion blocks created by
  508. // table readers. This test ensures the aggregator can still access those
  509. // blocks even if it outlives the table readers that created them.
  510. //
  511. // DBIter always keeps readers open for L0 files. So, in order to test
  512. // aggregator outliving reader, we need to have deletions in L1 files, which
  513. // are opened/closed on-demand during the scan. This is accomplished by
  514. // setting kNumRanges > level0_stop_writes_trigger, which prevents deletions
  515. // from all lingering in L0 (there is at most one range deletion per L0 file).
  516. //
  517. // The first L1 file will contain a range deletion since its begin key is 0.
  518. // SeekToFirst() references that table's reader and adds its range tombstone
  519. // to the aggregator. Upon advancing beyond that table's key-range via Next(),
  520. // the table reader will be unreferenced by the iterator. Since we manually
  521. // call Evict() on all readers before the full scan, this unreference causes
  522. // the reader's refcount to drop to zero and thus be destroyed.
  523. //
  524. // When it is destroyed, we do not remove its range deletions from the
  525. // aggregator. So, subsequent calls to Next() must be able to use these
  526. // deletions to decide whether a key is covered. This will work as long as
  527. // the aggregator properly references the range deletion block.
  528. const int kNum = 25, kRangeBegin = 0, kRangeEnd = 7, kNumRanges = 5;
  529. Options opts = CurrentOptions();
  530. opts.comparator = test::Uint64Comparator();
  531. opts.level0_file_num_compaction_trigger = 4;
  532. opts.level0_stop_writes_trigger = 4;
  533. opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  534. opts.num_levels = 2;
  535. BlockBasedTableOptions bbto;
  536. bbto.cache_index_and_filter_blocks = true;
  537. bbto.block_cache = NewLRUCache(8 << 20);
  538. opts.table_factory.reset(NewBlockBasedTableFactory(bbto));
  539. Reopen(opts);
  540. // Hold a snapshot so range deletions can't become obsolete during compaction
  541. // to bottommost level (i.e., L1).
  542. const Snapshot* snapshot = db_->GetSnapshot();
  543. for (int i = 0; i < kNum; ++i) {
  544. db_->Put(WriteOptions(), GetNumericStr(i), "val");
  545. if (i > 0) {
  546. dbfull()->TEST_WaitForFlushMemTable();
  547. }
  548. if (i >= kNum / 2 && i < kNum / 2 + kNumRanges) {
  549. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  550. GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
  551. }
  552. }
  553. // Must be > 1 so the first L1 file can be closed before scan finishes
  554. dbfull()->TEST_WaitForCompact();
  555. ASSERT_GT(NumTableFilesAtLevel(1), 1);
  556. std::vector<uint64_t> file_numbers = ListTableFiles(env_, dbname_);
  557. ReadOptions read_opts;
  558. auto* iter = db_->NewIterator(read_opts);
  559. int expected = kRangeEnd;
  560. iter->SeekToFirst();
  561. for (auto file_number : file_numbers) {
  562. // This puts table caches in the state of being externally referenced only
  563. // so they are destroyed immediately upon iterator unreferencing.
  564. TableCache::Evict(dbfull()->TEST_table_cache(), file_number);
  565. }
  566. for (; iter->Valid(); iter->Next()) {
  567. ASSERT_EQ(GetNumericStr(expected), iter->key());
  568. ++expected;
  569. // Keep clearing block cache's LRU so range deletion block can be freed as
  570. // soon as its refcount drops to zero.
  571. bbto.block_cache->EraseUnRefEntries();
  572. }
  573. ASSERT_EQ(kNum, expected);
  574. delete iter;
  575. db_->ReleaseSnapshot(snapshot);
  576. }
  577. TEST_F(DBRangeDelTest, GetCoveredKeyFromMutableMemtable) {
  578. do {
  579. DestroyAndReopen(CurrentOptions());
  580. db_->Put(WriteOptions(), "key", "val");
  581. ASSERT_OK(
  582. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  583. ReadOptions read_opts;
  584. std::string value;
  585. ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  586. } while (ChangeOptions(kRangeDelSkipConfigs));
  587. }
  588. TEST_F(DBRangeDelTest, GetCoveredKeyFromImmutableMemtable) {
  589. do {
  590. Options opts = CurrentOptions();
  591. opts.max_write_buffer_number = 3;
  592. opts.min_write_buffer_number_to_merge = 2;
  593. // SpecialSkipListFactory lets us specify maximum number of elements the
  594. // memtable can hold. It switches the active memtable to immutable (flush is
  595. // prevented by the above options) upon inserting an element that would
  596. // overflow the memtable.
  597. opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  598. DestroyAndReopen(opts);
  599. db_->Put(WriteOptions(), "key", "val");
  600. ASSERT_OK(
  601. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  602. db_->Put(WriteOptions(), "blah", "val");
  603. ReadOptions read_opts;
  604. std::string value;
  605. ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  606. } while (ChangeOptions(kRangeDelSkipConfigs));
  607. }
  608. TEST_F(DBRangeDelTest, GetCoveredKeyFromSst) {
  609. do {
  610. DestroyAndReopen(CurrentOptions());
  611. db_->Put(WriteOptions(), "key", "val");
  612. // snapshot prevents key from being deleted during flush
  613. const Snapshot* snapshot = db_->GetSnapshot();
  614. ASSERT_OK(
  615. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  616. ASSERT_OK(db_->Flush(FlushOptions()));
  617. ReadOptions read_opts;
  618. std::string value;
  619. ASSERT_TRUE(db_->Get(read_opts, "key", &value).IsNotFound());
  620. db_->ReleaseSnapshot(snapshot);
  621. } while (ChangeOptions(kRangeDelSkipConfigs));
  622. }
  623. TEST_F(DBRangeDelTest, GetCoveredMergeOperandFromMemtable) {
  624. const int kNumMergeOps = 10;
  625. Options opts = CurrentOptions();
  626. opts.merge_operator = MergeOperators::CreateUInt64AddOperator();
  627. Reopen(opts);
  628. for (int i = 0; i < kNumMergeOps; ++i) {
  629. std::string val;
  630. PutFixed64(&val, i);
  631. db_->Merge(WriteOptions(), "key", val);
  632. if (i == kNumMergeOps / 2) {
  633. // deletes [0, 5]
  634. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "key",
  635. "key_");
  636. }
  637. }
  638. ReadOptions read_opts;
  639. std::string expected, actual;
  640. ASSERT_OK(db_->Get(read_opts, "key", &actual));
  641. PutFixed64(&expected, 30); // 6+7+8+9
  642. ASSERT_EQ(expected, actual);
  643. expected.clear();
  644. read_opts.ignore_range_deletions = true;
  645. ASSERT_OK(db_->Get(read_opts, "key", &actual));
  646. PutFixed64(&expected, 45); // 0+1+2+...+9
  647. ASSERT_EQ(expected, actual);
  648. }
  649. TEST_F(DBRangeDelTest, GetIgnoresRangeDeletions) {
  650. Options opts = CurrentOptions();
  651. opts.max_write_buffer_number = 4;
  652. opts.min_write_buffer_number_to_merge = 3;
  653. opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  654. Reopen(opts);
  655. db_->Put(WriteOptions(), "sst_key", "val");
  656. // snapshot prevents key from being deleted during flush
  657. const Snapshot* snapshot = db_->GetSnapshot();
  658. ASSERT_OK(
  659. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  660. ASSERT_OK(db_->Flush(FlushOptions()));
  661. db_->Put(WriteOptions(), "imm_key", "val");
  662. ASSERT_OK(
  663. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  664. db_->Put(WriteOptions(), "mem_key", "val");
  665. ASSERT_OK(
  666. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  667. ReadOptions read_opts;
  668. read_opts.ignore_range_deletions = true;
  669. for (std::string key : {"sst_key", "imm_key", "mem_key"}) {
  670. std::string value;
  671. ASSERT_OK(db_->Get(read_opts, key, &value));
  672. }
  673. db_->ReleaseSnapshot(snapshot);
  674. }
  675. TEST_F(DBRangeDelTest, IteratorRemovesCoveredKeys) {
  676. const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  677. Options opts = CurrentOptions();
  678. opts.comparator = test::Uint64Comparator();
  679. opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  680. Reopen(opts);
  681. // Write half of the keys before the tombstone and half after the tombstone.
  682. // Only covered keys (i.e., within the range and older than the tombstone)
  683. // should be deleted.
  684. for (int i = 0; i < kNum; ++i) {
  685. if (i == kNum / 2) {
  686. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  687. GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
  688. }
  689. db_->Put(WriteOptions(), GetNumericStr(i), "val");
  690. }
  691. ReadOptions read_opts;
  692. auto* iter = db_->NewIterator(read_opts);
  693. int expected = 0;
  694. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  695. ASSERT_EQ(GetNumericStr(expected), iter->key());
  696. if (expected == kRangeBegin - 1) {
  697. expected = kNum / 2;
  698. } else {
  699. ++expected;
  700. }
  701. }
  702. ASSERT_EQ(kNum, expected);
  703. delete iter;
  704. }
  705. TEST_F(DBRangeDelTest, IteratorOverUserSnapshot) {
  706. const int kNum = 200, kRangeBegin = 50, kRangeEnd = 150, kNumPerFile = 25;
  707. Options opts = CurrentOptions();
  708. opts.comparator = test::Uint64Comparator();
  709. opts.memtable_factory.reset(new SpecialSkipListFactory(kNumPerFile));
  710. Reopen(opts);
  711. const Snapshot* snapshot = nullptr;
  712. // Put a snapshot before the range tombstone, verify an iterator using that
  713. // snapshot sees all inserted keys.
  714. for (int i = 0; i < kNum; ++i) {
  715. if (i == kNum / 2) {
  716. snapshot = db_->GetSnapshot();
  717. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  718. GetNumericStr(kRangeBegin), GetNumericStr(kRangeEnd));
  719. }
  720. db_->Put(WriteOptions(), GetNumericStr(i), "val");
  721. }
  722. ReadOptions read_opts;
  723. read_opts.snapshot = snapshot;
  724. auto* iter = db_->NewIterator(read_opts);
  725. int expected = 0;
  726. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  727. ASSERT_EQ(GetNumericStr(expected), iter->key());
  728. ++expected;
  729. }
  730. ASSERT_EQ(kNum / 2, expected);
  731. delete iter;
  732. db_->ReleaseSnapshot(snapshot);
  733. }
  734. TEST_F(DBRangeDelTest, IteratorIgnoresRangeDeletions) {
  735. Options opts = CurrentOptions();
  736. opts.max_write_buffer_number = 4;
  737. opts.min_write_buffer_number_to_merge = 3;
  738. opts.memtable_factory.reset(new SpecialSkipListFactory(1));
  739. Reopen(opts);
  740. db_->Put(WriteOptions(), "sst_key", "val");
  741. // snapshot prevents key from being deleted during flush
  742. const Snapshot* snapshot = db_->GetSnapshot();
  743. ASSERT_OK(
  744. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  745. ASSERT_OK(db_->Flush(FlushOptions()));
  746. db_->Put(WriteOptions(), "imm_key", "val");
  747. ASSERT_OK(
  748. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  749. db_->Put(WriteOptions(), "mem_key", "val");
  750. ASSERT_OK(
  751. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  752. ReadOptions read_opts;
  753. read_opts.ignore_range_deletions = true;
  754. auto* iter = db_->NewIterator(read_opts);
  755. int i = 0;
  756. std::string expected[] = {"imm_key", "mem_key", "sst_key"};
  757. for (iter->SeekToFirst(); iter->Valid(); iter->Next(), ++i) {
  758. std::string key;
  759. ASSERT_EQ(expected[i], iter->key());
  760. }
  761. ASSERT_EQ(3, i);
  762. delete iter;
  763. db_->ReleaseSnapshot(snapshot);
  764. }
  765. #ifndef ROCKSDB_UBSAN_RUN
  766. TEST_F(DBRangeDelTest, TailingIteratorRangeTombstoneUnsupported) {
  767. db_->Put(WriteOptions(), "key", "val");
  768. // snapshot prevents key from being deleted during flush
  769. const Snapshot* snapshot = db_->GetSnapshot();
  770. ASSERT_OK(
  771. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
  772. // iterations check unsupported in memtable, l0, and then l1
  773. for (int i = 0; i < 3; ++i) {
  774. ReadOptions read_opts;
  775. read_opts.tailing = true;
  776. auto* iter = db_->NewIterator(read_opts);
  777. if (i == 2) {
  778. // For L1+, iterators over files are created on-demand, so need seek
  779. iter->SeekToFirst();
  780. }
  781. ASSERT_TRUE(iter->status().IsNotSupported());
  782. delete iter;
  783. if (i == 0) {
  784. ASSERT_OK(db_->Flush(FlushOptions()));
  785. } else if (i == 1) {
  786. MoveFilesToLevel(1);
  787. }
  788. }
  789. db_->ReleaseSnapshot(snapshot);
  790. }
  791. #endif // !ROCKSDB_UBSAN_RUN
  792. TEST_F(DBRangeDelTest, SubcompactionHasEmptyDedicatedRangeDelFile) {
  793. const int kNumFiles = 2, kNumKeysPerFile = 4;
  794. Options options = CurrentOptions();
  795. options.compression = kNoCompression;
  796. options.disable_auto_compactions = true;
  797. options.level0_file_num_compaction_trigger = kNumFiles;
  798. options.max_subcompactions = 2;
  799. options.num_levels = 2;
  800. options.target_file_size_base = 4096;
  801. Reopen(options);
  802. // need a L1 file for subcompaction to be triggered
  803. ASSERT_OK(
  804. db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(0), "val"));
  805. ASSERT_OK(db_->Flush(FlushOptions()));
  806. MoveFilesToLevel(1);
  807. // put enough keys to fill up the first subcompaction, and later range-delete
  808. // them so that the first subcompaction outputs no key-values. In that case
  809. // it'll consider making an SST file dedicated to range deletions.
  810. for (int i = 0; i < kNumKeysPerFile; ++i) {
  811. ASSERT_OK(db_->Put(WriteOptions(), db_->DefaultColumnFamily(), Key(i),
  812. std::string(1024, 'a')));
  813. }
  814. ASSERT_OK(db_->Flush(FlushOptions()));
  815. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
  816. Key(kNumKeysPerFile)));
  817. // the above range tombstone can be dropped, so that one alone won't cause a
  818. // dedicated file to be opened. We can make one protected by snapshot that
  819. // must be considered. Make its range outside the first subcompaction's range
  820. // to exercise the tricky part of the code.
  821. const Snapshot* snapshot = db_->GetSnapshot();
  822. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  823. Key(kNumKeysPerFile + 1),
  824. Key(kNumKeysPerFile + 2)));
  825. ASSERT_OK(db_->Flush(FlushOptions()));
  826. ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  827. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  828. db_->EnableAutoCompaction({db_->DefaultColumnFamily()});
  829. dbfull()->TEST_WaitForCompact();
  830. db_->ReleaseSnapshot(snapshot);
  831. }
  832. TEST_F(DBRangeDelTest, MemtableBloomFilter) {
  833. // regression test for #2743. the range delete tombstones in memtable should
  834. // be added even when Get() skips searching due to its prefix bloom filter
  835. const int kMemtableSize = 1 << 20; // 1MB
  836. const int kMemtablePrefixFilterSize = 1 << 13; // 8KB
  837. const int kNumKeys = 1000;
  838. const int kPrefixLen = 8;
  839. Options options = CurrentOptions();
  840. options.memtable_prefix_bloom_size_ratio =
  841. static_cast<double>(kMemtablePrefixFilterSize) / kMemtableSize;
  842. options.prefix_extractor.reset(
  843. ROCKSDB_NAMESPACE::NewFixedPrefixTransform(kPrefixLen));
  844. options.write_buffer_size = kMemtableSize;
  845. Reopen(options);
  846. for (int i = 0; i < kNumKeys; ++i) {
  847. ASSERT_OK(Put(Key(i), "val"));
  848. }
  849. Flush();
  850. ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
  851. Key(kNumKeys)));
  852. for (int i = 0; i < kNumKeys; ++i) {
  853. std::string value;
  854. ASSERT_TRUE(db_->Get(ReadOptions(), Key(i), &value).IsNotFound());
  855. }
  856. }
  857. TEST_F(DBRangeDelTest, CompactionTreatsSplitInputLevelDeletionAtomically) {
  858. // This test originally verified that compaction treated files containing a
  859. // split range deletion in the input level as an atomic unit. I.e.,
  860. // compacting any input-level file(s) containing a portion of the range
  861. // deletion causes all other input-level files containing portions of that
  862. // same range deletion to be included in the compaction. Range deletion
  863. // tombstones are now truncated to sstable boundaries which removed the need
  864. // for that behavior (which could lead to excessively large
  865. // compactions).
  866. const int kNumFilesPerLevel = 4, kValueBytes = 4 << 10;
  867. Options options = CurrentOptions();
  868. options.compression = kNoCompression;
  869. options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  870. options.memtable_factory.reset(
  871. new SpecialSkipListFactory(2 /* num_entries_flush */));
  872. options.target_file_size_base = kValueBytes;
  873. // i == 0: CompactFiles
  874. // i == 1: CompactRange
  875. // i == 2: automatic compaction
  876. for (int i = 0; i < 3; ++i) {
  877. DestroyAndReopen(options);
  878. ASSERT_OK(Put(Key(0), ""));
  879. ASSERT_OK(db_->Flush(FlushOptions()));
  880. MoveFilesToLevel(2);
  881. ASSERT_EQ(1, NumTableFilesAtLevel(2));
  882. // snapshot protects range tombstone from dropping due to becoming obsolete.
  883. const Snapshot* snapshot = db_->GetSnapshot();
  884. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
  885. Key(2 * kNumFilesPerLevel));
  886. Random rnd(301);
  887. std::string value = RandomString(&rnd, kValueBytes);
  888. for (int j = 0; j < kNumFilesPerLevel; ++j) {
  889. // give files overlapping key-ranges to prevent trivial move
  890. ASSERT_OK(Put(Key(j), value));
  891. ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
  892. if (j > 0) {
  893. dbfull()->TEST_WaitForFlushMemTable();
  894. ASSERT_EQ(j, NumTableFilesAtLevel(0));
  895. }
  896. }
  897. // put extra key to trigger final flush
  898. ASSERT_OK(Put("", ""));
  899. dbfull()->TEST_WaitForFlushMemTable();
  900. dbfull()->TEST_WaitForCompact();
  901. ASSERT_EQ(0, NumTableFilesAtLevel(0));
  902. ASSERT_EQ(kNumFilesPerLevel, NumTableFilesAtLevel(1));
  903. ColumnFamilyMetaData meta;
  904. db_->GetColumnFamilyMetaData(&meta);
  905. if (i == 0) {
  906. ASSERT_OK(db_->CompactFiles(
  907. CompactionOptions(), {meta.levels[1].files[0].name}, 2 /* level */));
  908. ASSERT_EQ(0, NumTableFilesAtLevel(1));
  909. } else if (i == 1) {
  910. auto begin_str = Key(0), end_str = Key(1);
  911. Slice begin = begin_str, end = end_str;
  912. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin, &end));
  913. ASSERT_EQ(3, NumTableFilesAtLevel(1));
  914. } else if (i == 2) {
  915. ASSERT_OK(db_->SetOptions(db_->DefaultColumnFamily(),
  916. {{"max_bytes_for_level_base", "10000"}}));
  917. dbfull()->TEST_WaitForCompact();
  918. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  919. }
  920. ASSERT_GT(NumTableFilesAtLevel(2), 0);
  921. db_->ReleaseSnapshot(snapshot);
  922. }
  923. }
  924. TEST_F(DBRangeDelTest, RangeTombstoneEndKeyAsSstableUpperBound) {
  925. // Test the handling of the range-tombstone end-key as the
  926. // upper-bound for an sstable.
  927. const int kNumFilesPerLevel = 2, kValueBytes = 4 << 10;
  928. Options options = CurrentOptions();
  929. options.compression = kNoCompression;
  930. options.level0_file_num_compaction_trigger = kNumFilesPerLevel;
  931. options.memtable_factory.reset(
  932. new SpecialSkipListFactory(2 /* num_entries_flush */));
  933. options.target_file_size_base = kValueBytes;
  934. options.disable_auto_compactions = true;
  935. DestroyAndReopen(options);
  936. // Create an initial sstable at L2:
  937. // [key000000#1,1, key000000#1,1]
  938. ASSERT_OK(Put(Key(0), ""));
  939. ASSERT_OK(db_->Flush(FlushOptions()));
  940. MoveFilesToLevel(2);
  941. ASSERT_EQ(1, NumTableFilesAtLevel(2));
  942. // A snapshot protects the range tombstone from dropping due to
  943. // becoming obsolete.
  944. const Snapshot* snapshot = db_->GetSnapshot();
  945. db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
  946. Key(0), Key(2 * kNumFilesPerLevel));
  947. // Create 2 additional sstables in L0. Note that the first sstable
  948. // contains the range tombstone.
  949. // [key000000#3,1, key000004#72057594037927935,15]
  950. // [key000001#5,1, key000002#6,1]
  951. Random rnd(301);
  952. std::string value = RandomString(&rnd, kValueBytes);
  953. for (int j = 0; j < kNumFilesPerLevel; ++j) {
  954. // Give files overlapping key-ranges to prevent a trivial move when we
  955. // compact from L0 to L1.
  956. ASSERT_OK(Put(Key(j), value));
  957. ASSERT_OK(Put(Key(2 * kNumFilesPerLevel - 1 - j), value));
  958. ASSERT_OK(db_->Flush(FlushOptions()));
  959. ASSERT_EQ(j + 1, NumTableFilesAtLevel(0));
  960. }
  961. // Compact the 2 L0 sstables to L1, resulting in the following LSM. There
  962. // are 2 sstables generated in L1 due to the target_file_size_base setting.
  963. // L1:
  964. // [key000000#3,1, key000002#72057594037927935,15]
  965. // [key000002#6,1, key000004#72057594037927935,15]
  966. // L2:
  967. // [key000000#1,1, key000000#1,1]
  968. MoveFilesToLevel(1);
  969. ASSERT_EQ(2, NumTableFilesAtLevel(1));
  970. {
  971. // Compact the second sstable in L1:
  972. // L1:
  973. // [key000000#3,1, key000002#72057594037927935,15]
  974. // L2:
  975. // [key000000#1,1, key000000#1,1]
  976. // [key000002#6,1, key000004#72057594037927935,15]
  977. //
  978. // At the same time, verify the compaction does not cause the key at the
  979. // endpoint (key000002#6,1) to disappear.
  980. ASSERT_EQ(value, Get(Key(2)));
  981. auto begin_str = Key(3);
  982. const ROCKSDB_NAMESPACE::Slice begin = begin_str;
  983. dbfull()->TEST_CompactRange(1, &begin, nullptr);
  984. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  985. ASSERT_EQ(2, NumTableFilesAtLevel(2));
  986. ASSERT_EQ(value, Get(Key(2)));
  987. }
  988. {
  989. // Compact the first sstable in L1. This should be copacetic, but
  990. // was previously resulting in overlapping sstables in L2 due to
  991. // mishandling of the range tombstone end-key when used as the
  992. // largest key for an sstable. The resulting LSM structure should
  993. // be:
  994. //
  995. // L2:
  996. // [key000000#1,1, key000001#72057594037927935,15]
  997. // [key000001#5,1, key000002#72057594037927935,15]
  998. // [key000002#6,1, key000004#72057594037927935,15]
  999. auto begin_str = Key(0);
  1000. const ROCKSDB_NAMESPACE::Slice begin = begin_str;
  1001. dbfull()->TEST_CompactRange(1, &begin, &begin);
  1002. ASSERT_EQ(0, NumTableFilesAtLevel(1));
  1003. ASSERT_EQ(3, NumTableFilesAtLevel(2));
  1004. }
  1005. db_->ReleaseSnapshot(snapshot);
  1006. }
  1007. TEST_F(DBRangeDelTest, UnorderedTombstones) {
  1008. // Regression test for #2752. Range delete tombstones between
  1009. // different snapshot stripes are not stored in order, so the first
  1010. // tombstone of each snapshot stripe should be checked as a smallest
  1011. // candidate.
  1012. Options options = CurrentOptions();
  1013. DestroyAndReopen(options);
  1014. auto cf = db_->DefaultColumnFamily();
  1015. ASSERT_OK(db_->Put(WriteOptions(), cf, "a", "a"));
  1016. ASSERT_OK(db_->Flush(FlushOptions(), cf));
  1017. ASSERT_EQ(1, NumTableFilesAtLevel(0));
  1018. ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr));
  1019. ASSERT_EQ(1, NumTableFilesAtLevel(1));
  1020. ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "b", "c"));
  1021. // Hold a snapshot to separate these two delete ranges.
  1022. auto snapshot = db_->GetSnapshot();
  1023. ASSERT_OK(db_->DeleteRange(WriteOptions(), cf, "a", "b"));
  1024. ASSERT_OK(db_->Flush(FlushOptions(), cf));
  1025. db_->ReleaseSnapshot(snapshot);
  1026. std::vector<std::vector<FileMetaData>> files;
  1027. dbfull()->TEST_GetFilesMetaData(cf, &files);
  1028. ASSERT_EQ(1, files[0].size());
  1029. ASSERT_EQ("a", files[0][0].smallest.user_key());
  1030. ASSERT_EQ("c", files[0][0].largest.user_key());
  1031. std::string v;
  1032. auto s = db_->Get(ReadOptions(), "a", &v);
  1033. ASSERT_TRUE(s.IsNotFound());
  1034. }
  1035. class MockMergeOperator : public MergeOperator {
  1036. // Mock non-associative operator. Non-associativity is expressed by lack of
  1037. // implementation for any `PartialMerge*` functions.
  1038. public:
  1039. bool FullMergeV2(const MergeOperationInput& merge_in,
  1040. MergeOperationOutput* merge_out) const override {
  1041. assert(merge_out != nullptr);
  1042. merge_out->new_value = merge_in.operand_list.back().ToString();
  1043. return true;
  1044. }
  1045. const char* Name() const override { return "MockMergeOperator"; }
  1046. };
  1047. TEST_F(DBRangeDelTest, KeyAtOverlappingEndpointReappears) {
  1048. // This test uses a non-associative merge operator since that is a convenient
  1049. // way to get compaction to write out files with overlapping user-keys at the
  1050. // endpoints. Note, however, overlapping endpoints can also occur with other
  1051. // value types (Put, etc.), assuming the right snapshots are present.
  1052. const int kFileBytes = 1 << 20;
  1053. const int kValueBytes = 1 << 10;
  1054. const int kNumFiles = 4;
  1055. Options options = CurrentOptions();
  1056. options.compression = kNoCompression;
  1057. options.disable_auto_compactions = true;
  1058. options.merge_operator.reset(new MockMergeOperator());
  1059. options.target_file_size_base = kFileBytes;
  1060. Reopen(options);
  1061. // Push dummy data to L3 so that our actual test files on L0-L2
  1062. // will not be considered "bottommost" level, otherwise compaction
  1063. // may prevent us from creating overlapping user keys
  1064. // as on the bottommost layer MergeHelper
  1065. ASSERT_OK(db_->Merge(WriteOptions(), "key", "dummy"));
  1066. ASSERT_OK(db_->Flush(FlushOptions()));
  1067. MoveFilesToLevel(3);
  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), "key", value));
    }
    if (i == kNumFiles - 1) {
      // Take snapshot to prevent covered merge operands from being dropped by
      // compaction.
      snapshot = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 "key", "key_"));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));

  std::string value;
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  dbfull()->TEST_CompactRange(0 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Now we have multiple files at L1 all containing a single user key, thus
  // guaranteeing overlap in the file endpoints.
  ASSERT_GT(NumTableFilesAtLevel(1), 1);

  // Verify no merge operands reappeared after the compaction.
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  // Compact and verify again. It's worthwhile because now the files have
  // tighter endpoints, so we can verify that doesn't mess anything up.
  dbfull()->TEST_CompactRange(1 /* level */, nullptr /* begin */,
                              nullptr /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_GT(NumTableFilesAtLevel(2), 1);
  ASSERT_TRUE(db_->Get(ReadOptions(), "key", &value).IsNotFound());

  db_->ReleaseSnapshot(snapshot);
}
TEST_F(DBRangeDelTest, UntruncatedTombstoneDoesNotDeleteNewerKey) {
  // Verify a key newer than a range tombstone cannot be deleted by being
  // compacted to the bottom level (and thus having its seqnum zeroed) before
  // the range tombstone. This used to happen when range tombstones were
  // untruncated on reads such that they extended past their file boundaries.
  //
  // Test summary:
  //
  // - L1 is bottommost.
  // - A couple snapshots are strategically taken to prevent seqnums from being
  //   zeroed, range tombstone from being dropped, merge operands from being
  //   dropped, and merge operands from being combined.
  // - Left half of files in L1 all have same user key, ensuring their file
  //   boundaries overlap. In the past this would cause range tombstones to be
  //   untruncated.
  // - Right half of L1 files all have different keys, ensuring no overlap.
  // - A range tombstone spans all L1 keys, so it is stored in every L1 file.
  // - Keys in the right side of the key-range are overwritten. These are
  //   compacted down to L1 after releasing snapshots such that their seqnums
  //   will be zeroed.
  // - A full range scan is performed. If the tombstone in the left L1 files
  //   were untruncated, it would now cover keys newer than it (but with zeroed
  //   seqnums) in the right L1 files.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  const int kNumFiles = 4;
  const int kMaxKey = kNumFiles * kFileBytes / kValueBytes;
  const int kKeysOverwritten = 10;
  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.num_levels = 2;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  // - snapshots[0] prevents merge operands from being combined during
  //   compaction.
  // - snapshots[1] prevents merge operands from being dropped due to the
  //   covering range tombstone.
  const Snapshot* snapshots[] = {nullptr, nullptr};
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      std::string key;
      if (i < kNumFiles / 2) {
        key = Key(0);
      } else {
        key = Key(1 + i * kFileBytes / kValueBytes + j);
      }
      ASSERT_OK(db_->Merge(WriteOptions(), key, value));
    }
    if (i == 0) {
      snapshots[0] = db_->GetSnapshot();
    }
    if (i == kNumFiles - 1) {
      snapshots[1] = db_->GetSnapshot();
      // The DeleteRange is the last write so all merge operands are covered.
      ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                 Key(0), Key(kMaxKey + 1)));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  auto get_key_count = [this]() -> int {
    auto* iter = db_->NewIterator(ReadOptions());
    iter->SeekToFirst();
    int keys_found = 0;
    for (; iter->Valid(); iter->Next()) {
      ++keys_found;
    }
    delete iter;
    return keys_found;
  };

  // All keys should be covered
  ASSERT_EQ(0, get_key_count());

  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  // Roughly the left half of L1 files should have overlapping boundary keys,
  // while the right half should not.
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  // Now overwrite a few keys that are in L1 files that definitely don't have
  // overlapping boundary keys.
  for (int i = kMaxKey; i > kMaxKey - kKeysOverwritten; --i) {
    auto value = RandomString(&rnd, kValueBytes);
    ASSERT_OK(db_->Merge(WriteOptions(), Key(i), value));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));

  // The overwritten keys are in L0 now, so clearly aren't covered by the range
  // tombstone in L1.
  ASSERT_EQ(kKeysOverwritten, get_key_count());

  // Release snapshots so seqnums can be zeroed when L0->L1 happens.
  db_->ReleaseSnapshot(snapshots[0]);
  db_->ReleaseSnapshot(snapshots[1]);
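  // Compact only the overwritten keys down to bottommost L1. With the
  // snapshots released, their seqnums get zeroed. If the tombstone in the
  // left-half L1 files were still untruncated, it would now appear to cover
  // these newer keys during the scan below.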
  auto begin_key_storage = Key(kMaxKey - kKeysOverwritten + 1);
  auto end_key_storage = Key(kMaxKey);
  Slice begin_key(begin_key_storage);
  Slice end_key(end_key_storage);
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &begin_key, &end_key));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GE(NumTableFilesAtLevel(1), kNumFiles);

  ASSERT_EQ(kKeysOverwritten, get_key_count());
}
TEST_F(DBRangeDelTest, DeletedMergeOperandReappearsIterPrev) {
  // Exposes a bug where we were using
  // `RangeDelPositioningMode::kBackwardTraversal` while scanning merge
  // operands in the forward direction. Confusingly, this case happened during
  // `DBIter::Prev`. It could cause assertion failure, or reappearing keys.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 1 << 10;
  // Need multiple keys so we can get results when calling `Prev()` after
  // `SeekToLast()`.
  const int kNumKeys = 3;
  const int kNumFiles = 4;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.merge_operator.reset(new MockMergeOperator());
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  Random rnd(301);
  const Snapshot* snapshot = nullptr;
  for (int i = 0; i < kNumFiles; ++i) {
    for (int j = 0; j < kFileBytes / kValueBytes; ++j) {
      auto value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(db_->Merge(WriteOptions(), Key(j % kNumKeys), value));
      if (i == 0 && j == kNumKeys) {
        // Take snapshot to prevent covered merge operands from being dropped
        // or merged by compaction.
        snapshot = db_->GetSnapshot();
        // Do a DeleteRange near the beginning so only the oldest merge operand
        // for each key is covered. This ensures the sequence of events:
        //
        // - `DBIter::Prev()` is called
        // - After encountering several versions of the same user key, it
        //   decides to seek using `DBIter::FindValueForCurrentKeyUsingSeek`.
        // - Binary searches to the newest version of the key, which is in the
        //   leftmost file containing the user key.
        // - Scans forwards to collect all merge operands. Eventually reaches
        //   the rightmost file containing the oldest merge operand, which
        //   should be covered by the `DeleteRange`. If `RangeDelAggregator`
        //   were not properly using `kForwardTraversal` here, that operand
        //   would reappear.
        ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                                   Key(0), Key(kNumKeys + 1)));
      }
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
  }
  ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(0));
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr /* begin_key */,
                              nullptr /* end_key */));
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_GT(NumTableFilesAtLevel(1), 1);
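  // Reverse scan over all keys. Each `Prev()` exercises the sequence of events
  // described in the comment above the `DeleteRange` call: a forward scan over
  // the merge operands that must not resurrect the operand covered by the
  // range tombstone.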
  auto* iter = db_->NewIterator(ReadOptions());
  iter->SeekToLast();
  int keys_found = 0;
  for (; iter->Valid(); iter->Prev()) {
    ++keys_found;
  }
  delete iter;
  ASSERT_EQ(kNumKeys, keys_found);

  db_->ReleaseSnapshot(snapshot);
}
TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeys) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  ASSERT_OK(Put(Key(0), "a"));
  const Snapshot* snapshot = db_->GetSnapshot();

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  ASSERT_OK(db_->Flush(FlushOptions()));
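  // An iterator reading at the old snapshot must still see Key(0), even though
  // a newer range tombstone covers it.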
  ReadOptions read_opts;
  read_opts.snapshot = snapshot;
  auto* iter = db_->NewIterator(read_opts);

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());

  delete iter;
  db_->ReleaseSnapshot(snapshot);
}
TEST_F(DBRangeDelTest, SnapshotPreventsDroppedKeysInImmMemTables) {
  const int kFileBytes = 1 << 20;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  options.target_file_size_base = kFileBytes;
  Reopen(options);

  // Block the flush thread so the immutable memtables stay pinned in memory.
  SyncPoint::GetInstance()->DisableProcessing();
  SyncPoint::GetInstance()->LoadDependency({
      {"SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator",
       "DBImpl::BGWorkFlush"},
  });
  SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(Put(Key(0), "a"));
  std::unique_ptr<const Snapshot, std::function<void(const Snapshot*)>>
      snapshot(db_->GetSnapshot(),
               [this](const Snapshot* s) { db_->ReleaseSnapshot(s); });

  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(0),
                             Key(10)));

  ASSERT_OK(dbfull()->TEST_SwitchMemtable());
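  // The Put and DeleteRange now live in an immutable memtable that cannot be
  // flushed until the sync point below fires, so the snapshot read has to
  // resolve both of them from the imm memtable.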
  ReadOptions read_opts;
  read_opts.snapshot = snapshot.get();
  std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));

  TEST_SYNC_POINT("SnapshotPreventsDroppedKeysInImmMemTables:AfterNewIterator");

  iter->SeekToFirst();
  ASSERT_TRUE(iter->Valid());
  ASSERT_EQ(Key(0), iter->key());

  iter->Next();
  ASSERT_FALSE(iter->Valid());
}
TEST_F(DBRangeDelTest, RangeTombstoneWrittenToMinimalSsts) {
  // Adapted from
  // https://github.com/cockroachdb/cockroach/blob/de8b3ea603dd1592d9dc26443c2cc92c356fbc2f/pkg/storage/engine/rocksdb_test.go#L1267-L1398.
  // Regression test for issue where range tombstone was written to more files
  // than necessary when it began exactly at the begin key in the next
  // compaction output file.
  const int kFileBytes = 1 << 20;
  const int kValueBytes = 4 << 10;

  Options options = CurrentOptions();
  options.compression = kNoCompression;
  options.disable_auto_compactions = true;
  // Have a bit of slack in the size limits but we enforce them more strictly
  // when manually flushing/compacting.
  options.max_compaction_bytes = 2 * kFileBytes;
  options.target_file_size_base = 2 * kFileBytes;
  options.write_buffer_size = 2 * kFileBytes;
  Reopen(options);

  Random rnd(301);
  for (char first_char : {'a', 'b', 'c'}) {
    for (int i = 0; i < kFileBytes / kValueBytes; ++i) {
      std::string key(1, first_char);
      key.append(Key(i));
      std::string value = RandomString(&rnd, kValueBytes);
      ASSERT_OK(Put(key, value));
    }
    ASSERT_OK(db_->Flush(FlushOptions()));
    MoveFilesToLevel(2);
  }
  ASSERT_EQ(0, NumTableFilesAtLevel(0));
  ASSERT_EQ(3, NumTableFilesAtLevel(2));
  // Populate the memtable lightly while spanning the whole key-space. The
  // setting of `max_compaction_bytes` will cause the L0->L1 compaction to
  // output multiple files, preventing a large L1->L2 compaction later.
  ASSERT_OK(Put("a", "val"));
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
                             "c" + Key(1), "d"));
  // Our compaction output file cutting logic currently only considers point
  // keys. So, in order for the range tombstone to have a chance at landing at
  // the start of a new file, we need a point key at the range tombstone's
  // start.
  // TODO(ajkr): remove this `Put` after file cutting accounts for range
  // tombstones (#3977).
  ASSERT_OK(Put("c" + Key(1), "value"));
  ASSERT_OK(db_->Flush(FlushOptions()));

  // Ensure manual L0->L1 compaction cuts the outputs before the range
  // tombstone and the range tombstone is only placed in the second SST.
  std::string begin_key_storage("c" + Key(1));
  Slice begin_key(begin_key_storage);
  std::string end_key_storage("d");
  Slice end_key(end_key_storage);
  dbfull()->TEST_CompactRange(0 /* level */, &begin_key /* begin */,
                              &end_key /* end */, nullptr /* column_family */,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(2, NumTableFilesAtLevel(1));

  std::vector<LiveFileMetaData> all_metadata;
  std::vector<LiveFileMetaData> l1_metadata;
  db_->GetLiveFilesMetaData(&all_metadata);
  for (const auto& metadata : all_metadata) {
    if (metadata.level == 1) {
      l1_metadata.push_back(metadata);
    }
  }
  std::sort(l1_metadata.begin(), l1_metadata.end(),
            [&](const LiveFileMetaData& a, const LiveFileMetaData& b) {
              return options.comparator->Compare(a.smallestkey, b.smallestkey) <
                     0;
            });
  ASSERT_EQ("a", l1_metadata[0].smallestkey);
  ASSERT_EQ("a", l1_metadata[0].largestkey);
  ASSERT_EQ("c" + Key(1), l1_metadata[1].smallestkey);
  ASSERT_EQ("d", l1_metadata[1].largestkey);
  TablePropertiesCollection all_table_props;
  ASSERT_OK(db_->GetPropertiesOfAllTables(&all_table_props));
  int64_t num_range_deletions = 0;
  for (const auto& name_and_table_props : all_table_props) {
    const auto& name = name_and_table_props.first;
    const auto& table_props = name_and_table_props.second;
    // The range tombstone should only be output to the second L1 SST.
    if (name.size() >= l1_metadata[1].name.size() &&
        name.substr(name.size() - l1_metadata[1].name.size())
                .compare(l1_metadata[1].name) == 0) {
      ASSERT_EQ(1, table_props->num_range_deletions);
      ++num_range_deletions;
    } else {
      ASSERT_EQ(0, table_props->num_range_deletions);
    }
  }
  ASSERT_EQ(1, num_range_deletions);
}
TEST_F(DBRangeDelTest, OverlappedTombstones) {
  const int kNumPerFile = 4, kNumFiles = 2;

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }

  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));
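  // Write a range tombstone spanning both L2 files (~24K of data, well above
  // max_compaction_bytes). Unlike point keys, the tombstone is expected to
  // stay in a single L1 output file rather than being split.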
  ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), Key(1),
                             Key(kNumFiles * kNumPerFile + 1)));
  ASSERT_OK(db_->Flush(FlushOptions()));

  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);

  // The tombstone range is not broken up into multiple SSTs which may incur a
  // large compaction with L2.
  ASSERT_EQ(1, NumTableFilesAtLevel(1));
  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}
TEST_F(DBRangeDelTest, OverlappedKeys) {
  const int kNumPerFile = 4, kNumFiles = 2;

  Options options = CurrentOptions();
  options.disable_auto_compactions = true;
  options.max_compaction_bytes = 9 * 1024;
  DestroyAndReopen(options);

  Random rnd(301);
  for (int i = 0; i < kNumFiles; ++i) {
    std::vector<std::string> values;
    // Write 12K (4 values, each 3K)
    for (int j = 0; j < kNumPerFile; j++) {
      values.push_back(RandomString(&rnd, 3 << 10));
      ASSERT_OK(Put(Key(i * kNumPerFile + j), values[j]));
    }
  }

  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));
  MoveFilesToLevel(2);
  ASSERT_EQ(2, NumTableFilesAtLevel(2));
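  // Overwrite the same key range with point keys. In contrast to the
  // range-tombstone case above, the L0->L1 compaction of these point keys is
  // expected to cut its output to respect max_compaction_bytes against the
  // grandparent (L2) files.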
  for (int i = 1; i < kNumFiles * kNumPerFile + 1; i++) {
    ASSERT_OK(Put(Key(i), "0x123"));
  }
  ASSERT_OK(db_->Flush(FlushOptions()));
  ASSERT_EQ(1, NumTableFilesAtLevel(0));

  // The key range is broken up into three SSTs to avoid a future big
  // compaction with the grandparent
  dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(3, NumTableFilesAtLevel(1));

  std::vector<std::vector<FileMetaData>> files;
  dbfull()->TEST_CompactRange(1, nullptr, nullptr, nullptr,
                              true /* disallow_trivial_move */);
  ASSERT_EQ(1, NumTableFilesAtLevel(2));
  ASSERT_EQ(0, NumTableFilesAtLevel(1));
}
#endif  // ROCKSDB_LITE

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}