seqno_time_test.cc 57 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650
  1. // Copyright (c) Meta Platforms, Inc. and affiliates.
  2. //
  3. // This source code is licensed under both the GPLv2 (found in the
  4. // COPYING file in the root directory) and Apache 2.0 License
  5. // (found in the LICENSE.Apache file in the root directory).
  6. #include "db/db_test_util.h"
  7. #include "db/periodic_task_scheduler.h"
  8. #include "db/seqno_to_time_mapping.h"
  9. #include "port/stack_trace.h"
  10. #include "rocksdb/iostats_context.h"
  11. #include "rocksdb/utilities/debug.h"
  12. #include "test_util/mock_time_env.h"
  13. namespace ROCKSDB_NAMESPACE {
  14. class SeqnoTimeTest : public DBTestBase {
  15. public:
  16. SeqnoTimeTest() : DBTestBase("seqno_time_test", /*env_do_fsync=*/false) {
  17. mock_clock_ = std::make_shared<MockSystemClock>(env_->GetSystemClock());
  18. mock_clock_->SetCurrentTime(kMockStartTime);
  19. mock_env_ = std::make_unique<CompositeEnvWrapper>(env_, mock_clock_);
  20. }
  21. protected:
  22. std::unique_ptr<Env> mock_env_;
  23. std::shared_ptr<MockSystemClock> mock_clock_;
  24. // Sufficient starting time that preserve time doesn't under-flow into
  25. // pre-history
  26. static constexpr uint32_t kMockStartTime = 10000000;
  27. void SetUp() override {
  28. mock_clock_->InstallTimedWaitFixCallback();
  29. SyncPoint::GetInstance()->SetCallBack(
  30. "DBImpl::StartPeriodicTaskScheduler:Init",
  31. [mock_clock = mock_clock_](void* arg) {
  32. auto periodic_task_scheduler_ptr =
  33. static_cast<PeriodicTaskScheduler*>(arg);
  34. periodic_task_scheduler_ptr->TEST_OverrideTimer(mock_clock.get());
  35. });
  36. mock_clock_->SetCurrentTime(kMockStartTime);
  37. }
  38. // make sure the file is not in cache, otherwise it won't have IO info
  39. void AssertKeyTemperature(int key_id, Temperature expected_temperature) {
  40. get_iostats_context()->Reset();
  41. IOStatsContext* iostats = get_iostats_context();
  42. std::string result = Get(Key(key_id));
  43. ASSERT_FALSE(result.empty());
  44. ASSERT_GT(iostats->bytes_read, 0);
  45. switch (expected_temperature) {
  46. case Temperature::kUnknown:
  47. ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_read_count,
  48. 0);
  49. ASSERT_EQ(iostats->file_io_stats_by_temperature.cold_file_bytes_read,
  50. 0);
  51. break;
  52. case Temperature::kCold:
  53. ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_read_count,
  54. 0);
  55. ASSERT_GT(iostats->file_io_stats_by_temperature.cold_file_bytes_read,
  56. 0);
  57. break;
  58. default:
  59. // the test only support kCold now for the bottommost temperature
  60. FAIL();
  61. }
  62. }
  63. };
  64. TEST_F(SeqnoTimeTest, TemperatureBasicUniversal) {
  65. const int kNumTrigger = 4;
  66. const int kNumLevels = 7;
  67. const int kNumKeys = 100;
  68. const int kKeyPerSec = 10;
  69. Options options = CurrentOptions();
  70. options.compaction_style = kCompactionStyleUniversal;
  71. options.preclude_last_level_data_seconds = 10000;
  72. options.env = mock_env_.get();
  73. options.last_level_temperature = Temperature::kCold;
  74. options.num_levels = kNumLevels;
  75. DestroyAndReopen(options);
  76. int sst_num = 0;
  77. // Write files that are overlap and enough to trigger compaction
  78. for (; sst_num < kNumTrigger; sst_num++) {
  79. for (int i = 0; i < kNumKeys; i++) {
  80. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  81. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  82. mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
  83. });
  84. }
  85. ASSERT_OK(Flush());
  86. }
  87. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  88. // All data is hot, only output to proximal level
  89. ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
  90. ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  91. ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  92. // read a random key, which should be hot (kUnknown)
  93. AssertKeyTemperature(20, Temperature::kUnknown);
  94. // Write more data, but still all hot until the 10th SST, as:
  95. // write a key every 10 seconds, 100 keys per SST, each SST takes 1000 seconds
  96. // The preclude_last_level_data_seconds is 10k
  97. for (; sst_num < kNumTrigger * 2; sst_num++) {
  98. for (int i = 0; i < kNumKeys; i++) {
  99. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  100. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  101. mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
  102. });
  103. }
  104. ASSERT_OK(Flush());
  105. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  106. ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  107. ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  108. }
  109. // Now we have both hot data and cold data
  110. for (; sst_num < kNumTrigger * 3; sst_num++) {
  111. for (int i = 0; i < kNumKeys; i++) {
  112. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  113. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  114. mock_clock_->MockSleepForSeconds(static_cast<int>(kKeyPerSec));
  115. });
  116. }
  117. ASSERT_OK(Flush());
  118. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  119. }
  120. CompactRangeOptions cro;
  121. cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  122. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  123. uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
  124. uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
  125. ASSERT_GT(hot_data_size, 0);
  126. ASSERT_GT(cold_data_size, 0);
  127. // the first a few key should be cold
  128. AssertKeyTemperature(20, Temperature::kCold);
  129. for (int i = 0; i < 30; i++) {
  130. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  131. mock_clock_->MockSleepForSeconds(static_cast<int>(20 * kKeyPerSec));
  132. });
  133. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  134. // the hot/cold data cut off range should be between i * 20 + 200 -> 250
  135. AssertKeyTemperature(i * 20 + 250, Temperature::kUnknown);
  136. AssertKeyTemperature(i * 20 + 200, Temperature::kCold);
  137. }
  138. ASSERT_LT(GetSstSizeHelper(Temperature::kUnknown), hot_data_size);
  139. ASSERT_GT(GetSstSizeHelper(Temperature::kCold), cold_data_size);
  140. // Wait again, the most of the data should be cold after that
  141. // but it may not be all cold, because if there's no new data write to SST,
  142. // the compaction will not get the new seqno->time sampling to decide the last
  143. // a few data's time.
  144. for (int i = 0; i < 5; i++) {
  145. dbfull()->TEST_WaitForPeriodicTaskRun(
  146. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1000)); });
  147. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  148. }
  149. // any random data close to the end should be cold
  150. AssertKeyTemperature(1000, Temperature::kCold);
  151. // close explicitly, because the env is local variable which will be released
  152. // first.
  153. Close();
  154. }
  155. TEST_F(SeqnoTimeTest, TemperatureBasicLevel) {
  156. const int kNumLevels = 7;
  157. const int kNumKeys = 100;
  158. Options options = CurrentOptions();
  159. options.preclude_last_level_data_seconds = 10000;
  160. options.env = mock_env_.get();
  161. options.last_level_temperature = Temperature::kCold;
  162. options.num_levels = kNumLevels;
  163. options.level_compaction_dynamic_level_bytes = true;
  164. // TODO(zjay): for level compaction, auto-compaction may stuck in deadloop, if
  165. // the proximal level score > 1, but the hot is not cold enough to compact
  166. // to last level, which will keep triggering compaction.
  167. options.disable_auto_compactions = true;
  168. DestroyAndReopen(options);
  169. int sst_num = 0;
  170. // Write files that are overlap
  171. for (; sst_num < 4; sst_num++) {
  172. for (int i = 0; i < kNumKeys; i++) {
  173. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  174. dbfull()->TEST_WaitForPeriodicTaskRun(
  175. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  176. }
  177. ASSERT_OK(Flush());
  178. }
  179. CompactRangeOptions cro;
  180. cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  181. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  182. // All data is hot, only output to proximal level
  183. ASSERT_EQ("0,0,0,0,0,1", FilesPerLevel());
  184. ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  185. ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  186. // read a random key, which should be hot (kUnknown)
  187. AssertKeyTemperature(20, Temperature::kUnknown);
  188. // Adding more data to have mixed hot and cold data
  189. for (; sst_num < 14; sst_num++) {
  190. for (int i = 0; i < kNumKeys; i++) {
  191. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  192. dbfull()->TEST_WaitForPeriodicTaskRun(
  193. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  194. }
  195. ASSERT_OK(Flush());
  196. }
  197. // Second to last level
  198. MoveFilesToLevel(5);
  199. ASSERT_GT(GetSstSizeHelper(Temperature::kUnknown), 0);
  200. ASSERT_EQ(GetSstSizeHelper(Temperature::kCold), 0);
  201. // Compact the files to the last level which should split the hot/cold data
  202. MoveFilesToLevel(6);
  203. uint64_t hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
  204. uint64_t cold_data_size = GetSstSizeHelper(Temperature::kCold);
  205. ASSERT_GT(hot_data_size, 0);
  206. ASSERT_GT(cold_data_size, 0);
  207. // the first a few key should be cold
  208. AssertKeyTemperature(20, Temperature::kCold);
  209. // Wait some time, with each wait, the cold data is increasing and hot data is
  210. // decreasing
  211. for (int i = 0; i < 30; i++) {
  212. dbfull()->TEST_WaitForPeriodicTaskRun(
  213. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(200)); });
  214. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  215. uint64_t pre_hot = hot_data_size;
  216. uint64_t pre_cold = cold_data_size;
  217. hot_data_size = GetSstSizeHelper(Temperature::kUnknown);
  218. cold_data_size = GetSstSizeHelper(Temperature::kCold);
  219. ASSERT_LT(hot_data_size, pre_hot);
  220. ASSERT_GT(cold_data_size, pre_cold);
  221. // the hot/cold cut_off key should be around i * 20 + 400 -> 450
  222. AssertKeyTemperature(i * 20 + 450, Temperature::kUnknown);
  223. AssertKeyTemperature(i * 20 + 400, Temperature::kCold);
  224. }
  225. // Wait again, the most of the data should be cold after that
  226. // hot data might not be empty, because if we don't write new data, there's
  227. // no seqno->time sampling available to the compaction
  228. for (int i = 0; i < 5; i++) {
  229. dbfull()->TEST_WaitForPeriodicTaskRun(
  230. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1000)); });
  231. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  232. }
  233. // any random data close to the end should be cold
  234. AssertKeyTemperature(1000, Temperature::kCold);
  235. Close();
  236. }
  237. enum class SeqnoTimeTestType : char {
  238. kTrackInternalTimeSeconds = 0,
  239. kPrecludeLastLevel = 1,
  240. kBothSetTrackSmaller = 2,
  241. };
  242. class SeqnoTimeTablePropTest
  243. : public SeqnoTimeTest,
  244. public ::testing::WithParamInterface<SeqnoTimeTestType> {
  245. public:
  246. SeqnoTimeTablePropTest() : SeqnoTimeTest() {}
  247. void SetTrackTimeDurationOptions(uint64_t track_time_duration,
  248. Options& options) const {
  249. // either option set will enable the time tracking feature
  250. switch (GetParam()) {
  251. case SeqnoTimeTestType::kTrackInternalTimeSeconds:
  252. options.preclude_last_level_data_seconds = 0;
  253. options.preserve_internal_time_seconds = track_time_duration;
  254. break;
  255. case SeqnoTimeTestType::kPrecludeLastLevel:
  256. options.preclude_last_level_data_seconds = track_time_duration;
  257. options.preserve_internal_time_seconds = 0;
  258. break;
  259. case SeqnoTimeTestType::kBothSetTrackSmaller:
  260. options.preclude_last_level_data_seconds = track_time_duration;
  261. options.preserve_internal_time_seconds = track_time_duration / 10;
  262. break;
  263. }
  264. }
  265. };
  266. INSTANTIATE_TEST_CASE_P(
  267. SeqnoTimeTablePropTest, SeqnoTimeTablePropTest,
  268. ::testing::Values(SeqnoTimeTestType::kTrackInternalTimeSeconds,
  269. SeqnoTimeTestType::kPrecludeLastLevel,
  270. SeqnoTimeTestType::kBothSetTrackSmaller));
  271. TEST_P(SeqnoTimeTablePropTest, BasicSeqnoToTimeMapping) {
  272. Options options = CurrentOptions();
  273. SetTrackTimeDurationOptions(10000, options);
  274. options.env = mock_env_.get();
  275. options.disable_auto_compactions = true;
  276. DestroyAndReopen(options);
  277. std::set<uint64_t> checked_file_nums;
  278. SequenceNumber start_seq = dbfull()->GetLatestSequenceNumber() + 1;
  279. uint64_t start_time = mock_clock_->NowSeconds();
  280. // Write a key every 10 seconds
  281. for (int i = 0; i < 200; i++) {
  282. ASSERT_OK(Put(Key(i), "value"));
  283. dbfull()->TEST_WaitForPeriodicTaskRun(
  284. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  285. }
  286. ASSERT_OK(Flush());
  287. TablePropertiesCollection tables_props;
  288. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  289. ASSERT_EQ(tables_props.size(), 1);
  290. auto it = tables_props.begin();
  291. SeqnoToTimeMapping tp_mapping;
  292. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  293. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  294. ASSERT_FALSE(tp_mapping.Empty());
  295. auto seqs = tp_mapping.TEST_GetInternalMapping();
  296. // about ~20 seqs->time entries, because the sample rate is 10000/100, and it
  297. // passes 2k time. Add (roughly) one for starting entry.
  298. // Revised: with automatic pre-population of mappings, some of these entries
  299. // might be purged to keep the DB mapping within capacity.
  300. EXPECT_GE(seqs.size(), 20 / 2);
  301. EXPECT_LE(seqs.size(), 22);
  302. auto ValidateProximalSeqnos = [&](const char* name, double fuzz_ratio) {
  303. SequenceNumber seq_end = dbfull()->GetLatestSequenceNumber() + 1;
  304. uint64_t end_time = mock_clock_->NowSeconds();
  305. uint64_t seqno_fuzz =
  306. static_cast<uint64_t>((seq_end - start_seq) * fuzz_ratio + 0.999999);
  307. for (unsigned time_pct = 0; time_pct <= 100; time_pct++) {
  308. SCOPED_TRACE("name=" + std::string(name) +
  309. " time_pct=" + std::to_string(time_pct));
  310. // Validate the important proximal API (GetProximalSeqnoBeforeTime)
  311. uint64_t t = start_time + time_pct * (end_time - start_time) / 100;
  312. auto seqno_reported = tp_mapping.GetProximalSeqnoBeforeTime(t);
  313. auto seqno_expected = start_seq + time_pct * (seq_end - start_seq) / 100;
  314. EXPECT_LE(seqno_reported, seqno_expected);
  315. if (end_time - t < 10000) {
  316. EXPECT_LE(seqno_expected, seqno_reported + seqno_fuzz);
  317. }
  318. }
  319. start_seq = seq_end;
  320. start_time = end_time;
  321. };
  322. ValidateProximalSeqnos("a", 0.1);
  323. checked_file_nums.insert(it->second->orig_file_number);
  324. // Write a key every 1 seconds
  325. for (int i = 0; i < 200; i++) {
  326. ASSERT_OK(Put(Key(i + 190), "value"));
  327. dbfull()->TEST_WaitForPeriodicTaskRun(
  328. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(1)); });
  329. }
  330. ASSERT_OK(Flush());
  331. tables_props.clear();
  332. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  333. ASSERT_EQ(tables_props.size(), 2);
  334. it = tables_props.begin();
  335. while (it != tables_props.end()) {
  336. if (!checked_file_nums.count(it->second->orig_file_number)) {
  337. break;
  338. }
  339. it++;
  340. }
  341. ASSERT_TRUE(it != tables_props.end());
  342. tp_mapping.Clear();
  343. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  344. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  345. seqs = tp_mapping.TEST_GetInternalMapping();
  346. // There only a few time sample
  347. ASSERT_GE(seqs.size(), 1);
  348. ASSERT_LE(seqs.size(), 3);
  349. // High fuzz ratio because of low number of samples
  350. ValidateProximalSeqnos("b", 0.5);
  351. checked_file_nums.insert(it->second->orig_file_number);
  352. // Write a key every 200 seconds
  353. for (int i = 0; i < 200; i++) {
  354. ASSERT_OK(Put(Key(i + 380), "value"));
  355. dbfull()->TEST_WaitForPeriodicTaskRun(
  356. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(200)); });
  357. }
  358. // seq_end = dbfull()->GetLatestSequenceNumber() + 1;
  359. ASSERT_OK(Flush());
  360. tables_props.clear();
  361. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  362. ASSERT_EQ(tables_props.size(), 3);
  363. it = tables_props.begin();
  364. while (it != tables_props.end()) {
  365. if (!checked_file_nums.count(it->second->orig_file_number)) {
  366. break;
  367. }
  368. it++;
  369. }
  370. ASSERT_TRUE(it != tables_props.end());
  371. tp_mapping.Clear();
  372. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  373. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  374. seqs = tp_mapping.TEST_GetInternalMapping();
  375. // For the preserved time span, only 10000/200=50 (+1) entries were recorded
  376. ASSERT_GE(seqs.size(), 50);
  377. ASSERT_LE(seqs.size(), 51);
  378. ValidateProximalSeqnos("c", 0.04);
  379. checked_file_nums.insert(it->second->orig_file_number);
  380. // Write a key every 100 seconds
  381. for (int i = 0; i < 200; i++) {
  382. ASSERT_OK(Put(Key(i + 570), "value"));
  383. dbfull()->TEST_WaitForPeriodicTaskRun(
  384. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  385. }
  386. ASSERT_OK(Flush());
  387. tables_props.clear();
  388. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  389. ASSERT_EQ(tables_props.size(), 4);
  390. it = tables_props.begin();
  391. while (it != tables_props.end()) {
  392. if (!checked_file_nums.count(it->second->orig_file_number)) {
  393. break;
  394. }
  395. it++;
  396. }
  397. ASSERT_TRUE(it != tables_props.end());
  398. tp_mapping.Clear();
  399. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  400. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  401. seqs = tp_mapping.TEST_GetInternalMapping();
  402. // For the preserved time span, max entries were recorded and
  403. // preserved (10000/100=100 (+1))
  404. ASSERT_GE(seqs.size(), 99);
  405. ASSERT_LE(seqs.size(), 101);
  406. checked_file_nums.insert(it->second->orig_file_number);
  407. // re-enable compaction
  408. ASSERT_OK(dbfull()->SetOptions({
  409. {"disable_auto_compactions", "false"},
  410. }));
  411. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  412. tables_props.clear();
  413. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  414. ASSERT_GE(tables_props.size(), 1);
  415. it = tables_props.begin();
  416. while (it != tables_props.end()) {
  417. if (!checked_file_nums.count(it->second->orig_file_number)) {
  418. break;
  419. }
  420. it++;
  421. }
  422. ASSERT_TRUE(it != tables_props.end());
  423. tp_mapping.Clear();
  424. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  425. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  426. seqs = tp_mapping.TEST_GetInternalMapping();
  427. ASSERT_GE(seqs.size(), 99);
  428. ASSERT_LE(seqs.size(), 101);
  429. ValidateProximalSeqnos("d", 0.02);
  430. ASSERT_OK(db_->Close());
  431. }
  432. TEST_P(SeqnoTimeTablePropTest, MultiCFs) {
  433. Options options = CurrentOptions();
  434. options.preclude_last_level_data_seconds = 0;
  435. options.preserve_internal_time_seconds = 0;
  436. options.env = mock_env_.get();
  437. options.stats_dump_period_sec = 0;
  438. options.stats_persist_period_sec = 0;
  439. ReopenWithColumnFamilies({"default"}, options);
  440. const PeriodicTaskScheduler& scheduler =
  441. dbfull()->TEST_GetPeriodicTaskScheduler();
  442. ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
  443. // Write some data and increase the current time
  444. for (int i = 0; i < 200; i++) {
  445. ASSERT_OK(Put(Key(i), "value"));
  446. dbfull()->TEST_WaitForPeriodicTaskRun(
  447. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  448. }
  449. ASSERT_OK(Flush());
  450. TablePropertiesCollection tables_props;
  451. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  452. ASSERT_EQ(tables_props.size(), 1);
  453. auto it = tables_props.begin();
  454. ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());
  455. ASSERT_TRUE(dbfull()->TEST_GetSeqnoToTimeMapping().Empty());
  456. Options options_1 = options;
  457. SetTrackTimeDurationOptions(10000, options_1);
  458. CreateColumnFamilies({"one"}, options_1);
  459. ASSERT_TRUE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
  460. // Write some data to the default CF (without preclude_last_level feature)
  461. for (int i = 0; i < 200; i++) {
  462. ASSERT_OK(Put(Key(i), "value"));
  463. dbfull()->TEST_WaitForPeriodicTaskRun(
  464. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  465. }
  466. ASSERT_OK(Flush());
  467. // Write some data to the CF one
  468. for (int i = 0; i < 20; i++) {
  469. ASSERT_OK(Put(1, Key(i), "value"));
  470. dbfull()->TEST_WaitForPeriodicTaskRun(
  471. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  472. }
  473. ASSERT_OK(Flush(1));
  474. tables_props.clear();
  475. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[1], &tables_props));
  476. ASSERT_EQ(tables_props.size(), 1);
  477. it = tables_props.begin();
  478. SeqnoToTimeMapping tp_mapping;
  479. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  480. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  481. ASSERT_FALSE(tp_mapping.Empty());
  482. auto seqs = tp_mapping.TEST_GetInternalMapping();
  483. ASSERT_GE(seqs.size(), 1);
  484. ASSERT_LE(seqs.size(), 4);
  485. // Create one more CF with larger preclude_last_level time
  486. Options options_2 = options;
  487. SetTrackTimeDurationOptions(1000000, options_2); // 1m
  488. CreateColumnFamilies({"two"}, options_2);
  489. // Add more data to CF "two" to fill the in memory mapping
  490. for (int i = 0; i < 2000; i++) {
  491. ASSERT_OK(Put(2, Key(i), "value"));
  492. dbfull()->TEST_WaitForPeriodicTaskRun(
  493. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  494. }
  495. seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping();
  496. ASSERT_GE(seqs.size(), 1000 - 1);
  497. // Non-strict limit can exceed capacity by a reasonable fraction
  498. ASSERT_LE(seqs.size(), 1000 * 9 / 8);
  499. ASSERT_OK(Flush(2));
  500. tables_props.clear();
  501. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
  502. ASSERT_EQ(tables_props.size(), 1);
  503. it = tables_props.begin();
  504. tp_mapping.Clear();
  505. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  506. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  507. seqs = tp_mapping.TEST_GetInternalMapping();
  508. // the max encoded entries is 100
  509. ASSERT_GE(seqs.size(), 100 - 1);
  510. ASSERT_LE(seqs.size(), 100 + 1);
  511. // Write some data to default CF, as all memtable with preclude_last_level
  512. // enabled have flushed, the in-memory seqno->time mapping should be cleared
  513. for (int i = 0; i < 10; i++) {
  514. ASSERT_OK(Put(0, Key(i), "value"));
  515. dbfull()->TEST_WaitForPeriodicTaskRun(
  516. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  517. }
  518. seqs = dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping();
  519. ASSERT_OK(Flush(0));
  520. // trigger compaction for CF "two" and make sure the compaction output has
  521. // seqno_to_time_mapping
  522. for (int j = 0; j < 3; j++) {
  523. for (int i = 0; i < 200; i++) {
  524. ASSERT_OK(Put(2, Key(i), "value"));
  525. dbfull()->TEST_WaitForPeriodicTaskRun(
  526. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  527. }
  528. ASSERT_OK(Flush(2));
  529. }
  530. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  531. tables_props.clear();
  532. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[2], &tables_props));
  533. ASSERT_EQ(tables_props.size(), 1);
  534. it = tables_props.begin();
  535. tp_mapping.Clear();
  536. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  537. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  538. seqs = tp_mapping.TEST_GetInternalMapping();
  539. ASSERT_GE(seqs.size(), 99);
  540. ASSERT_LE(seqs.size(), 101);
  541. for (int i = 0; i < 200; i++) {
  542. ASSERT_OK(Put(0, Key(i), "value"));
  543. dbfull()->TEST_WaitForPeriodicTaskRun(
  544. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  545. }
  546. ASSERT_OK(Flush(0));
  547. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  548. tables_props.clear();
  549. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(handles_[0], &tables_props));
  550. ASSERT_EQ(tables_props.size(), 1);
  551. it = tables_props.begin();
  552. ASSERT_TRUE(it->second->seqno_to_time_mapping.empty());
  553. // Write some data to CF "two", but don't flush to accumulate
  554. for (int i = 0; i < 1000; i++) {
  555. ASSERT_OK(Put(2, Key(i), "value"));
  556. dbfull()->TEST_WaitForPeriodicTaskRun(
  557. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  558. }
  559. ASSERT_GE(
  560. dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
  561. 500);
  562. // After dropping CF "one", the in-memory mapping will be change to only
  563. // follow CF "two" options.
  564. ASSERT_OK(db_->DropColumnFamily(handles_[1]));
  565. ASSERT_LE(
  566. dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
  567. 100 + 5);
  568. // After dropping CF "two", the in-memory mapping is also clear.
  569. ASSERT_OK(db_->DropColumnFamily(handles_[2]));
  570. ASSERT_EQ(
  571. dbfull()->TEST_GetSeqnoToTimeMapping().TEST_GetInternalMapping().size(),
  572. 0);
  573. // And the timer worker is stopped
  574. ASSERT_FALSE(scheduler.TEST_HasTask(PeriodicTaskType::kRecordSeqnoTime));
  575. Close();
  576. }
  577. TEST_P(SeqnoTimeTablePropTest, MultiInstancesBasic) {
  578. const int kInstanceNum = 2;
  579. Options options = CurrentOptions();
  580. SetTrackTimeDurationOptions(10000, options);
  581. options.env = mock_env_.get();
  582. options.stats_dump_period_sec = 0;
  583. options.stats_persist_period_sec = 0;
  584. auto dbs = std::vector<DB*>(kInstanceNum);
  585. for (int i = 0; i < kInstanceNum; i++) {
  586. ASSERT_OK(
  587. DB::Open(options, test::PerThreadDBPath(std::to_string(i)), &(dbs[i])));
  588. }
  589. // Make sure the second instance has the worker enabled
  590. auto dbi = static_cast_with_check<DBImpl>(dbs[1]);
  591. WriteOptions wo;
  592. for (int i = 0; i < 200; i++) {
  593. ASSERT_OK(dbi->Put(wo, Key(i), "value"));
  594. dbfull()->TEST_WaitForPeriodicTaskRun(
  595. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(100)); });
  596. }
  597. SeqnoToTimeMapping seqno_to_time_mapping = dbi->TEST_GetSeqnoToTimeMapping();
  598. ASSERT_GT(seqno_to_time_mapping.Size(), 10);
  599. for (int i = 0; i < kInstanceNum; i++) {
  600. ASSERT_OK(dbs[i]->Close());
  601. delete dbs[i];
  602. }
  603. }
  604. TEST_P(SeqnoTimeTablePropTest, SeqnoToTimeMappingUniversal) {
  605. const int kNumTrigger = 4;
  606. const int kNumLevels = 7;
  607. const int kNumKeys = 100;
  608. Options options = CurrentOptions();
  609. SetTrackTimeDurationOptions(10000, options);
  610. options.compaction_style = kCompactionStyleUniversal;
  611. options.num_levels = kNumLevels;
  612. options.env = mock_env_.get();
  613. DestroyAndReopen(options);
  614. std::atomic_uint64_t num_seqno_zeroing{0};
  615. SyncPoint::GetInstance()->DisableProcessing();
  616. SyncPoint::GetInstance()->ClearAllCallBacks();
  617. SyncPoint::GetInstance()->SetCallBack(
  618. "CompactionIterator::PrepareOutput:ZeroingSeq",
  619. [&](void* /*arg*/) { num_seqno_zeroing++; });
  620. SyncPoint::GetInstance()->EnableProcessing();
  621. int sst_num = 0;
  622. for (; sst_num < kNumTrigger - 1; sst_num++) {
  623. for (int i = 0; i < kNumKeys; i++) {
  624. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  625. dbfull()->TEST_WaitForPeriodicTaskRun(
  626. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  627. }
  628. ASSERT_OK(Flush());
  629. }
  630. TablePropertiesCollection tables_props;
  631. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  632. ASSERT_EQ(tables_props.size(), 3);
  633. for (const auto& props : tables_props) {
  634. ASSERT_FALSE(props.second->seqno_to_time_mapping.empty());
  635. SeqnoToTimeMapping tp_mapping;
  636. ASSERT_OK(tp_mapping.DecodeFrom(props.second->seqno_to_time_mapping));
  637. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  638. ASSERT_FALSE(tp_mapping.Empty());
  639. auto seqs = tp_mapping.TEST_GetInternalMapping();
  640. // Add (roughly) one for starting entry.
  641. ASSERT_GE(seqs.size(), 10);
  642. ASSERT_LE(seqs.size(), 10 + 2);
  643. }
  644. // Trigger a compaction
  645. for (int i = 0; i < kNumKeys; i++) {
  646. ASSERT_OK(Put(Key(sst_num * (kNumKeys - 1) + i), "value"));
  647. dbfull()->TEST_WaitForPeriodicTaskRun(
  648. [&] { mock_clock_->MockSleepForSeconds(static_cast<int>(10)); });
  649. }
  650. sst_num++;
  651. ASSERT_OK(Flush());
  652. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  653. tables_props.clear();
  654. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  655. ASSERT_EQ(tables_props.size(), 1);
  656. auto it = tables_props.begin();
  657. SeqnoToTimeMapping tp_mapping;
  658. ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
  659. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  660. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  661. // compact to the last level
  662. CompactRangeOptions cro;
  663. cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  664. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  665. // make sure the data is all compacted to proximal level if the feature is
  666. // on, otherwise, compacted to the last level.
  667. if (options.preclude_last_level_data_seconds > 0) {
  668. ASSERT_GT(NumTableFilesAtLevel(5), 0);
  669. ASSERT_EQ(NumTableFilesAtLevel(6), 0);
  670. } else {
  671. ASSERT_EQ(NumTableFilesAtLevel(5), 0);
  672. ASSERT_GT(NumTableFilesAtLevel(6), 0);
  673. }
  674. // regardless the file is on the last level or not, it should keep the time
  675. // information and sequence number are not set
  676. tables_props.clear();
  677. tp_mapping.Clear();
  678. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  679. ASSERT_EQ(tables_props.size(), 1);
  680. ASSERT_EQ(num_seqno_zeroing, 0);
  681. it = tables_props.begin();
  682. ASSERT_FALSE(it->second->seqno_to_time_mapping.empty());
  683. ASSERT_OK(tp_mapping.DecodeFrom(it->second->seqno_to_time_mapping));
  684. ASSERT_TRUE(tp_mapping.TEST_IsEnforced());
  685. // make half of the data expired
  686. mock_clock_->MockSleepForSeconds(static_cast<int>(8000));
  687. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  688. tables_props.clear();
  689. tp_mapping.Clear();
  690. ASSERT_OK(dbfull()->GetPropertiesOfAllTables(&tables_props));
  691. if (options.preclude_last_level_data_seconds > 0) {
  692. ASSERT_EQ(tables_props.size(), 2);
  693. } else {
  694. ASSERT_EQ(tables_props.size(), 1);
  695. }
  696. ASSERT_GT(num_seqno_zeroing, 0);
  697. std::vector<KeyVersion> key_versions;
  698. ASSERT_OK(GetAllKeyVersions(db_, {}, {}, std::numeric_limits<size_t>::max(),
  699. &key_versions));
  700. // make sure there're more than 300 keys and first 100 keys are having seqno
  701. // zeroed out, the last 100 key seqno not zeroed out
  702. ASSERT_GT(key_versions.size(), 300);
  703. for (int i = 0; i < 100; i++) {
  704. ASSERT_EQ(key_versions[i].sequence, 0);
  705. }
  706. auto rit = key_versions.rbegin();
  707. for (int i = 0; i < 100; i++) {
  708. ASSERT_GT(rit->sequence, 0);
  709. rit++;
  710. }
  711. // make all data expired and compact again to push it to the last level
  712. // regardless if the tiering feature is enabled or not
  713. mock_clock_->MockSleepForSeconds(static_cast<int>(20000));
  714. ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
  715. ASSERT_GT(num_seqno_zeroing, 0);
  716. ASSERT_GT(NumTableFilesAtLevel(6), 0);
  717. Close();
  718. }
  719. TEST_P(SeqnoTimeTablePropTest, PrePopulateInDB) {
  720. Options base_options = CurrentOptions();
  721. base_options.env = mock_env_.get();
  722. base_options.disable_auto_compactions = true;
  723. base_options.create_missing_column_families = true;
  724. Options track_options = base_options;
  725. constexpr uint32_t kPreserveSecs = 1234567;
  726. SetTrackTimeDurationOptions(kPreserveSecs, track_options);
  727. SeqnoToTimeMapping sttm;
  728. SequenceNumber latest_seqno;
  729. uint64_t start_time, end_time;
  730. // #### DB#1, #2: No pre-population without preserve/preclude ####
  731. // #### But a single entry is added when preserve/preclude enabled ####
  732. for (bool with_write : {false, true}) {
  733. SCOPED_TRACE("with_write=" + std::to_string(with_write));
  734. DestroyAndReopen(base_options);
  735. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  736. ASSERT_TRUE(sttm.Empty());
  737. ASSERT_EQ(db_->GetLatestSequenceNumber(), 0U);
  738. if (with_write) {
  739. // Ensure that writes before new CF with preserve/preclude option don't
  740. // interfere with the seqno-to-time mapping getting a starting entry.
  741. ASSERT_OK(Put("foo", "bar"));
  742. ASSERT_OK(Flush());
  743. } else {
  744. // FIXME: currently, starting entry after CreateColumnFamily requires
  745. // non-zero seqno
  746. ASSERT_OK(Delete("blah"));
  747. }
  748. // Unfortunately, if we add a CF with preserve/preclude option after
  749. // open, that does not reserve seqnos with pre-populated time mappings.
  750. CreateColumnFamilies({"one"}, track_options);
  751. // No pre-population (unfortunately), just a single starting entry
  752. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  753. latest_seqno = db_->GetLatestSequenceNumber();
  754. start_time = mock_clock_->NowSeconds();
  755. ASSERT_EQ(sttm.Size(), 1);
  756. ASSERT_EQ(latest_seqno, 1U);
  757. // Current time maps to starting entry / seqno
  758. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), 1U);
  759. // Any older times are unknown.
  760. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - 1),
  761. kUnknownSeqnoBeforeAll);
  762. // Now check that writes can proceed normally (passing about 20% of preserve
  763. // time)
  764. for (int i = 0; i < 20; i++) {
  765. ASSERT_OK(Put(Key(i), "value"));
  766. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  767. mock_clock_->MockSleepForSeconds(static_cast<int>(kPreserveSecs / 99));
  768. });
  769. }
  770. ASSERT_OK(Flush());
  771. // Check that mappings are getting populated
  772. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  773. latest_seqno = db_->GetLatestSequenceNumber();
  774. end_time = mock_clock_->NowSeconds();
  775. ASSERT_EQ(sttm.Size(), 21);
  776. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(end_time), latest_seqno);
  777. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), 1U);
  778. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - 1),
  779. kUnknownSeqnoBeforeAll);
  780. }
  781. // ### DB#3, #4: Read-only DB with preserve/preclude after not ####
  782. // Make sure we don't hit issues with read-only DBs, which don't need
  783. // the mapping in the DB state (though it wouldn't hurt anything)
  784. for (bool with_write : {false, true}) {
  785. SCOPED_TRACE("with_write=" + std::to_string(with_write));
  786. DestroyAndReopen(base_options);
  787. if (with_write) {
  788. ASSERT_OK(Put("foo", "bar"));
  789. ASSERT_OK(Flush());
  790. }
  791. ASSERT_OK(ReadOnlyReopen(base_options));
  792. if (with_write) {
  793. ASSERT_EQ(Get("foo"), "bar");
  794. }
  795. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  796. ASSERT_EQ(sttm.Size(), 0);
  797. if (!with_write) {
  798. ASSERT_EQ(db_->GetLatestSequenceNumber(), 0);
  799. }
  800. ASSERT_OK(ReadOnlyReopen(track_options));
  801. if (with_write) {
  802. ASSERT_EQ(Get("foo"), "bar");
  803. }
  804. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  805. ASSERT_EQ(sttm.Size(), 0);
  806. if (!with_write) {
  807. ASSERT_EQ(db_->GetLatestSequenceNumber(), 0);
  808. // And even if we re-open read-write, we do not get pre-population,
  809. // because that's only for new DBs. We just get a single bootstrap
  810. // entry as a lower bound on write times of future writes.
  811. Reopen(track_options);
  812. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  813. ASSERT_EQ(sttm.Size(), 1);
  814. ASSERT_EQ(db_->GetLatestSequenceNumber(), 0);
  815. }
  816. }
  817. // #### DB#5: Destroy and open with preserve/preclude option ####
  818. DestroyAndReopen(track_options);
  819. // Ensure pre-population
  820. constexpr auto kPrePopPairs = kMaxSeqnoTimePairsPerSST;
  821. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  822. latest_seqno = db_->GetLatestSequenceNumber();
  823. start_time = mock_clock_->NowSeconds();
  824. ASSERT_EQ(sttm.Size(), kPrePopPairs);
  825. // One nono-zero sequence number per pre-populated pair (this could be
  826. // revised if we want to use interpolation for better approximate time
  827. // mappings with no guarantee of erring in just one direction).
  828. ASSERT_EQ(latest_seqno, kPrePopPairs);
  829. // Current time maps to last pre-allocated seqno
  830. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), latest_seqno);
  831. // Oldest tracking time maps to first pre-allocated seqno
  832. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), 1);
  833. // In more detail, check that estimated seqnos (pre-allocated) are uniformly
  834. // spread over the tracked time.
  835. for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) {
  836. // Round up query time
  837. uint64_t t = start_time - kPreserveSecs +
  838. static_cast<uint64_t>(ratio * kPreserveSecs + 0.9999999);
  839. // Round down estimated seqno
  840. SequenceNumber s =
  841. static_cast<SequenceNumber>(ratio * (latest_seqno - 1)) + 1;
  842. // Match
  843. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(t), s);
  844. }
  845. // Now check that writes can proceed normally (passing about 20% of preserve
  846. // time)
  847. for (int i = 0; i < 20; i++) {
  848. ASSERT_OK(Put(Key(i), "value"));
  849. dbfull()->TEST_WaitForPeriodicTaskRun([&] {
  850. mock_clock_->MockSleepForSeconds(static_cast<int>(kPreserveSecs / 99));
  851. });
  852. }
  853. ASSERT_OK(Flush());
  854. // Can still see some pre-populated mappings, though some displaced
  855. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  856. latest_seqno = db_->GetLatestSequenceNumber();
  857. end_time = mock_clock_->NowSeconds();
  858. ASSERT_GE(sttm.Size(), kPrePopPairs);
  859. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(end_time), latest_seqno);
  860. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs / 2),
  861. kPrePopPairs / 2);
  862. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs),
  863. kUnknownSeqnoBeforeAll);
  864. // Make sure we don't hit issues with read-only DBs, which don't need
  865. // the mapping in the DB state (though it wouldn't hurt anything)
  866. ASSERT_OK(ReadOnlyReopen(track_options));
  867. ASSERT_EQ(Get(Key(0)), "value");
  868. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  869. ASSERT_EQ(sttm.Size(), 0);
  870. // #### DB#6: Destroy and open+create an extra CF with preserve/preclude ####
  871. // (default CF does not have the option)
  872. Destroy(track_options);
  873. ReopenWithColumnFamilies({"default", "one"},
  874. List({base_options, track_options}));
  875. // Ensure pre-population (not as exhaustive checking here)
  876. sttm = dbfull()->TEST_GetSeqnoToTimeMapping();
  877. latest_seqno = db_->GetLatestSequenceNumber();
  878. start_time = mock_clock_->NowSeconds();
  879. ASSERT_EQ(sttm.Size(), kPrePopPairs);
  880. // One nono-zero sequence number per pre-populated pair (this could be
  881. // revised if we want to use interpolation for better approximate time
  882. // mappings with no guarantee of erring in just one direction).
  883. ASSERT_EQ(latest_seqno, kPrePopPairs);
  884. // Current time maps to last pre-allocated seqno
  885. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time), latest_seqno);
  886. // Oldest tracking time maps to first pre-allocated seqno
  887. ASSERT_EQ(sttm.GetProximalSeqnoBeforeTime(start_time - kPreserveSecs), 1);
  888. // Even after no writes and DB re-open without tracking options, sequence
  889. // numbers should not go backward into those that were pre-allocated.
  890. // (Future work: persist the mapping)
  891. ReopenWithColumnFamilies({"default", "one"},
  892. List({base_options, base_options}));
  893. ASSERT_EQ(latest_seqno, db_->GetLatestSequenceNumber());
  894. Close();
  895. }
  896. TEST_F(SeqnoTimeTest, MappingAppend) {
  897. using P = SeqnoToTimeMapping::SeqnoTimePair;
  898. SeqnoToTimeMapping test;
  899. test.SetMaxTimeSpan(100).SetCapacity(10);
  900. // ignore seqno == 0, as it may mean the seqno is zeroed out
  901. ASSERT_FALSE(test.Append(0, 100));
  902. ASSERT_TRUE(test.Append(3, 200));
  903. auto size = test.Size();
  904. // normal add
  905. ASSERT_TRUE(test.Append(10, 300));
  906. size++;
  907. ASSERT_EQ(size, test.Size());
  908. // Append with the same seqno, newer time is rejected because that makes
  909. // GetProximalSeqnoBeforeTime queries worse (see later test)
  910. ASSERT_FALSE(test.Append(10, 301));
  911. ASSERT_EQ(size, test.Size());
  912. ASSERT_EQ(test.TEST_GetLastEntry(), P({10, 300}));
  913. // Same or new seqno with same or older time (as last successfully added) is
  914. // accepted by replacing last entry (improves GetProximalSeqnoBeforeTime
  915. // queries without blowing up size)
  916. ASSERT_FALSE(test.Append(10, 299));
  917. ASSERT_EQ(size, test.Size());
  918. ASSERT_EQ(test.TEST_GetLastEntry(), P({10, 299}));
  919. ASSERT_FALSE(test.Append(11, 299));
  920. ASSERT_EQ(size, test.Size());
  921. ASSERT_EQ(test.TEST_GetLastEntry(), P({11, 299}));
  922. ASSERT_FALSE(test.Append(11, 250));
  923. ASSERT_EQ(size, test.Size());
  924. ASSERT_EQ(test.TEST_GetLastEntry(), P({11, 250}));
  925. }
  926. TEST_F(SeqnoTimeTest, CapacityLimits) {
  927. using P = SeqnoToTimeMapping::SeqnoTimePair;
  928. SeqnoToTimeMapping test;
  929. test.SetCapacity(3);
  930. EXPECT_TRUE(test.Append(10, 300));
  931. EXPECT_TRUE(test.Append(20, 400));
  932. EXPECT_TRUE(test.Append(30, 500));
  933. EXPECT_TRUE(test.Append(40, 600));
  934. // Capacity 3 is small enough that the non-strict limit is
  935. // equal to the strict limit.
  936. EXPECT_EQ(3U, test.Size());
  937. EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
  938. // Same for Capacity 2
  939. test.SetCapacity(2);
  940. EXPECT_EQ(2U, test.Size());
  941. EXPECT_EQ(test.TEST_GetLastEntry(), P({40, 600}));
  942. EXPECT_TRUE(test.Append(50, 700));
  943. EXPECT_EQ(2U, test.Size());
  944. EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
  945. // Capacity 1 is difficult to work with internally, so is
  946. // coerced to 2.
  947. test.SetCapacity(1);
  948. EXPECT_EQ(2U, test.Size());
  949. EXPECT_EQ(test.TEST_GetLastEntry(), P({50, 700}));
  950. EXPECT_TRUE(test.Append(60, 800));
  951. EXPECT_EQ(2U, test.Size());
  952. EXPECT_EQ(test.TEST_GetLastEntry(), P({60, 800}));
  953. // Capacity 0 means throw everything away
  954. test.SetCapacity(0);
  955. EXPECT_EQ(0U, test.Size());
  956. EXPECT_FALSE(test.Append(70, 900));
  957. EXPECT_EQ(0U, test.Size());
  958. // Unlimited capacity
  959. test.SetCapacity(UINT64_MAX);
  960. for (unsigned i = 1; i <= 10101U; i++) {
  961. EXPECT_TRUE(test.Append(i, 11U * i));
  962. }
  963. EXPECT_EQ(10101U, test.Size());
  964. }
  965. TEST_F(SeqnoTimeTest, TimeSpanLimits) {
  966. SeqnoToTimeMapping test;
  967. // Default: no limit
  968. for (unsigned i = 1; i <= 63U; i++) {
  969. EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
  970. }
  971. // None dropped.
  972. EXPECT_EQ(63U, test.Size());
  973. test.Clear();
  974. // Explicit no limit
  975. test.SetMaxTimeSpan(UINT64_MAX);
  976. for (unsigned i = 1; i <= 63U; i++) {
  977. EXPECT_TRUE(test.Append(1000 + i, uint64_t{1} << i));
  978. }
  979. // None dropped.
  980. EXPECT_EQ(63U, test.Size());
  981. // We generally keep 2 entries as long as the configured max time span
  982. // is non-zero
  983. test.SetMaxTimeSpan(10);
  984. EXPECT_EQ(2U, test.Size());
  985. test.SetMaxTimeSpan(1);
  986. EXPECT_EQ(2U, test.Size());
  987. // But go down to 1 entry if the max time span is zero
  988. test.SetMaxTimeSpan(0);
  989. EXPECT_EQ(1U, test.Size());
  990. EXPECT_TRUE(test.Append(2000, (uint64_t{1} << 63) + 42U));
  991. EXPECT_EQ(1U, test.Size());
  992. test.Clear();
  993. // Test more typical behavior. Note that one entry at or beyond the max span
  994. // is kept.
  995. test.SetMaxTimeSpan(100);
  996. EXPECT_TRUE(test.Append(1001, 123));
  997. EXPECT_TRUE(test.Append(1002, 134));
  998. EXPECT_TRUE(test.Append(1003, 150));
  999. EXPECT_TRUE(test.Append(1004, 189));
  1000. EXPECT_TRUE(test.Append(1005, 220));
  1001. EXPECT_EQ(5U, test.Size());
  1002. EXPECT_TRUE(test.Append(1006, 233));
  1003. EXPECT_EQ(6U, test.Size());
  1004. EXPECT_TRUE(test.Append(1007, 234));
  1005. EXPECT_EQ(6U, test.Size());
  1006. EXPECT_TRUE(test.Append(1008, 235));
  1007. EXPECT_EQ(7U, test.Size());
  1008. EXPECT_TRUE(test.Append(1009, 300));
  1009. EXPECT_EQ(6U, test.Size());
  1010. EXPECT_TRUE(test.Append(1010, 350));
  1011. EXPECT_EQ(3U, test.Size());
  1012. EXPECT_TRUE(test.Append(1011, 470));
  1013. EXPECT_EQ(2U, test.Size());
  1014. }
  1015. TEST_F(SeqnoTimeTest, ProximalFunctions) {
  1016. SeqnoToTimeMapping test;
  1017. test.SetCapacity(10);
  1018. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1), kUnknownTimeBeforeAll);
  1019. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U),
  1020. kUnknownTimeBeforeAll);
  1021. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1), kUnknownSeqnoBeforeAll);
  1022. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U),
  1023. kUnknownSeqnoBeforeAll);
  1024. // (Taken from example in SeqnoToTimeMapping class comment)
  1025. // Time 500 is after seqno 10 and before seqno 11
  1026. EXPECT_TRUE(test.Append(10, 500));
  1027. // Seqno too early
  1028. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(9), kUnknownTimeBeforeAll);
  1029. // We only know that 500 is after 10
  1030. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  1031. // Found
  1032. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U);
  1033. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U), 500U);
  1034. // Time too early
  1035. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll);
  1036. // Found
  1037. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U);
  1038. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(501), 10U);
  1039. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U), 10U);
  1040. // More samples
  1041. EXPECT_TRUE(test.Append(20, 600));
  1042. EXPECT_TRUE(test.Append(30, 700));
  1043. EXPECT_EQ(test.Size(), 3U);
  1044. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  1045. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U);
  1046. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(20), 500U);
  1047. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(21), 600U);
  1048. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U);
  1049. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U);
  1050. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(1000000000000U), 700U);
  1051. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(499), kUnknownSeqnoBeforeAll);
  1052. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U);
  1053. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(501), 10U);
  1054. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), 10U);
  1055. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U);
  1056. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(601), 20U);
  1057. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U);
  1058. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U);
  1059. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(701), 30U);
  1060. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(1000000000000U), 30U);
  1061. // Redundant sample ignored
  1062. EXPECT_EQ(test.Size(), 3U);
  1063. EXPECT_FALSE(test.Append(30, 700));
  1064. EXPECT_EQ(test.Size(), 3U);
  1065. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U);
  1066. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U);
  1067. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U);
  1068. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U);
  1069. // Later sample with same seqno is ignored, to provide best results
  1070. // for GetProximalSeqnoBeforeTime function while saving entries
  1071. // in SeqnoToTimeMapping.
  1072. EXPECT_FALSE(test.Append(30, 800));
  1073. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U);
  1074. // Could return 800, but saving space in SeqnoToTimeMapping instead.
  1075. // Can reconsider if/when GetProximalTimeBeforeSeqno is used in
  1076. // production.
  1077. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(31), 700U);
  1078. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U);
  1079. // If the existing {30, 700} entry were replaced with {30, 800}, this
  1080. // would return seqno 20 instead of 30, which would preclude more than
  1081. // necessary for "preclude_last_level_data_seconds" feature.
  1082. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U);
  1083. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(800), 30U);
  1084. // Still OK
  1085. EXPECT_TRUE(test.Append(40, 900));
  1086. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(30), 600U);
  1087. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(41), 900U);
  1088. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 30U);
  1089. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(900), 40U);
  1090. // Burst of writes during a short time creates an opportunity
  1091. // for better results from GetProximalSeqnoBeforeTime(), at the
  1092. // expense of GetProximalTimeBeforeSeqno(). False return indicates
  1093. // merge with previous entry.
  1094. EXPECT_FALSE(test.Append(50, 900));
  1095. // These are subject to later revision depending on priorities
  1096. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(49), 700U);
  1097. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(51), 900U);
  1098. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 30U);
  1099. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(900), 50U);
  1100. }
  1101. TEST_F(SeqnoTimeTest, PrePopulate) {
  1102. SeqnoToTimeMapping test;
  1103. test.SetMaxTimeSpan(100).SetCapacity(10);
  1104. EXPECT_EQ(test.Size(), 0U);
  1105. // Smallest case is like two Appends
  1106. test.PrePopulate(10, 11, 500, 600);
  1107. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(10), kUnknownTimeBeforeAll);
  1108. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(11), 500U);
  1109. EXPECT_EQ(test.GetProximalTimeBeforeSeqno(12), 600U);
  1110. test.Clear();
  1111. // Populate a small range
  1112. uint64_t kTimeIncrement = 1234567;
  1113. test.PrePopulate(1, 12, kTimeIncrement, kTimeIncrement * 2);
  1114. for (uint64_t i = 0; i <= 12; ++i) {
  1115. // NOTE: with 1 and 12 as the pre-populated end points, the duration is
  1116. // broken into 11 equal(-ish) spans
  1117. uint64_t t = kTimeIncrement + (i * kTimeIncrement) / 11 - 1;
  1118. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), i);
  1119. }
  1120. test.Clear();
  1121. // Populate an excessively large range (in the future we might want to
  1122. // interpolate estimated times for seqnos between entries)
  1123. test.PrePopulate(1, 34567, kTimeIncrement, kTimeIncrement * 2);
  1124. for (auto ratio : {0.0, 0.433, 0.678, 0.987, 1.0}) {
  1125. // Round up query time
  1126. uint64_t t = kTimeIncrement +
  1127. static_cast<uint64_t>(ratio * kTimeIncrement + 0.9999999);
  1128. // Round down estimated seqno
  1129. SequenceNumber s = static_cast<SequenceNumber>(ratio * (34567 - 1)) + 1;
  1130. // Match
  1131. // TODO: for now this is exact, but in the future might need approximation
  1132. // bounds to account for limited samples.
  1133. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(t), s);
  1134. }
  1135. }
  1136. TEST_F(SeqnoTimeTest, CopyFromSeqnoRange) {
  1137. SeqnoToTimeMapping test_from;
  1138. SeqnoToTimeMapping test_to;
  1139. // With zero to draw from
  1140. test_to.Clear();
  1141. test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
  1142. EXPECT_EQ(test_to.Size(), 0U);
  1143. test_to.Clear();
  1144. test_to.CopyFromSeqnoRange(test_from, 100, 100);
  1145. EXPECT_EQ(test_to.Size(), 0U);
  1146. test_to.Clear();
  1147. test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
  1148. EXPECT_EQ(test_to.Size(), 0U);
  1149. // With one to draw from
  1150. EXPECT_TRUE(test_from.Append(10, 500));
  1151. test_to.Clear();
  1152. test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
  1153. EXPECT_EQ(test_to.Size(), 1U);
  1154. // Includes one entry before range
  1155. test_to.Clear();
  1156. test_to.CopyFromSeqnoRange(test_from, 100, 100);
  1157. EXPECT_EQ(test_to.Size(), 1U);
  1158. // Includes one entry before range (even if somewhat nonsensical)
  1159. test_to.Clear();
  1160. test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
  1161. EXPECT_EQ(test_to.Size(), 1U);
  1162. test_to.Clear();
  1163. test_to.CopyFromSeqnoRange(test_from, 0, 9);
  1164. EXPECT_EQ(test_to.Size(), 0U);
  1165. test_to.Clear();
  1166. test_to.CopyFromSeqnoRange(test_from, 0, 10);
  1167. EXPECT_EQ(test_to.Size(), 1U);
  1168. // With more to draw from
  1169. EXPECT_TRUE(test_from.Append(20, 600));
  1170. EXPECT_TRUE(test_from.Append(30, 700));
  1171. EXPECT_TRUE(test_from.Append(40, 800));
  1172. EXPECT_TRUE(test_from.Append(50, 900));
  1173. test_to.Clear();
  1174. test_to.CopyFromSeqnoRange(test_from, 0, 1000000);
  1175. EXPECT_EQ(test_to.Size(), 5U);
  1176. // Includes one entry before range
  1177. test_to.Clear();
  1178. test_to.CopyFromSeqnoRange(test_from, 100, 100);
  1179. EXPECT_EQ(test_to.Size(), 1U);
  1180. test_to.Clear();
  1181. test_to.CopyFromSeqnoRange(test_from, 19, 19);
  1182. EXPECT_EQ(test_to.Size(), 1U);
  1183. // Includes one entry before range (even if somewhat nonsensical)
  1184. test_to.Clear();
  1185. test_to.CopyFromSeqnoRange(test_from, kMaxSequenceNumber, 0);
  1186. EXPECT_EQ(test_to.Size(), 1U);
  1187. test_to.Clear();
  1188. test_to.CopyFromSeqnoRange(test_from, 0, 9);
  1189. EXPECT_EQ(test_to.Size(), 0U);
  1190. test_to.Clear();
  1191. test_to.CopyFromSeqnoRange(test_from, 0, 10);
  1192. EXPECT_EQ(test_to.Size(), 1U);
  1193. test_to.Clear();
  1194. test_to.CopyFromSeqnoRange(test_from, 20, 20);
  1195. EXPECT_EQ(test_to.Size(), 2U);
  1196. test_to.Clear();
  1197. test_to.CopyFromSeqnoRange(test_from, 20, 29);
  1198. EXPECT_EQ(test_to.Size(), 2U);
  1199. test_to.Clear();
  1200. test_to.CopyFromSeqnoRange(test_from, 20, 30);
  1201. EXPECT_EQ(test_to.Size(), 3U);
  1202. }
  1203. TEST_F(SeqnoTimeTest, EnforceWithNow) {
  1204. constexpr uint64_t kMaxTimeSpan = 420;
  1205. SeqnoToTimeMapping test;
  1206. test.SetMaxTimeSpan(kMaxTimeSpan).SetCapacity(10);
  1207. EXPECT_EQ(test.Size(), 0U);
  1208. // Safe on empty mapping
  1209. test.Enforce(/*now=*/500);
  1210. EXPECT_EQ(test.Size(), 0U);
  1211. // (Taken from example in SeqnoToTimeMapping class comment)
  1212. // Time 500 is after seqno 10 and before seqno 11
  1213. EXPECT_TRUE(test.Append(10, 500));
  1214. EXPECT_TRUE(test.Append(20, 600));
  1215. EXPECT_TRUE(test.Append(30, 700));
  1216. EXPECT_TRUE(test.Append(40, 800));
  1217. EXPECT_TRUE(test.Append(50, 900));
  1218. EXPECT_EQ(test.Size(), 5U);
  1219. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), 10U);
  1220. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), 10U);
  1221. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U);
  1222. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U);
  1223. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U);
  1224. // etc.
  1225. // Must keep first entry
  1226. test.Enforce(/*now=*/500 + kMaxTimeSpan);
  1227. EXPECT_EQ(test.Size(), 5U);
  1228. test.Enforce(/*now=*/599 + kMaxTimeSpan);
  1229. EXPECT_EQ(test.Size(), 5U);
  1230. // Purges first entry
  1231. test.Enforce(/*now=*/600 + kMaxTimeSpan);
  1232. EXPECT_EQ(test.Size(), 4U);
  1233. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(500), kUnknownSeqnoBeforeAll);
  1234. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(599), kUnknownSeqnoBeforeAll);
  1235. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(600), 20U);
  1236. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(699), 20U);
  1237. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(700), 30U);
  1238. // No effect
  1239. test.Enforce(/*now=*/600 + kMaxTimeSpan);
  1240. EXPECT_EQ(test.Size(), 4U);
  1241. test.Enforce(/*now=*/699 + kMaxTimeSpan);
  1242. EXPECT_EQ(test.Size(), 4U);
  1243. // Purges next two
  1244. test.Enforce(/*now=*/899 + kMaxTimeSpan);
  1245. EXPECT_EQ(test.Size(), 2U);
  1246. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(799), kUnknownSeqnoBeforeAll);
  1247. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(899), 40U);
  1248. // Always keep last entry, to have a non-trivial seqno bound
  1249. test.Enforce(/*now=*/10000000);
  1250. EXPECT_EQ(test.Size(), 1U);
  1251. EXPECT_EQ(test.GetProximalSeqnoBeforeTime(10000000), 50U);
  1252. }
  1253. TEST_F(SeqnoTimeTest, Sort) {
  1254. SeqnoToTimeMapping test;
  1255. // single entry
  1256. test.AddUnenforced(10, 11);
  1257. test.Enforce();
  1258. ASSERT_EQ(test.Size(), 1);
  1259. // duplicate is ignored
  1260. test.AddUnenforced(10, 11);
  1261. test.Enforce();
  1262. ASSERT_EQ(test.Size(), 1);
  1263. // add some revised mappings for that seqno
  1264. test.AddUnenforced(10, 10);
  1265. test.AddUnenforced(10, 12);
  1266. // We currently favor GetProximalSeqnoBeforeTime over
  1267. // GetProximalTimeBeforeSeqno by keeping the older time.
  1268. test.Enforce();
  1269. auto seqs = test.TEST_GetInternalMapping();
  1270. std::deque<SeqnoToTimeMapping::SeqnoTimePair> expected;
  1271. expected.emplace_back(10, 10);
  1272. ASSERT_EQ(expected, seqs);
  1273. // add an inconsistent / unuseful mapping
  1274. test.AddUnenforced(9, 11);
  1275. test.Enforce();
  1276. seqs = test.TEST_GetInternalMapping();
  1277. ASSERT_EQ(expected, seqs);
  1278. // And a mapping that is considered more useful (for
  1279. // GetProximalSeqnoBeforeTime) and thus replaces that one
  1280. test.AddUnenforced(11, 9);
  1281. test.Enforce();
  1282. seqs = test.TEST_GetInternalMapping();
  1283. expected.clear();
  1284. expected.emplace_back(11, 9);
  1285. ASSERT_EQ(expected, seqs);
  1286. // Add more good, non-mergable entries
  1287. test.AddUnenforced(1, 5);
  1288. test.AddUnenforced(100, 100);
  1289. test.Enforce();
  1290. seqs = test.TEST_GetInternalMapping();
  1291. expected.clear();
  1292. expected.emplace_back(1, 5);
  1293. expected.emplace_back(11, 9);
  1294. expected.emplace_back(100, 100);
  1295. ASSERT_EQ(expected, seqs);
  1296. }
  1297. TEST_F(SeqnoTimeTest, EncodeDecodeBasic) {
  1298. constexpr uint32_t kOriginalSamples = 1000;
  1299. SeqnoToTimeMapping test;
  1300. test.SetCapacity(kOriginalSamples);
  1301. std::string output;
  1302. test.EncodeTo(output);
  1303. ASSERT_TRUE(output.empty());
  1304. ASSERT_OK(test.DecodeFrom(output));
  1305. ASSERT_EQ(test.Size(), 0U);
  1306. Random rnd(123);
  1307. for (uint32_t i = 1; i <= kOriginalSamples; i++) {
  1308. ASSERT_TRUE(test.Append(i, i * 10 + rnd.Uniform(10)));
  1309. }
  1310. output.clear();
  1311. test.EncodeTo(output);
  1312. ASSERT_FALSE(output.empty());
  1313. SeqnoToTimeMapping decoded;
  1314. ASSERT_OK(decoded.DecodeFrom(output));
  1315. ASSERT_TRUE(decoded.TEST_IsEnforced());
  1316. ASSERT_EQ(test.Size(), decoded.Size());
  1317. ASSERT_EQ(test.TEST_GetInternalMapping(), decoded.TEST_GetInternalMapping());
  1318. // Encode a reduced set of mappings
  1319. constexpr uint32_t kReducedSize = 51U;
  1320. output.clear();
  1321. SeqnoToTimeMapping(test).SetCapacity(kReducedSize).EncodeTo(output);
  1322. decoded.Clear();
  1323. ASSERT_OK(decoded.DecodeFrom(output));
  1324. ASSERT_TRUE(decoded.TEST_IsEnforced());
  1325. ASSERT_EQ(decoded.Size(), kReducedSize);
  1326. for (uint64_t t = 1; t <= kOriginalSamples * 11; t += 1 + t / 100) {
  1327. SCOPED_TRACE("t=" + std::to_string(t));
  1328. // `test` has the more accurate time mapping, but the reduced set should
  1329. // nicely span and approximate the whole range
  1330. auto orig_s = test.GetProximalSeqnoBeforeTime(t);
  1331. auto approx_s = decoded.GetProximalSeqnoBeforeTime(t);
  1332. // The oldest entry should be preserved exactly
  1333. ASSERT_EQ(orig_s == kUnknownSeqnoBeforeAll,
  1334. approx_s == kUnknownSeqnoBeforeAll);
  1335. // The newest entry should be preserved exactly
  1336. ASSERT_EQ(orig_s == kOriginalSamples, approx_s == kOriginalSamples);
  1337. // Approximate seqno before time should err toward older seqno to avoid
  1338. // classifying data as old too early, but should be within a reasonable
  1339. // bound.
  1340. constexpr uint32_t kSeqnoFuzz = kOriginalSamples * 3 / 2 / kReducedSize;
  1341. EXPECT_GE(approx_s + kSeqnoFuzz, orig_s);
  1342. EXPECT_GE(orig_s, approx_s);
  1343. }
  1344. }
  1345. TEST_F(SeqnoTimeTest, EncodeDecodeMinimizeTimeGaps) {
  1346. SeqnoToTimeMapping test;
  1347. test.SetCapacity(10);
  1348. test.Append(1, 10);
  1349. test.Append(5, 17);
  1350. test.Append(6, 25);
  1351. test.Append(8, 30);
  1352. std::string output;
  1353. SeqnoToTimeMapping(test).SetCapacity(3).EncodeTo(output);
  1354. SeqnoToTimeMapping decoded;
  1355. ASSERT_OK(decoded.DecodeFrom(output));
  1356. ASSERT_TRUE(decoded.TEST_IsEnforced());
  1357. ASSERT_EQ(decoded.Size(), 3);
  1358. auto seqs = decoded.TEST_GetInternalMapping();
  1359. std::deque<SeqnoToTimeMapping::SeqnoTimePair> expected;
  1360. expected.emplace_back(1, 10);
  1361. expected.emplace_back(5, 17);
  1362. expected.emplace_back(8, 30);
  1363. ASSERT_EQ(expected, seqs);
  1364. // Add a few large time number
  1365. test.Append(10, 100);
  1366. test.Append(13, 200);
  1367. test.Append(40, 250);
  1368. test.Append(70, 300);
  1369. output.clear();
  1370. SeqnoToTimeMapping(test).SetCapacity(4).EncodeTo(output);
  1371. decoded.Clear();
  1372. ASSERT_OK(decoded.DecodeFrom(output));
  1373. ASSERT_TRUE(decoded.TEST_IsEnforced());
  1374. ASSERT_EQ(decoded.Size(), 4);
  1375. expected.clear();
  1376. // Except for beginning and end, entries are removed that minimize the
  1377. // remaining time gaps, regardless of seqno gaps.
  1378. expected.emplace_back(1, 10);
  1379. expected.emplace_back(10, 100);
  1380. expected.emplace_back(13, 200);
  1381. expected.emplace_back(70, 300);
  1382. seqs = decoded.TEST_GetInternalMapping();
  1383. ASSERT_EQ(expected, seqs);
  1384. }
  1385. TEST(PackValueAndSeqnoTest, Basic) {
  1386. std::string packed_value_buf;
  1387. Slice packed_value_slice =
  1388. PackValueAndWriteTime("foo", 30u, &packed_value_buf);
  1389. auto [unpacked_value, write_time] =
  1390. ParsePackedValueWithWriteTime(packed_value_slice);
  1391. ASSERT_EQ(unpacked_value, "foo");
  1392. ASSERT_EQ(write_time, 30u);
  1393. ASSERT_EQ(ParsePackedValueForValue(packed_value_slice), "foo");
  1394. }
  1395. TEST(PackValueAndWriteTimeTest, Basic) {
  1396. std::string packed_value_buf;
  1397. Slice packed_value_slice = PackValueAndSeqno("foo", 30u, &packed_value_buf);
  1398. auto [unpacked_value, write_time] =
  1399. ParsePackedValueWithSeqno(packed_value_slice);
  1400. ASSERT_EQ(unpacked_value, "foo");
  1401. ASSERT_EQ(write_time, 30u);
  1402. ASSERT_EQ(ParsePackedValueForValue(packed_value_slice), "foo");
  1403. }
  1404. } // namespace ROCKSDB_NAMESPACE
  1405. int main(int argc, char** argv) {
  1406. ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  1407. ::testing::InitGoogleTest(&argc, argv);
  1408. return RUN_ALL_TESTS();
  1409. }