db_iterator_test.cc 89 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221222222232224222522262227222822292230223122322233223422352236223722382239224022412242224322442245224622472248224922502251225222532254225522562257225822592260226122622263226422652266226722682269227022712272227322742275227622772278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include <functional>
  10. #include "db/arena_wrapped_db_iter.h"
  11. #include "db/db_iter.h"
  12. #include "db/db_test_util.h"
  13. #include "port/port.h"
  14. #include "port/stack_trace.h"
  15. #include "rocksdb/iostats_context.h"
  16. #include "rocksdb/perf_context.h"
  17. #include "table/block_based/flush_block_policy.h"
  18. namespace ROCKSDB_NAMESPACE {
  19. // A dumb ReadCallback which saying every key is committed.
  20. class DummyReadCallback : public ReadCallback {
  21. public:
  22. DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
  23. bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
  24. void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
  25. };
  26. // Test param:
  27. // bool: whether to pass read_callback to NewIterator().
  28. class DBIteratorTest : public DBTestBase,
  29. public testing::WithParamInterface<bool> {
  30. public:
  31. DBIteratorTest() : DBTestBase("/db_iterator_test") {}
  32. Iterator* NewIterator(const ReadOptions& read_options,
  33. ColumnFamilyHandle* column_family = nullptr) {
  34. if (column_family == nullptr) {
  35. column_family = db_->DefaultColumnFamily();
  36. }
  37. auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
  38. SequenceNumber seq = read_options.snapshot != nullptr
  39. ? read_options.snapshot->GetSequenceNumber()
  40. : db_->GetLatestSequenceNumber();
  41. bool use_read_callback = GetParam();
  42. DummyReadCallback* read_callback = nullptr;
  43. if (use_read_callback) {
  44. read_callback = new DummyReadCallback();
  45. read_callback->SetSnapshot(seq);
  46. InstrumentedMutexLock lock(&mutex_);
  47. read_callbacks_.push_back(
  48. std::unique_ptr<DummyReadCallback>(read_callback));
  49. }
  50. return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
  51. }
  52. private:
  53. InstrumentedMutex mutex_;
  54. std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
  55. };
  56. TEST_P(DBIteratorTest, IteratorProperty) {
  57. // The test needs to be changed if kPersistedTier is supported in iterator.
  58. Options options = CurrentOptions();
  59. CreateAndReopenWithCF({"pikachu"}, options);
  60. Put(1, "1", "2");
  61. Delete(1, "2");
  62. ReadOptions ropt;
  63. ropt.pin_data = false;
  64. {
  65. std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
  66. iter->SeekToFirst();
  67. std::string prop_value;
  68. ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
  69. ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  70. ASSERT_EQ("0", prop_value);
  71. ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
  72. ASSERT_EQ("1", prop_value);
  73. iter->Next();
  74. ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  75. ASSERT_EQ("Iterator is not valid.", prop_value);
  76. // Get internal key at which the iteration stopped (tombstone in this case).
  77. ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
  78. ASSERT_EQ("2", prop_value);
  79. }
  80. Close();
  81. }
  82. TEST_P(DBIteratorTest, PersistedTierOnIterator) {
  83. // The test needs to be changed if kPersistedTier is supported in iterator.
  84. Options options = CurrentOptions();
  85. CreateAndReopenWithCF({"pikachu"}, options);
  86. ReadOptions ropt;
  87. ropt.read_tier = kPersistedTier;
  88. auto* iter = db_->NewIterator(ropt, handles_[1]);
  89. ASSERT_TRUE(iter->status().IsNotSupported());
  90. delete iter;
  91. std::vector<Iterator*> iters;
  92. ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
  93. Close();
  94. }
  95. TEST_P(DBIteratorTest, NonBlockingIteration) {
  96. do {
  97. ReadOptions non_blocking_opts, regular_opts;
  98. Options options = CurrentOptions();
  99. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  100. non_blocking_opts.read_tier = kBlockCacheTier;
  101. CreateAndReopenWithCF({"pikachu"}, options);
  102. // write one kv to the database.
  103. ASSERT_OK(Put(1, "a", "b"));
  104. // scan using non-blocking iterator. We should find it because
  105. // it is in memtable.
  106. Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
  107. int count = 0;
  108. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  109. ASSERT_OK(iter->status());
  110. count++;
  111. }
  112. ASSERT_EQ(count, 1);
  113. delete iter;
  114. // flush memtable to storage. Now, the key should not be in the
  115. // memtable neither in the block cache.
  116. ASSERT_OK(Flush(1));
  117. // verify that a non-blocking iterator does not find any
  118. // kvs. Neither does it do any IOs to storage.
  119. uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
  120. uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
  121. iter = NewIterator(non_blocking_opts, handles_[1]);
  122. count = 0;
  123. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  124. count++;
  125. }
  126. ASSERT_EQ(count, 0);
  127. ASSERT_TRUE(iter->status().IsIncomplete());
  128. ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
  129. ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
  130. delete iter;
  131. // read in the specified block via a regular get
  132. ASSERT_EQ(Get(1, "a"), "b");
  133. // verify that we can find it via a non-blocking scan
  134. numopen = TestGetTickerCount(options, NO_FILE_OPENS);
  135. cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
  136. iter = NewIterator(non_blocking_opts, handles_[1]);
  137. count = 0;
  138. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  139. ASSERT_OK(iter->status());
  140. count++;
  141. }
  142. ASSERT_EQ(count, 1);
  143. ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
  144. ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
  145. delete iter;
  146. // This test verifies block cache behaviors, which is not used by plain
  147. // table format.
  148. } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
  149. }
  150. TEST_P(DBIteratorTest, IterSeekBeforePrev) {
  151. ASSERT_OK(Put("a", "b"));
  152. ASSERT_OK(Put("c", "d"));
  153. dbfull()->Flush(FlushOptions());
  154. ASSERT_OK(Put("0", "f"));
  155. ASSERT_OK(Put("1", "h"));
  156. dbfull()->Flush(FlushOptions());
  157. ASSERT_OK(Put("2", "j"));
  158. auto iter = NewIterator(ReadOptions());
  159. iter->Seek(Slice("c"));
  160. iter->Prev();
  161. iter->Seek(Slice("a"));
  162. iter->Prev();
  163. delete iter;
  164. }
  165. TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
  166. Random rnd(301);
  167. Options options = CurrentOptions();
  168. BlockBasedTableOptions table_options;
  169. table_options.block_size = 1024;
  170. table_options.block_size_deviation = 50;
  171. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  172. options.compression = kNoCompression;
  173. Reopen(options);
  174. ASSERT_OK(Put("a", RandomString(&rnd, 400)));
  175. ASSERT_OK(Put("aabb", RandomString(&rnd, 400)));
  176. ASSERT_OK(Put("aaef", RandomString(&rnd, 400)));
  177. ASSERT_OK(Put("b", RandomString(&rnd, 400)));
  178. dbfull()->Flush(FlushOptions());
  179. ReadOptions opts;
  180. Slice ub = Slice("aa");
  181. opts.iterate_upper_bound = &ub;
  182. auto iter = NewIterator(opts);
  183. iter->Seek(Slice("a"));
  184. ub = Slice("b");
  185. iter->Seek(Slice("aabc"));
  186. ASSERT_TRUE(iter->Valid());
  187. ASSERT_EQ(iter->key().ToString(), "aaef");
  188. delete iter;
  189. }
  190. TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
  191. ASSERT_OK(Put("a", "b"));
  192. ASSERT_OK(Put("c", "d"));
  193. dbfull()->Flush(FlushOptions());
  194. ASSERT_OK(Put("0", "f"));
  195. ASSERT_OK(Put("1", "h"));
  196. dbfull()->Flush(FlushOptions());
  197. ASSERT_OK(Put("2", "j"));
  198. auto iter = NewIterator(ReadOptions());
  199. iter->SeekForPrev(Slice("0"));
  200. iter->Next();
  201. iter->SeekForPrev(Slice("1"));
  202. iter->Next();
  203. delete iter;
  204. }
  205. namespace {
  206. std::string MakeLongKey(size_t length, char c) {
  207. return std::string(length, c);
  208. }
  209. } // namespace
  210. TEST_P(DBIteratorTest, IterLongKeys) {
  211. ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
  212. ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
  213. ASSERT_OK(Put("a", "b"));
  214. dbfull()->Flush(FlushOptions());
  215. ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
  216. ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
  217. ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
  218. auto iter = NewIterator(ReadOptions());
  219. // Create a key that needs to be skipped for Seq too new
  220. iter->Seek(MakeLongKey(20, 0));
  221. ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
  222. iter->Next();
  223. ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
  224. iter->Next();
  225. ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
  226. iter->Next();
  227. ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
  228. iter->Next();
  229. ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
  230. iter->SeekForPrev(MakeLongKey(127, 3));
  231. ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
  232. iter->Prev();
  233. ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
  234. iter->Prev();
  235. ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
  236. delete iter;
  237. iter = NewIterator(ReadOptions());
  238. iter->Seek(MakeLongKey(50, 1));
  239. ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
  240. iter->Next();
  241. ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
  242. iter->Next();
  243. ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
  244. delete iter;
  245. }
  246. TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
  247. ASSERT_OK(Put("0", "0"));
  248. dbfull()->Flush(FlushOptions());
  249. ASSERT_OK(Put("a", "b"));
  250. ASSERT_OK(Put("c", "d"));
  251. ASSERT_OK(Put("d", "e"));
  252. auto iter = NewIterator(ReadOptions());
  253. // Create a key that needs to be skipped for Seq too new
  254. for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
  255. i++) {
  256. ASSERT_OK(Put("b", "f"));
  257. }
  258. iter->Seek(Slice("a"));
  259. ASSERT_EQ(IterStatus(iter), "a->b");
  260. iter->Next();
  261. ASSERT_EQ(IterStatus(iter), "c->d");
  262. iter->SeekForPrev(Slice("b"));
  263. ASSERT_EQ(IterStatus(iter), "a->b");
  264. iter->Next();
  265. ASSERT_EQ(IterStatus(iter), "c->d");
  266. delete iter;
  267. }
  268. TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
  269. ASSERT_OK(Put("0", "0"));
  270. dbfull()->Flush(FlushOptions());
  271. ASSERT_OK(Put("a", "b"));
  272. ASSERT_OK(Put("c", "d"));
  273. ASSERT_OK(Put("d", "e"));
  274. auto iter = NewIterator(ReadOptions());
  275. // Create a key that needs to be skipped for Seq too new
  276. for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
  277. i++) {
  278. ASSERT_OK(Put("b", "f"));
  279. }
  280. iter->Seek(Slice("d"));
  281. ASSERT_EQ(IterStatus(iter), "d->e");
  282. iter->Prev();
  283. ASSERT_EQ(IterStatus(iter), "c->d");
  284. iter->Prev();
  285. ASSERT_EQ(IterStatus(iter), "a->b");
  286. iter->Prev();
  287. iter->SeekForPrev(Slice("d"));
  288. ASSERT_EQ(IterStatus(iter), "d->e");
  289. iter->Prev();
  290. ASSERT_EQ(IterStatus(iter), "c->d");
  291. iter->Prev();
  292. ASSERT_EQ(IterStatus(iter), "a->b");
  293. iter->Prev();
  294. delete iter;
  295. }
  296. TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
  297. ASSERT_OK(Put("0", "0"));
  298. dbfull()->Flush(FlushOptions());
  299. ASSERT_OK(Put("a", "b"));
  300. ASSERT_OK(Put("c", "d"));
  301. ASSERT_OK(Put("e", "f"));
  302. auto iter = NewIterator(ReadOptions());
  303. auto iter2 = NewIterator(ReadOptions());
  304. iter->Seek(Slice("c"));
  305. iter2->SeekForPrev(Slice("d"));
  306. ASSERT_EQ(IterStatus(iter), "c->d");
  307. ASSERT_EQ(IterStatus(iter2), "c->d");
  308. // Create a key that needs to be skipped for Seq too new
  309. for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
  310. i++) {
  311. ASSERT_OK(Put("b", "f"));
  312. }
  313. iter->Prev();
  314. ASSERT_EQ(IterStatus(iter), "a->b");
  315. iter->Prev();
  316. iter2->Prev();
  317. ASSERT_EQ(IterStatus(iter2), "a->b");
  318. iter2->Prev();
  319. delete iter;
  320. delete iter2;
  321. }
  322. TEST_P(DBIteratorTest, IterEmpty) {
  323. do {
  324. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  325. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  326. iter->SeekToFirst();
  327. ASSERT_EQ(IterStatus(iter), "(invalid)");
  328. iter->SeekToLast();
  329. ASSERT_EQ(IterStatus(iter), "(invalid)");
  330. iter->Seek("foo");
  331. ASSERT_EQ(IterStatus(iter), "(invalid)");
  332. iter->SeekForPrev("foo");
  333. ASSERT_EQ(IterStatus(iter), "(invalid)");
  334. delete iter;
  335. } while (ChangeCompactOptions());
  336. }
  337. TEST_P(DBIteratorTest, IterSingle) {
  338. do {
  339. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  340. ASSERT_OK(Put(1, "a", "va"));
  341. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  342. iter->SeekToFirst();
  343. ASSERT_EQ(IterStatus(iter), "a->va");
  344. iter->Next();
  345. ASSERT_EQ(IterStatus(iter), "(invalid)");
  346. iter->SeekToFirst();
  347. ASSERT_EQ(IterStatus(iter), "a->va");
  348. iter->Prev();
  349. ASSERT_EQ(IterStatus(iter), "(invalid)");
  350. iter->SeekToLast();
  351. ASSERT_EQ(IterStatus(iter), "a->va");
  352. iter->Next();
  353. ASSERT_EQ(IterStatus(iter), "(invalid)");
  354. iter->SeekToLast();
  355. ASSERT_EQ(IterStatus(iter), "a->va");
  356. iter->Prev();
  357. ASSERT_EQ(IterStatus(iter), "(invalid)");
  358. iter->Seek("");
  359. ASSERT_EQ(IterStatus(iter), "a->va");
  360. iter->Next();
  361. ASSERT_EQ(IterStatus(iter), "(invalid)");
  362. iter->SeekForPrev("");
  363. ASSERT_EQ(IterStatus(iter), "(invalid)");
  364. iter->Seek("a");
  365. ASSERT_EQ(IterStatus(iter), "a->va");
  366. iter->Next();
  367. ASSERT_EQ(IterStatus(iter), "(invalid)");
  368. iter->SeekForPrev("a");
  369. ASSERT_EQ(IterStatus(iter), "a->va");
  370. iter->Prev();
  371. ASSERT_EQ(IterStatus(iter), "(invalid)");
  372. iter->Seek("b");
  373. ASSERT_EQ(IterStatus(iter), "(invalid)");
  374. iter->SeekForPrev("b");
  375. ASSERT_EQ(IterStatus(iter), "a->va");
  376. iter->Prev();
  377. ASSERT_EQ(IterStatus(iter), "(invalid)");
  378. delete iter;
  379. } while (ChangeCompactOptions());
  380. }
  381. TEST_P(DBIteratorTest, IterMulti) {
  382. do {
  383. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  384. ASSERT_OK(Put(1, "a", "va"));
  385. ASSERT_OK(Put(1, "b", "vb"));
  386. ASSERT_OK(Put(1, "c", "vc"));
  387. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  388. iter->SeekToFirst();
  389. ASSERT_EQ(IterStatus(iter), "a->va");
  390. iter->Next();
  391. ASSERT_EQ(IterStatus(iter), "b->vb");
  392. iter->Next();
  393. ASSERT_EQ(IterStatus(iter), "c->vc");
  394. iter->Next();
  395. ASSERT_EQ(IterStatus(iter), "(invalid)");
  396. iter->SeekToFirst();
  397. ASSERT_EQ(IterStatus(iter), "a->va");
  398. iter->Prev();
  399. ASSERT_EQ(IterStatus(iter), "(invalid)");
  400. iter->SeekToLast();
  401. ASSERT_EQ(IterStatus(iter), "c->vc");
  402. iter->Prev();
  403. ASSERT_EQ(IterStatus(iter), "b->vb");
  404. iter->Prev();
  405. ASSERT_EQ(IterStatus(iter), "a->va");
  406. iter->Prev();
  407. ASSERT_EQ(IterStatus(iter), "(invalid)");
  408. iter->SeekToLast();
  409. ASSERT_EQ(IterStatus(iter), "c->vc");
  410. iter->Next();
  411. ASSERT_EQ(IterStatus(iter), "(invalid)");
  412. iter->Seek("");
  413. ASSERT_EQ(IterStatus(iter), "a->va");
  414. iter->Seek("a");
  415. ASSERT_EQ(IterStatus(iter), "a->va");
  416. iter->Seek("ax");
  417. ASSERT_EQ(IterStatus(iter), "b->vb");
  418. iter->SeekForPrev("d");
  419. ASSERT_EQ(IterStatus(iter), "c->vc");
  420. iter->SeekForPrev("c");
  421. ASSERT_EQ(IterStatus(iter), "c->vc");
  422. iter->SeekForPrev("bx");
  423. ASSERT_EQ(IterStatus(iter), "b->vb");
  424. iter->Seek("b");
  425. ASSERT_EQ(IterStatus(iter), "b->vb");
  426. iter->Seek("z");
  427. ASSERT_EQ(IterStatus(iter), "(invalid)");
  428. iter->SeekForPrev("b");
  429. ASSERT_EQ(IterStatus(iter), "b->vb");
  430. iter->SeekForPrev("");
  431. ASSERT_EQ(IterStatus(iter), "(invalid)");
  432. // Switch from reverse to forward
  433. iter->SeekToLast();
  434. iter->Prev();
  435. iter->Prev();
  436. iter->Next();
  437. ASSERT_EQ(IterStatus(iter), "b->vb");
  438. // Switch from forward to reverse
  439. iter->SeekToFirst();
  440. iter->Next();
  441. iter->Next();
  442. iter->Prev();
  443. ASSERT_EQ(IterStatus(iter), "b->vb");
  444. // Make sure iter stays at snapshot
  445. ASSERT_OK(Put(1, "a", "va2"));
  446. ASSERT_OK(Put(1, "a2", "va3"));
  447. ASSERT_OK(Put(1, "b", "vb2"));
  448. ASSERT_OK(Put(1, "c", "vc2"));
  449. ASSERT_OK(Delete(1, "b"));
  450. iter->SeekToFirst();
  451. ASSERT_EQ(IterStatus(iter), "a->va");
  452. iter->Next();
  453. ASSERT_EQ(IterStatus(iter), "b->vb");
  454. iter->Next();
  455. ASSERT_EQ(IterStatus(iter), "c->vc");
  456. iter->Next();
  457. ASSERT_EQ(IterStatus(iter), "(invalid)");
  458. iter->SeekToLast();
  459. ASSERT_EQ(IterStatus(iter), "c->vc");
  460. iter->Prev();
  461. ASSERT_EQ(IterStatus(iter), "b->vb");
  462. iter->Prev();
  463. ASSERT_EQ(IterStatus(iter), "a->va");
  464. iter->Prev();
  465. ASSERT_EQ(IterStatus(iter), "(invalid)");
  466. delete iter;
  467. } while (ChangeCompactOptions());
  468. }
  469. // Check that we can skip over a run of user keys
  470. // by using reseek rather than sequential scan
  471. TEST_P(DBIteratorTest, IterReseek) {
  472. anon::OptionsOverride options_override;
  473. options_override.skip_policy = kSkipNoSnapshot;
  474. Options options = CurrentOptions(options_override);
  475. options.max_sequential_skip_in_iterations = 3;
  476. options.create_if_missing = true;
  477. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  478. DestroyAndReopen(options);
  479. CreateAndReopenWithCF({"pikachu"}, options);
  480. // insert three keys with same userkey and verify that
  481. // reseek is not invoked. For each of these test cases,
  482. // verify that we can find the next key "b".
  483. ASSERT_OK(Put(1, "a", "zero"));
  484. ASSERT_OK(Put(1, "a", "one"));
  485. ASSERT_OK(Put(1, "a", "two"));
  486. ASSERT_OK(Put(1, "b", "bone"));
  487. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  488. iter->SeekToFirst();
  489. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  490. ASSERT_EQ(IterStatus(iter), "a->two");
  491. iter->Next();
  492. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  493. ASSERT_EQ(IterStatus(iter), "b->bone");
  494. delete iter;
  495. // insert a total of three keys with same userkey and verify
  496. // that reseek is still not invoked.
  497. ASSERT_OK(Put(1, "a", "three"));
  498. iter = NewIterator(ReadOptions(), handles_[1]);
  499. iter->SeekToFirst();
  500. ASSERT_EQ(IterStatus(iter), "a->three");
  501. iter->Next();
  502. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  503. ASSERT_EQ(IterStatus(iter), "b->bone");
  504. delete iter;
  505. // insert a total of four keys with same userkey and verify
  506. // that reseek is invoked.
  507. ASSERT_OK(Put(1, "a", "four"));
  508. iter = NewIterator(ReadOptions(), handles_[1]);
  509. iter->SeekToFirst();
  510. ASSERT_EQ(IterStatus(iter), "a->four");
  511. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
  512. iter->Next();
  513. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
  514. ASSERT_EQ(IterStatus(iter), "b->bone");
  515. delete iter;
  516. // Testing reverse iterator
  517. // At this point, we have three versions of "a" and one version of "b".
  518. // The reseek statistics is already at 1.
  519. int num_reseeks = static_cast<int>(
  520. TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
  521. // Insert another version of b and assert that reseek is not invoked
  522. ASSERT_OK(Put(1, "b", "btwo"));
  523. iter = NewIterator(ReadOptions(), handles_[1]);
  524. iter->SeekToLast();
  525. ASSERT_EQ(IterStatus(iter), "b->btwo");
  526. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
  527. num_reseeks);
  528. iter->Prev();
  529. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
  530. num_reseeks + 1);
  531. ASSERT_EQ(IterStatus(iter), "a->four");
  532. delete iter;
  533. // insert two more versions of b. This makes a total of 4 versions
  534. // of b and 4 versions of a.
  535. ASSERT_OK(Put(1, "b", "bthree"));
  536. ASSERT_OK(Put(1, "b", "bfour"));
  537. iter = NewIterator(ReadOptions(), handles_[1]);
  538. iter->SeekToLast();
  539. ASSERT_EQ(IterStatus(iter), "b->bfour");
  540. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
  541. num_reseeks + 2);
  542. iter->Prev();
  543. // the previous Prev call should have invoked reseek
  544. ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
  545. num_reseeks + 3);
  546. ASSERT_EQ(IterStatus(iter), "a->four");
  547. delete iter;
  548. }
  549. TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
  550. do {
  551. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  552. ASSERT_OK(Put(1, "a", "va"));
  553. ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
  554. ASSERT_OK(Put(1, "c", "vc"));
  555. ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
  556. ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
  557. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  558. iter->SeekToFirst();
  559. ASSERT_EQ(IterStatus(iter), "a->va");
  560. iter->Next();
  561. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  562. iter->Next();
  563. ASSERT_EQ(IterStatus(iter), "c->vc");
  564. iter->Next();
  565. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  566. iter->Next();
  567. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  568. iter->Next();
  569. ASSERT_EQ(IterStatus(iter), "(invalid)");
  570. iter->SeekToLast();
  571. ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
  572. iter->Prev();
  573. ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
  574. iter->Prev();
  575. ASSERT_EQ(IterStatus(iter), "c->vc");
  576. iter->Prev();
  577. ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
  578. iter->Prev();
  579. ASSERT_EQ(IterStatus(iter), "a->va");
  580. iter->Prev();
  581. ASSERT_EQ(IterStatus(iter), "(invalid)");
  582. delete iter;
  583. } while (ChangeCompactOptions());
  584. }
  585. TEST_P(DBIteratorTest, IterMultiWithDelete) {
  586. do {
  587. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  588. ASSERT_OK(Put(1, "ka", "va"));
  589. ASSERT_OK(Put(1, "kb", "vb"));
  590. ASSERT_OK(Put(1, "kc", "vc"));
  591. ASSERT_OK(Delete(1, "kb"));
  592. ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
  593. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  594. iter->Seek("kc");
  595. ASSERT_EQ(IterStatus(iter), "kc->vc");
  596. if (!CurrentOptions().merge_operator) {
  597. // TODO: merge operator does not support backward iteration yet
  598. if (kPlainTableAllBytesPrefix != option_config_ &&
  599. kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
  600. kHashLinkList != option_config_ &&
  601. kHashSkipList != option_config_) { // doesn't support SeekToLast
  602. iter->Prev();
  603. ASSERT_EQ(IterStatus(iter), "ka->va");
  604. }
  605. }
  606. delete iter;
  607. } while (ChangeOptions());
  608. }
  609. TEST_P(DBIteratorTest, IterPrevMaxSkip) {
  610. do {
  611. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  612. for (int i = 0; i < 2; i++) {
  613. ASSERT_OK(Put(1, "key1", "v1"));
  614. ASSERT_OK(Put(1, "key2", "v2"));
  615. ASSERT_OK(Put(1, "key3", "v3"));
  616. ASSERT_OK(Put(1, "key4", "v4"));
  617. ASSERT_OK(Put(1, "key5", "v5"));
  618. }
  619. VerifyIterLast("key5->v5", 1);
  620. ASSERT_OK(Delete(1, "key5"));
  621. VerifyIterLast("key4->v4", 1);
  622. ASSERT_OK(Delete(1, "key4"));
  623. VerifyIterLast("key3->v3", 1);
  624. ASSERT_OK(Delete(1, "key3"));
  625. VerifyIterLast("key2->v2", 1);
  626. ASSERT_OK(Delete(1, "key2"));
  627. VerifyIterLast("key1->v1", 1);
  628. ASSERT_OK(Delete(1, "key1"));
  629. VerifyIterLast("(invalid)", 1);
  630. } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
  631. }
  632. TEST_P(DBIteratorTest, IterWithSnapshot) {
  633. anon::OptionsOverride options_override;
  634. options_override.skip_policy = kSkipNoSnapshot;
  635. do {
  636. CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
  637. ASSERT_OK(Put(1, "key1", "val1"));
  638. ASSERT_OK(Put(1, "key2", "val2"));
  639. ASSERT_OK(Put(1, "key3", "val3"));
  640. ASSERT_OK(Put(1, "key4", "val4"));
  641. ASSERT_OK(Put(1, "key5", "val5"));
  642. const Snapshot* snapshot = db_->GetSnapshot();
  643. ReadOptions options;
  644. options.snapshot = snapshot;
  645. Iterator* iter = NewIterator(options, handles_[1]);
  646. ASSERT_OK(Put(1, "key0", "val0"));
  647. // Put more values after the snapshot
  648. ASSERT_OK(Put(1, "key100", "val100"));
  649. ASSERT_OK(Put(1, "key101", "val101"));
  650. iter->Seek("key5");
  651. ASSERT_EQ(IterStatus(iter), "key5->val5");
  652. if (!CurrentOptions().merge_operator) {
  653. // TODO: merge operator does not support backward iteration yet
  654. if (kPlainTableAllBytesPrefix != option_config_ &&
  655. kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
  656. kHashLinkList != option_config_ && kHashSkipList != option_config_) {
  657. iter->Prev();
  658. ASSERT_EQ(IterStatus(iter), "key4->val4");
  659. iter->Prev();
  660. ASSERT_EQ(IterStatus(iter), "key3->val3");
  661. iter->Next();
  662. ASSERT_EQ(IterStatus(iter), "key4->val4");
  663. iter->Next();
  664. ASSERT_EQ(IterStatus(iter), "key5->val5");
  665. }
  666. iter->Next();
  667. ASSERT_TRUE(!iter->Valid());
  668. }
  669. if (!CurrentOptions().merge_operator) {
  670. // TODO(gzh): merge operator does not support backward iteration yet
  671. if (kPlainTableAllBytesPrefix != option_config_ &&
  672. kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
  673. kHashLinkList != option_config_ && kHashSkipList != option_config_) {
  674. iter->SeekForPrev("key1");
  675. ASSERT_EQ(IterStatus(iter), "key1->val1");
  676. iter->Next();
  677. ASSERT_EQ(IterStatus(iter), "key2->val2");
  678. iter->Next();
  679. ASSERT_EQ(IterStatus(iter), "key3->val3");
  680. iter->Prev();
  681. ASSERT_EQ(IterStatus(iter), "key2->val2");
  682. iter->Prev();
  683. ASSERT_EQ(IterStatus(iter), "key1->val1");
  684. iter->Prev();
  685. ASSERT_TRUE(!iter->Valid());
  686. }
  687. }
  688. db_->ReleaseSnapshot(snapshot);
  689. delete iter;
  690. } while (ChangeOptions());
  691. }
  692. TEST_P(DBIteratorTest, IteratorPinsRef) {
  693. do {
  694. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  695. Put(1, "foo", "hello");
  696. // Get iterator that will yield the current contents of the DB.
  697. Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
  698. // Write to force compactions
  699. Put(1, "foo", "newvalue1");
  700. for (int i = 0; i < 100; i++) {
  701. // 100K values
  702. ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
  703. }
  704. Put(1, "foo", "newvalue2");
  705. iter->SeekToFirst();
  706. ASSERT_TRUE(iter->Valid());
  707. ASSERT_EQ("foo", iter->key().ToString());
  708. ASSERT_EQ("hello", iter->value().ToString());
  709. iter->Next();
  710. ASSERT_TRUE(!iter->Valid());
  711. delete iter;
  712. } while (ChangeCompactOptions());
  713. }
  714. TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
  715. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  716. Put(1, "foo", "delete-cf-then-delete-iter");
  717. Put(1, "hello", "value2");
  718. ColumnFamilyHandle* cf = handles_[1];
  719. ReadOptions ro;
  720. auto* iter = db_->NewIterator(ro, cf);
  721. iter->SeekToFirst();
  722. ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
  723. // delete CF handle
  724. db_->DestroyColumnFamilyHandle(cf);
  725. handles_.erase(std::begin(handles_) + 1);
  726. // delete Iterator after CF handle is deleted
  727. iter->Next();
  728. ASSERT_EQ(IterStatus(iter), "hello->value2");
  729. delete iter;
  730. }
  731. TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
  732. CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
  733. Put(1, "foo", "drop-cf-then-delete-iter");
  734. ReadOptions ro;
  735. ColumnFamilyHandle* cf = handles_[1];
  736. auto* iter = db_->NewIterator(ro, cf);
  737. iter->SeekToFirst();
  738. ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
  739. // drop and delete CF
  740. db_->DropColumnFamily(cf);
  741. db_->DestroyColumnFamilyHandle(cf);
  742. handles_.erase(std::begin(handles_) + 1);
  743. // delete Iterator after CF handle is dropped
  744. delete iter;
  745. }
  746. // SetOptions not defined in ROCKSDB LITE
  747. #ifndef ROCKSDB_LITE
  748. TEST_P(DBIteratorTest, DBIteratorBoundTest) {
  749. Options options = CurrentOptions();
  750. options.env = env_;
  751. options.create_if_missing = true;
  752. options.prefix_extractor = nullptr;
  753. DestroyAndReopen(options);
  754. ASSERT_OK(Put("a", "0"));
  755. ASSERT_OK(Put("foo", "bar"));
  756. ASSERT_OK(Put("foo1", "bar1"));
  757. ASSERT_OK(Put("g1", "0"));
  758. // testing basic case with no iterate_upper_bound and no prefix_extractor
  759. {
  760. ReadOptions ro;
  761. ro.iterate_upper_bound = nullptr;
  762. std::unique_ptr<Iterator> iter(NewIterator(ro));
  763. iter->Seek("foo");
  764. ASSERT_TRUE(iter->Valid());
  765. ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
  766. iter->Next();
  767. ASSERT_TRUE(iter->Valid());
  768. ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
  769. iter->Next();
  770. ASSERT_TRUE(iter->Valid());
  771. ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
  772. iter->SeekForPrev("g1");
  773. ASSERT_TRUE(iter->Valid());
  774. ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
  775. iter->Prev();
  776. ASSERT_TRUE(iter->Valid());
  777. ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
  778. iter->Prev();
  779. ASSERT_TRUE(iter->Valid());
  780. ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
  781. }
  782. // testing iterate_upper_bound and forward iterator
  783. // to make sure it stops at bound
  784. {
  785. ReadOptions ro;
  786. // iterate_upper_bound points beyond the last expected entry
  787. Slice prefix("foo2");
  788. ro.iterate_upper_bound = &prefix;
  789. std::unique_ptr<Iterator> iter(NewIterator(ro));
  790. iter->Seek("foo");
  791. ASSERT_TRUE(iter->Valid());
  792. ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
  793. iter->Next();
  794. ASSERT_TRUE(iter->Valid());
  795. ASSERT_EQ(iter->key().compare(("foo1")), 0);
  796. iter->Next();
  797. // should stop here...
  798. ASSERT_TRUE(!iter->Valid());
  799. }
  800. // Testing SeekToLast with iterate_upper_bound set
  801. {
  802. ReadOptions ro;
  803. Slice prefix("foo");
  804. ro.iterate_upper_bound = &prefix;
  805. std::unique_ptr<Iterator> iter(NewIterator(ro));
  806. iter->SeekToLast();
  807. ASSERT_TRUE(iter->Valid());
  808. ASSERT_EQ(iter->key().compare(Slice("a")), 0);
  809. }
  810. // prefix is the first letter of the key
  811. ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
  812. ASSERT_OK(Put("a", "0"));
  813. ASSERT_OK(Put("foo", "bar"));
  814. ASSERT_OK(Put("foo1", "bar1"));
  815. ASSERT_OK(Put("g1", "0"));
  816. // testing with iterate_upper_bound and prefix_extractor
  817. // Seek target and iterate_upper_bound are not is same prefix
  818. // This should be an error
  819. {
  820. ReadOptions ro;
  821. Slice upper_bound("g");
  822. ro.iterate_upper_bound = &upper_bound;
  823. std::unique_ptr<Iterator> iter(NewIterator(ro));
  824. iter->Seek("foo");
  825. ASSERT_TRUE(iter->Valid());
  826. ASSERT_EQ("foo", iter->key().ToString());
  827. iter->Next();
  828. ASSERT_TRUE(iter->Valid());
  829. ASSERT_EQ("foo1", iter->key().ToString());
  830. iter->Next();
  831. ASSERT_TRUE(!iter->Valid());
  832. }
  833. // testing that iterate_upper_bound prevents iterating over deleted items
  834. // if the bound has already reached
  835. {
  836. options.prefix_extractor = nullptr;
  837. DestroyAndReopen(options);
  838. ASSERT_OK(Put("a", "0"));
  839. ASSERT_OK(Put("b", "0"));
  840. ASSERT_OK(Put("b1", "0"));
  841. ASSERT_OK(Put("c", "0"));
  842. ASSERT_OK(Put("d", "0"));
  843. ASSERT_OK(Put("e", "0"));
  844. ASSERT_OK(Delete("c"));
  845. ASSERT_OK(Delete("d"));
  846. // base case with no bound
  847. ReadOptions ro;
  848. ro.iterate_upper_bound = nullptr;
  849. std::unique_ptr<Iterator> iter(NewIterator(ro));
  850. iter->Seek("b");
  851. ASSERT_TRUE(iter->Valid());
  852. ASSERT_EQ(iter->key().compare(Slice("b")), 0);
  853. iter->Next();
  854. ASSERT_TRUE(iter->Valid());
  855. ASSERT_EQ(iter->key().compare(("b1")), 0);
  856. get_perf_context()->Reset();
  857. iter->Next();
  858. ASSERT_TRUE(iter->Valid());
  859. ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
  860. // now testing with iterate_bound
  861. Slice prefix("c");
  862. ro.iterate_upper_bound = &prefix;
  863. iter.reset(NewIterator(ro));
  864. get_perf_context()->Reset();
  865. iter->Seek("b");
  866. ASSERT_TRUE(iter->Valid());
  867. ASSERT_EQ(iter->key().compare(Slice("b")), 0);
  868. iter->Next();
  869. ASSERT_TRUE(iter->Valid());
  870. ASSERT_EQ(iter->key().compare(("b1")), 0);
  871. iter->Next();
  872. // the iteration should stop as soon as the bound key is reached
  873. // even though the key is deleted
  874. // hence internal_delete_skipped_count should be 0
  875. ASSERT_TRUE(!iter->Valid());
  876. ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
  877. }
  878. }
  879. TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
  880. Options options = CurrentOptions();
  881. options.env = env_;
  882. options.create_if_missing = true;
  883. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  884. options.prefix_extractor = nullptr;
  885. DestroyAndReopen(options);
  886. ASSERT_OK(Put("a", "0"));
  887. ASSERT_OK(Put("z", "0"));
  888. ASSERT_OK(Flush());
  889. ASSERT_OK(Put("foo1", "bar1"));
  890. ASSERT_OK(Put("foo2", "bar2"));
  891. ASSERT_OK(Put("foo3", "bar3"));
  892. ASSERT_OK(Put("foo4", "bar4"));
  893. {
  894. std::string up_str = "foo5";
  895. Slice up(up_str);
  896. ReadOptions ro;
  897. ro.iterate_upper_bound = &up;
  898. std::unique_ptr<Iterator> iter(NewIterator(ro));
  899. iter->Seek("foo1");
  900. ASSERT_TRUE(iter->Valid());
  901. ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
  902. uint64_t prev_block_cache_hit =
  903. TestGetTickerCount(options, BLOCK_CACHE_HIT);
  904. uint64_t prev_block_cache_miss =
  905. TestGetTickerCount(options, BLOCK_CACHE_MISS);
  906. ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
  907. iter->Seek("foo4");
  908. ASSERT_TRUE(iter->Valid());
  909. ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
  910. ASSERT_EQ(prev_block_cache_hit,
  911. TestGetTickerCount(options, BLOCK_CACHE_HIT));
  912. ASSERT_EQ(prev_block_cache_miss,
  913. TestGetTickerCount(options, BLOCK_CACHE_MISS));
  914. iter->Seek("foo2");
  915. ASSERT_TRUE(iter->Valid());
  916. ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
  917. iter->Next();
  918. ASSERT_TRUE(iter->Valid());
  919. ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
  920. ASSERT_EQ(prev_block_cache_hit,
  921. TestGetTickerCount(options, BLOCK_CACHE_HIT));
  922. ASSERT_EQ(prev_block_cache_miss,
  923. TestGetTickerCount(options, BLOCK_CACHE_MISS));
  924. }
  925. }
  926. #endif
  927. TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
  928. for (auto format_version : {2, 3, 4}) {
  929. int upper_bound_hits = 0;
  930. Options options = CurrentOptions();
  931. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  932. "BlockBasedTableIterator:out_of_bound",
  933. [&upper_bound_hits](void*) { upper_bound_hits++; });
  934. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  935. options.env = env_;
  936. options.create_if_missing = true;
  937. options.prefix_extractor = nullptr;
  938. BlockBasedTableOptions table_options;
  939. table_options.format_version = format_version;
  940. table_options.flush_block_policy_factory =
  941. std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  942. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  943. DestroyAndReopen(options);
  944. ASSERT_OK(Put("foo1", "bar1"));
  945. ASSERT_OK(Put("foo2", "bar2"));
  946. ASSERT_OK(Put("foo4", "bar4"));
  947. ASSERT_OK(Flush());
  948. Slice ub("foo3");
  949. ReadOptions ro;
  950. ro.iterate_upper_bound = &ub;
  951. std::unique_ptr<Iterator> iter(NewIterator(ro));
  952. iter->Seek("foo");
  953. ASSERT_TRUE(iter->Valid());
  954. ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
  955. ASSERT_EQ(upper_bound_hits, 0);
  956. iter->Next();
  957. ASSERT_TRUE(iter->Valid());
  958. ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
  959. ASSERT_EQ(upper_bound_hits, 0);
  960. iter->Next();
  961. ASSERT_FALSE(iter->Valid());
  962. ASSERT_EQ(upper_bound_hits, 1);
  963. }
  964. }
  965. // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
  966. // they don't do unnecessary block reads.
  967. TEST_P(DBIteratorTest, IndexWithFirstKey) {
  968. for (int tailing = 0; tailing < 2; ++tailing) {
  969. SCOPED_TRACE("tailing = " + std::to_string(tailing));
  970. Options options = CurrentOptions();
  971. options.env = env_;
  972. options.create_if_missing = true;
  973. options.prefix_extractor = nullptr;
  974. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  975. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  976. Statistics* stats = options.statistics.get();
  977. BlockBasedTableOptions table_options;
  978. table_options.index_type =
  979. BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
  980. table_options.index_shortening =
  981. BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
  982. table_options.flush_block_policy_factory =
  983. std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  984. table_options.block_cache =
  985. NewLRUCache(8000); // fits all blocks and their cache metadata overhead
  986. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  987. DestroyAndReopen(options);
  988. ASSERT_OK(Merge("a1", "x1"));
  989. ASSERT_OK(Merge("b1", "y1"));
  990. ASSERT_OK(Merge("c0", "z1"));
  991. ASSERT_OK(Flush());
  992. ASSERT_OK(Merge("a2", "x2"));
  993. ASSERT_OK(Merge("b2", "y2"));
  994. ASSERT_OK(Merge("c0", "z2"));
  995. ASSERT_OK(Flush());
  996. ASSERT_OK(Merge("a3", "x3"));
  997. ASSERT_OK(Merge("b3", "y3"));
  998. ASSERT_OK(Merge("c3", "z3"));
  999. ASSERT_OK(Flush());
  1000. // Block cache is not important for this test.
  1001. // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
  1002. // available way of counting block accesses.
  1003. ReadOptions ropt;
  1004. ropt.tailing = tailing;
  1005. std::unique_ptr<Iterator> iter(NewIterator(ropt));
  1006. iter->Seek("b10");
  1007. ASSERT_TRUE(iter->Valid());
  1008. EXPECT_EQ("b2", iter->key().ToString());
  1009. EXPECT_EQ("y2", iter->value().ToString());
  1010. EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1011. iter->Next();
  1012. ASSERT_TRUE(iter->Valid());
  1013. EXPECT_EQ("b3", iter->key().ToString());
  1014. EXPECT_EQ("y3", iter->value().ToString());
  1015. EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1016. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1017. iter->Seek("c0");
  1018. ASSERT_TRUE(iter->Valid());
  1019. EXPECT_EQ("c0", iter->key().ToString());
  1020. EXPECT_EQ("z1,z2", iter->value().ToString());
  1021. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1022. EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1023. iter->Next();
  1024. ASSERT_TRUE(iter->Valid());
  1025. EXPECT_EQ("c3", iter->key().ToString());
  1026. EXPECT_EQ("z3", iter->value().ToString());
  1027. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1028. EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1029. iter.reset();
  1030. // Enable iterate_upper_bound and check that iterator is not trying to read
  1031. // blocks that are fully above upper bound.
  1032. std::string ub = "b3";
  1033. Slice ub_slice(ub);
  1034. ropt.iterate_upper_bound = &ub_slice;
  1035. iter.reset(NewIterator(ropt));
  1036. iter->Seek("b2");
  1037. ASSERT_TRUE(iter->Valid());
  1038. EXPECT_EQ("b2", iter->key().ToString());
  1039. EXPECT_EQ("y2", iter->value().ToString());
  1040. EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1041. EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1042. iter->Next();
  1043. ASSERT_FALSE(iter->Valid());
  1044. EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1045. EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1046. }
  1047. }
  1048. TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
  1049. Options options = CurrentOptions();
  1050. options.env = env_;
  1051. options.create_if_missing = true;
  1052. options.prefix_extractor = nullptr;
  1053. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1054. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1055. Statistics* stats = options.statistics.get();
  1056. BlockBasedTableOptions table_options;
  1057. table_options.index_type =
  1058. BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
  1059. table_options.index_shortening =
  1060. BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
  1061. table_options.flush_block_policy_factory =
  1062. std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  1063. table_options.block_cache = NewLRUCache(1000); // fits all blocks
  1064. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1065. DestroyAndReopen(options);
  1066. ASSERT_OK(Merge("a", "x1"));
  1067. ASSERT_OK(Merge("c", "y1"));
  1068. ASSERT_OK(Merge("e", "z1"));
  1069. ASSERT_OK(Flush());
  1070. ASSERT_OK(Merge("c", "y2"));
  1071. ASSERT_OK(Merge("e", "z2"));
  1072. ASSERT_OK(Flush());
  1073. // Get() between blocks shouldn't read any blocks.
  1074. ASSERT_EQ("NOT_FOUND", Get("b"));
  1075. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1076. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1077. // Get() of an existing key shouldn't read any unnecessary blocks when there's
  1078. // only one key per block.
  1079. ASSERT_EQ("y1,y2", Get("c"));
  1080. EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1081. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1082. ASSERT_EQ("x1", Get("a"));
  1083. EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
  1084. EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
  1085. EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
  1086. MultiGet({"b", "e"}));
  1087. }
  1088. // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
  1089. // return the biggest key which is smaller than the seek key.
  1090. TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
  1091. Options options;
  1092. options.create_if_missing = true;
  1093. options.merge_operator = MergeOperators::CreatePutOperator();
  1094. options.env = env_;
  1095. DestroyAndReopen(options);
  1096. // write three entries with different keys using Merge()
  1097. WriteOptions wopts;
  1098. db_->Merge(wopts, "1", "data1");
  1099. db_->Merge(wopts, "2", "data2");
  1100. db_->Merge(wopts, "3", "data3");
  1101. std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
  1102. it->Seek("2");
  1103. ASSERT_TRUE(it->Valid());
  1104. ASSERT_EQ("2", it->key().ToString());
  1105. it->Prev();
  1106. ASSERT_TRUE(it->Valid());
  1107. ASSERT_EQ("1", it->key().ToString());
  1108. it->SeekForPrev("1");
  1109. ASSERT_TRUE(it->Valid());
  1110. ASSERT_EQ("1", it->key().ToString());
  1111. it->Next();
  1112. ASSERT_TRUE(it->Valid());
  1113. ASSERT_EQ("2", it->key().ToString());
  1114. }
  1115. class DBIteratorTestForPinnedData : public DBIteratorTest {
  1116. public:
  1117. enum TestConfig {
  1118. NORMAL,
  1119. CLOSE_AND_OPEN,
  1120. COMPACT_BEFORE_READ,
  1121. FLUSH_EVERY_1000,
  1122. MAX
  1123. };
  1124. DBIteratorTestForPinnedData() : DBIteratorTest() {}
  1125. void PinnedDataIteratorRandomized(TestConfig run_config) {
  1126. // Generate Random data
  1127. Random rnd(301);
  1128. int puts = 100000;
  1129. int key_pool = static_cast<int>(puts * 0.7);
  1130. int key_size = 100;
  1131. int val_size = 1000;
  1132. int seeks_percentage = 20; // 20% of keys will be used to test seek()
  1133. int delete_percentage = 20; // 20% of keys will be deleted
  1134. int merge_percentage = 20; // 20% of keys will be added using Merge()
  1135. Options options = CurrentOptions();
  1136. BlockBasedTableOptions table_options;
  1137. table_options.use_delta_encoding = false;
  1138. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1139. options.merge_operator = MergeOperators::CreatePutOperator();
  1140. DestroyAndReopen(options);
  1141. std::vector<std::string> generated_keys(key_pool);
  1142. for (int i = 0; i < key_pool; i++) {
  1143. generated_keys[i] = RandomString(&rnd, key_size);
  1144. }
  1145. std::map<std::string, std::string> true_data;
  1146. std::vector<std::string> random_keys;
  1147. std::vector<std::string> deleted_keys;
  1148. for (int i = 0; i < puts; i++) {
  1149. auto& k = generated_keys[rnd.Next() % key_pool];
  1150. auto v = RandomString(&rnd, val_size);
  1151. // Insert data to true_data map and to DB
  1152. true_data[k] = v;
  1153. if (rnd.PercentTrue(merge_percentage)) {
  1154. ASSERT_OK(db_->Merge(WriteOptions(), k, v));
  1155. } else {
  1156. ASSERT_OK(Put(k, v));
  1157. }
  1158. // Pick random keys to be used to test Seek()
  1159. if (rnd.PercentTrue(seeks_percentage)) {
  1160. random_keys.push_back(k);
  1161. }
  1162. // Delete some random keys
  1163. if (rnd.PercentTrue(delete_percentage)) {
  1164. deleted_keys.push_back(k);
  1165. true_data.erase(k);
  1166. ASSERT_OK(Delete(k));
  1167. }
  1168. if (run_config == TestConfig::FLUSH_EVERY_1000) {
  1169. if (i && i % 1000 == 0) {
  1170. Flush();
  1171. }
  1172. }
  1173. }
  1174. if (run_config == TestConfig::CLOSE_AND_OPEN) {
  1175. Close();
  1176. Reopen(options);
  1177. } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
  1178. db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
  1179. }
  1180. ReadOptions ro;
  1181. ro.pin_data = true;
  1182. auto iter = NewIterator(ro);
  1183. {
  1184. // Test Seek to random keys
  1185. std::vector<Slice> keys_slices;
  1186. std::vector<std::string> true_keys;
  1187. for (auto& k : random_keys) {
  1188. iter->Seek(k);
  1189. if (!iter->Valid()) {
  1190. ASSERT_EQ(true_data.lower_bound(k), true_data.end());
  1191. continue;
  1192. }
  1193. std::string prop_value;
  1194. ASSERT_OK(
  1195. iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1196. ASSERT_EQ("1", prop_value);
  1197. keys_slices.push_back(iter->key());
  1198. true_keys.push_back(true_data.lower_bound(k)->first);
  1199. }
  1200. for (size_t i = 0; i < keys_slices.size(); i++) {
  1201. ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
  1202. }
  1203. }
  1204. {
  1205. // Test SeekForPrev to random keys
  1206. std::vector<Slice> keys_slices;
  1207. std::vector<std::string> true_keys;
  1208. for (auto& k : random_keys) {
  1209. iter->SeekForPrev(k);
  1210. if (!iter->Valid()) {
  1211. ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
  1212. continue;
  1213. }
  1214. std::string prop_value;
  1215. ASSERT_OK(
  1216. iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1217. ASSERT_EQ("1", prop_value);
  1218. keys_slices.push_back(iter->key());
  1219. true_keys.push_back((--true_data.upper_bound(k))->first);
  1220. }
  1221. for (size_t i = 0; i < keys_slices.size(); i++) {
  1222. ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
  1223. }
  1224. }
  1225. {
  1226. // Test iterating all data forward
  1227. std::vector<Slice> all_keys;
  1228. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  1229. std::string prop_value;
  1230. ASSERT_OK(
  1231. iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1232. ASSERT_EQ("1", prop_value);
  1233. all_keys.push_back(iter->key());
  1234. }
  1235. ASSERT_EQ(all_keys.size(), true_data.size());
  1236. // Verify that all keys slices are valid
  1237. auto data_iter = true_data.begin();
  1238. for (size_t i = 0; i < all_keys.size(); i++) {
  1239. ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
  1240. data_iter++;
  1241. }
  1242. }
  1243. {
  1244. // Test iterating all data backward
  1245. std::vector<Slice> all_keys;
  1246. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  1247. std::string prop_value;
  1248. ASSERT_OK(
  1249. iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1250. ASSERT_EQ("1", prop_value);
  1251. all_keys.push_back(iter->key());
  1252. }
  1253. ASSERT_EQ(all_keys.size(), true_data.size());
  1254. // Verify that all keys slices are valid (backward)
  1255. auto data_iter = true_data.rbegin();
  1256. for (size_t i = 0; i < all_keys.size(); i++) {
  1257. ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
  1258. data_iter++;
  1259. }
  1260. }
  1261. delete iter;
  1262. }
  1263. };
  1264. TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
  1265. PinnedDataIteratorRandomized(TestConfig::NORMAL);
  1266. }
  1267. TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
  1268. PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
  1269. }
  1270. TEST_P(DBIteratorTestForPinnedData,
  1271. PinnedDataIteratorRandomizedCompactBeforeRead) {
  1272. PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
  1273. }
  1274. TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
  1275. PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
  1276. }
  1277. #ifndef ROCKSDB_LITE
  1278. TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
  1279. Options options = CurrentOptions();
  1280. BlockBasedTableOptions table_options;
  1281. table_options.use_delta_encoding = false;
  1282. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1283. options.disable_auto_compactions = true;
  1284. options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
  1285. DestroyAndReopen(options);
  1286. std::map<std::string, std::string> true_data;
  1287. // Generate 4 sst files in L2
  1288. Random rnd(301);
  1289. for (int i = 1; i <= 1000; i++) {
  1290. std::string k = Key(i * 3);
  1291. std::string v = RandomString(&rnd, 100);
  1292. ASSERT_OK(Put(k, v));
  1293. true_data[k] = v;
  1294. if (i % 250 == 0) {
  1295. ASSERT_OK(Flush());
  1296. }
  1297. }
  1298. ASSERT_EQ(FilesPerLevel(0), "4");
  1299. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  1300. ASSERT_EQ(FilesPerLevel(0), "0,4");
  1301. // Generate 4 sst files in L0
  1302. for (int i = 1; i <= 1000; i++) {
  1303. std::string k = Key(i * 2);
  1304. std::string v = RandomString(&rnd, 100);
  1305. ASSERT_OK(Put(k, v));
  1306. true_data[k] = v;
  1307. if (i % 250 == 0) {
  1308. ASSERT_OK(Flush());
  1309. }
  1310. }
  1311. ASSERT_EQ(FilesPerLevel(0), "4,4");
  1312. // Add some keys/values in memtables
  1313. for (int i = 1; i <= 1000; i++) {
  1314. std::string k = Key(i);
  1315. std::string v = RandomString(&rnd, 100);
  1316. ASSERT_OK(Put(k, v));
  1317. true_data[k] = v;
  1318. }
  1319. ASSERT_EQ(FilesPerLevel(0), "4,4");
  1320. ReadOptions ro;
  1321. ro.pin_data = true;
  1322. auto iter = NewIterator(ro);
  1323. std::vector<std::pair<Slice, std::string>> results;
  1324. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  1325. std::string prop_value;
  1326. ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1327. ASSERT_EQ("1", prop_value);
  1328. results.emplace_back(iter->key(), iter->value().ToString());
  1329. }
  1330. ASSERT_EQ(results.size(), true_data.size());
  1331. auto data_iter = true_data.begin();
  1332. for (size_t i = 0; i < results.size(); i++, data_iter++) {
  1333. auto& kv = results[i];
  1334. ASSERT_EQ(kv.first, data_iter->first);
  1335. ASSERT_EQ(kv.second, data_iter->second);
  1336. }
  1337. delete iter;
  1338. }
  1339. #endif
  1340. TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
  1341. Options options = CurrentOptions();
  1342. BlockBasedTableOptions table_options;
  1343. table_options.use_delta_encoding = false;
  1344. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1345. options.merge_operator = MergeOperators::CreateUInt64AddOperator();
  1346. DestroyAndReopen(options);
  1347. std::string numbers[7];
  1348. for (int val = 0; val <= 6; val++) {
  1349. PutFixed64(numbers + val, val);
  1350. }
  1351. // +1 all keys in range [ 0 => 999]
  1352. for (int i = 0; i < 1000; i++) {
  1353. WriteOptions wo;
  1354. ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
  1355. }
  1356. // +2 all keys divisible by 2 in range [ 0 => 999]
  1357. for (int i = 0; i < 1000; i += 2) {
  1358. WriteOptions wo;
  1359. ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
  1360. }
  1361. // +3 all keys divisible by 5 in range [ 0 => 999]
  1362. for (int i = 0; i < 1000; i += 5) {
  1363. WriteOptions wo;
  1364. ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
  1365. }
  1366. ReadOptions ro;
  1367. ro.pin_data = true;
  1368. auto iter = NewIterator(ro);
  1369. std::vector<std::pair<Slice, std::string>> results;
  1370. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  1371. std::string prop_value;
  1372. ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1373. ASSERT_EQ("1", prop_value);
  1374. results.emplace_back(iter->key(), iter->value().ToString());
  1375. }
  1376. ASSERT_EQ(results.size(), 1000);
  1377. for (size_t i = 0; i < results.size(); i++) {
  1378. auto& kv = results[i];
  1379. ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
  1380. int expected_val = 1;
  1381. if (i % 2 == 0) {
  1382. expected_val += 2;
  1383. }
  1384. if (i % 5 == 0) {
  1385. expected_val += 3;
  1386. }
  1387. ASSERT_EQ(kv.second, numbers[expected_val]);
  1388. }
  1389. delete iter;
  1390. }
  1391. TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
  1392. Options options = CurrentOptions();
  1393. BlockBasedTableOptions table_options;
  1394. table_options.use_delta_encoding = false;
  1395. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1396. options.write_buffer_size = 100000;
  1397. DestroyAndReopen(options);
  1398. Random rnd(301);
  1399. std::map<std::string, std::string> true_data;
  1400. for (int i = 0; i < 1000; i++) {
  1401. std::string k = RandomString(&rnd, 10);
  1402. std::string v = RandomString(&rnd, 1000);
  1403. ASSERT_OK(Put(k, v));
  1404. true_data[k] = v;
  1405. }
  1406. ReadOptions ro;
  1407. ro.pin_data = true;
  1408. auto iter = NewIterator(ro);
  1409. // Delete 50% of the keys and update the other 50%
  1410. for (auto& kv : true_data) {
  1411. if (rnd.OneIn(2)) {
  1412. ASSERT_OK(Delete(kv.first));
  1413. } else {
  1414. std::string new_val = RandomString(&rnd, 1000);
  1415. ASSERT_OK(Put(kv.first, new_val));
  1416. }
  1417. }
  1418. std::vector<std::pair<Slice, std::string>> results;
  1419. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  1420. std::string prop_value;
  1421. ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
  1422. ASSERT_EQ("1", prop_value);
  1423. results.emplace_back(iter->key(), iter->value().ToString());
  1424. }
  1425. auto data_iter = true_data.begin();
  1426. for (size_t i = 0; i < results.size(); i++, data_iter++) {
  1427. auto& kv = results[i];
  1428. ASSERT_EQ(kv.first, data_iter->first);
  1429. ASSERT_EQ(kv.second, data_iter->second);
  1430. }
  1431. delete iter;
  1432. }
  1433. class SliceTransformLimitedDomainGeneric : public SliceTransform {
  1434. const char* Name() const override {
  1435. return "SliceTransformLimitedDomainGeneric";
  1436. }
  1437. Slice Transform(const Slice& src) const override {
  1438. return Slice(src.data(), 1);
  1439. }
  1440. bool InDomain(const Slice& src) const override {
  1441. // prefix will be x????
  1442. return src.size() >= 1;
  1443. }
  1444. bool InRange(const Slice& dst) const override {
  1445. // prefix will be x????
  1446. return dst.size() == 1;
  1447. }
  1448. };
  1449. TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
  1450. Options options = CurrentOptions();
  1451. options.prefix_extractor.reset(NewFixedPrefixTransform(1));
  1452. options.disable_auto_compactions = true;
  1453. // Enable prefix bloom for SST files
  1454. BlockBasedTableOptions table_options;
  1455. table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  1456. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1457. DestroyAndReopen(options);
  1458. ASSERT_OK(Put("a1", "va1"));
  1459. ASSERT_OK(Put("a2", "va2"));
  1460. ASSERT_OK(Put("a3", "va3"));
  1461. ASSERT_OK(Flush());
  1462. ASSERT_OK(Put("b1", "vb1"));
  1463. ASSERT_OK(Put("b2", "vb2"));
  1464. ASSERT_OK(Put("b3", "vb3"));
  1465. ASSERT_OK(Flush());
  1466. ASSERT_OK(Put("b4", "vb4"));
  1467. ASSERT_OK(Put("d1", "vd1"));
  1468. ASSERT_OK(Put("d2", "vd2"));
  1469. ASSERT_OK(Put("d4", "vd4"));
  1470. ASSERT_OK(Flush());
  1471. MoveFilesToLevel(1);
  1472. {
  1473. ReadOptions ro;
  1474. Iterator* iter = NewIterator(ro);
  1475. iter->SeekForPrev("a4");
  1476. ASSERT_EQ(iter->key().ToString(), "a3");
  1477. ASSERT_EQ(iter->value().ToString(), "va3");
  1478. iter->SeekForPrev("c2");
  1479. ASSERT_EQ(iter->key().ToString(), "b3");
  1480. iter->SeekForPrev("d3");
  1481. ASSERT_EQ(iter->key().ToString(), "d2");
  1482. iter->SeekForPrev("b5");
  1483. ASSERT_EQ(iter->key().ToString(), "b4");
  1484. delete iter;
  1485. }
  1486. {
  1487. ReadOptions ro;
  1488. ro.prefix_same_as_start = true;
  1489. Iterator* iter = NewIterator(ro);
  1490. iter->SeekForPrev("c2");
  1491. ASSERT_TRUE(!iter->Valid());
  1492. delete iter;
  1493. }
  1494. }
  1495. TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
  1496. Options options = CurrentOptions();
  1497. options.prefix_extractor =
  1498. std::make_shared<SliceTransformLimitedDomainGeneric>();
  1499. options.disable_auto_compactions = true;
  1500. // Enable prefix bloom for SST files
  1501. BlockBasedTableOptions table_options;
  1502. table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
  1503. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1504. DestroyAndReopen(options);
  1505. ASSERT_OK(Put("a1", "va1"));
  1506. ASSERT_OK(Put("a2", "va2"));
  1507. ASSERT_OK(Put("a3", "va3"));
  1508. ASSERT_OK(Flush());
  1509. ASSERT_OK(Put("b1", "vb1"));
  1510. ASSERT_OK(Put("b2", "vb2"));
  1511. ASSERT_OK(Put("b3", "vb3"));
  1512. ASSERT_OK(Flush());
  1513. ASSERT_OK(Put("b4", "vb4"));
  1514. ASSERT_OK(Put("d1", "vd1"));
  1515. ASSERT_OK(Put("d2", "vd2"));
  1516. ASSERT_OK(Put("d4", "vd4"));
  1517. ASSERT_OK(Flush());
  1518. MoveFilesToLevel(1);
  1519. {
  1520. ReadOptions ro;
  1521. Iterator* iter = NewIterator(ro);
  1522. iter->SeekForPrev("a4");
  1523. ASSERT_EQ(iter->key().ToString(), "a3");
  1524. ASSERT_EQ(iter->value().ToString(), "va3");
  1525. iter->SeekForPrev("c2");
  1526. ASSERT_EQ(iter->key().ToString(), "b3");
  1527. iter->SeekForPrev("d3");
  1528. ASSERT_EQ(iter->key().ToString(), "d2");
  1529. iter->SeekForPrev("b5");
  1530. ASSERT_EQ(iter->key().ToString(), "b4");
  1531. delete iter;
  1532. }
  1533. {
  1534. ReadOptions ro;
  1535. ro.prefix_same_as_start = true;
  1536. Iterator* iter = NewIterator(ro);
  1537. iter->SeekForPrev("c2");
  1538. ASSERT_TRUE(!iter->Valid());
  1539. delete iter;
  1540. }
  1541. }
  1542. TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
  1543. Options options = CurrentOptions();
  1544. BlockBasedTableOptions table_options;
  1545. table_options.block_size = 1; // every block will contain one entry
  1546. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  1547. options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
  1548. options.disable_auto_compactions = true;
  1549. options.max_sequential_skip_in_iterations = 8;
  1550. DestroyAndReopen(options);
  1551. // Putting such deletes will force DBIter::Prev() to fallback to a Seek
  1552. for (int file_num = 0; file_num < 10; file_num++) {
  1553. ASSERT_OK(Delete("key4"));
  1554. ASSERT_OK(Flush());
  1555. }
  1556. // First File containing 5 blocks of puts
  1557. ASSERT_OK(Put("key1", "val1.0"));
  1558. ASSERT_OK(Put("key2", "val2.0"));
  1559. ASSERT_OK(Put("key3", "val3.0"));
  1560. ASSERT_OK(Put("key4", "val4.0"));
  1561. ASSERT_OK(Put("key5", "val5.0"));
  1562. ASSERT_OK(Flush());
  1563. // Second file containing 9 blocks of merge operands
  1564. ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
  1565. ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
  1566. ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
  1567. ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
  1568. ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
  1569. ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
  1570. ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
  1571. ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
  1572. ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
  1573. ASSERT_OK(Flush());
  1574. {
  1575. ReadOptions ro;
  1576. ro.fill_cache = false;
  1577. Iterator* iter = NewIterator(ro);
  1578. iter->SeekToLast();
  1579. ASSERT_EQ(iter->key().ToString(), "key5");
  1580. ASSERT_EQ(iter->value().ToString(), "val5.0");
  1581. iter->Prev();
  1582. ASSERT_EQ(iter->key().ToString(), "key4");
  1583. ASSERT_EQ(iter->value().ToString(), "val4.0");
  1584. iter->Prev();
  1585. ASSERT_EQ(iter->key().ToString(), "key3");
  1586. ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
  1587. iter->Prev();
  1588. ASSERT_EQ(iter->key().ToString(), "key2");
  1589. ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
  1590. iter->Prev();
  1591. ASSERT_EQ(iter->key().ToString(), "key1");
  1592. ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
  1593. delete iter;
  1594. }
  1595. }
  1596. TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
  1597. Options options = CurrentOptions();
  1598. options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
  1599. options.disable_auto_compactions = true;
  1600. options.level0_slowdown_writes_trigger = (1 << 30);
  1601. options.level0_stop_writes_trigger = (1 << 30);
  1602. options.max_sequential_skip_in_iterations = 8;
  1603. DestroyAndReopen(options);
  1604. const int kNumKeys = 500;
  1605. // Small number of merge operands to make sure that DBIter::Prev() dont
  1606. // fall back to Seek()
  1607. const int kNumMergeOperands = 3;
  1608. // Use value size that will make sure that every block contain 1 key
  1609. const int kValSize =
  1610. static_cast<int>(BlockBasedTableOptions().block_size) * 4;
  1611. // Percentage of keys that wont get merge operations
  1612. const int kNoMergeOpPercentage = 20;
  1613. // Percentage of keys that will be deleted
  1614. const int kDeletePercentage = 10;
  1615. // For half of the key range we will write multiple deletes first to
  1616. // force DBIter::Prev() to fall back to Seek()
  1617. for (int file_num = 0; file_num < 10; file_num++) {
  1618. for (int i = 0; i < kNumKeys; i += 2) {
  1619. ASSERT_OK(Delete(Key(i)));
  1620. }
  1621. ASSERT_OK(Flush());
  1622. }
  1623. Random rnd(301);
  1624. std::map<std::string, std::string> true_data;
  1625. std::string gen_key;
  1626. std::string gen_val;
  1627. for (int i = 0; i < kNumKeys; i++) {
  1628. gen_key = Key(i);
  1629. gen_val = RandomString(&rnd, kValSize);
  1630. ASSERT_OK(Put(gen_key, gen_val));
  1631. true_data[gen_key] = gen_val;
  1632. }
  1633. ASSERT_OK(Flush());
  1634. // Separate values and merge operands in different file so that we
  1635. // make sure that we dont merge them while flushing but actually
  1636. // merge them in the read path
  1637. for (int i = 0; i < kNumKeys; i++) {
  1638. if (rnd.PercentTrue(kNoMergeOpPercentage)) {
  1639. // Dont give merge operations for some keys
  1640. continue;
  1641. }
  1642. for (int j = 0; j < kNumMergeOperands; j++) {
  1643. gen_key = Key(i);
  1644. gen_val = RandomString(&rnd, kValSize);
  1645. ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
  1646. true_data[gen_key] += "," + gen_val;
  1647. }
  1648. }
  1649. ASSERT_OK(Flush());
  1650. for (int i = 0; i < kNumKeys; i++) {
  1651. if (rnd.PercentTrue(kDeletePercentage)) {
  1652. gen_key = Key(i);
  1653. ASSERT_OK(Delete(gen_key));
  1654. true_data.erase(gen_key);
  1655. }
  1656. }
  1657. ASSERT_OK(Flush());
  1658. {
  1659. ReadOptions ro;
  1660. ro.fill_cache = false;
  1661. Iterator* iter = NewIterator(ro);
  1662. auto data_iter = true_data.rbegin();
  1663. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  1664. ASSERT_EQ(iter->key().ToString(), data_iter->first);
  1665. ASSERT_EQ(iter->value().ToString(), data_iter->second);
  1666. data_iter++;
  1667. }
  1668. ASSERT_EQ(data_iter, true_data.rend());
  1669. delete iter;
  1670. }
  1671. {
  1672. ReadOptions ro;
  1673. ro.fill_cache = false;
  1674. Iterator* iter = NewIterator(ro);
  1675. auto data_iter = true_data.rbegin();
  1676. int entries_right = 0;
  1677. std::string seek_key;
  1678. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  1679. // Verify key/value of current position
  1680. ASSERT_EQ(iter->key().ToString(), data_iter->first);
  1681. ASSERT_EQ(iter->value().ToString(), data_iter->second);
  1682. bool restore_position_with_seek = rnd.Uniform(2);
  1683. if (restore_position_with_seek) {
  1684. seek_key = iter->key().ToString();
  1685. }
  1686. // Do some Next() operations the restore the iterator to orignal position
  1687. int next_count =
  1688. entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
  1689. for (int i = 0; i < next_count; i++) {
  1690. iter->Next();
  1691. data_iter--;
  1692. ASSERT_EQ(iter->key().ToString(), data_iter->first);
  1693. ASSERT_EQ(iter->value().ToString(), data_iter->second);
  1694. }
  1695. if (restore_position_with_seek) {
  1696. // Restore orignal position using Seek()
  1697. iter->Seek(seek_key);
  1698. for (int i = 0; i < next_count; i++) {
  1699. data_iter++;
  1700. }
  1701. ASSERT_EQ(iter->key().ToString(), data_iter->first);
  1702. ASSERT_EQ(iter->value().ToString(), data_iter->second);
  1703. } else {
  1704. // Restore original position using Prev()
  1705. for (int i = 0; i < next_count; i++) {
  1706. iter->Prev();
  1707. data_iter++;
  1708. ASSERT_EQ(iter->key().ToString(), data_iter->first);
  1709. ASSERT_EQ(iter->value().ToString(), data_iter->second);
  1710. }
  1711. }
  1712. entries_right++;
  1713. data_iter++;
  1714. }
  1715. ASSERT_EQ(data_iter, true_data.rend());
  1716. delete iter;
  1717. }
  1718. }
  1719. TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
  1720. Options options = CurrentOptions();
  1721. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1722. DestroyAndReopen(options);
  1723. Random rnd(301);
  1724. for (int i = 0; i < 1000; i++) {
  1725. // Key 10 bytes / Value 10 bytes
  1726. ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
  1727. }
  1728. std::atomic<uint64_t> total_next(0);
  1729. std::atomic<uint64_t> total_next_found(0);
  1730. std::atomic<uint64_t> total_prev(0);
  1731. std::atomic<uint64_t> total_prev_found(0);
  1732. std::atomic<uint64_t> total_bytes(0);
  1733. std::vector<port::Thread> threads;
  1734. std::function<void()> reader_func_next = [&]() {
  1735. SetPerfLevel(kEnableCount);
  1736. get_perf_context()->Reset();
  1737. Iterator* iter = NewIterator(ReadOptions());
  1738. iter->SeekToFirst();
  1739. // Seek will bump ITER_BYTES_READ
  1740. uint64_t bytes = 0;
  1741. bytes += iter->key().size();
  1742. bytes += iter->value().size();
  1743. while (true) {
  1744. iter->Next();
  1745. total_next++;
  1746. if (!iter->Valid()) {
  1747. break;
  1748. }
  1749. total_next_found++;
  1750. bytes += iter->key().size();
  1751. bytes += iter->value().size();
  1752. }
  1753. delete iter;
  1754. ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
  1755. SetPerfLevel(kDisable);
  1756. total_bytes += bytes;
  1757. };
  1758. std::function<void()> reader_func_prev = [&]() {
  1759. SetPerfLevel(kEnableCount);
  1760. Iterator* iter = NewIterator(ReadOptions());
  1761. iter->SeekToLast();
  1762. // Seek will bump ITER_BYTES_READ
  1763. uint64_t bytes = 0;
  1764. bytes += iter->key().size();
  1765. bytes += iter->value().size();
  1766. while (true) {
  1767. iter->Prev();
  1768. total_prev++;
  1769. if (!iter->Valid()) {
  1770. break;
  1771. }
  1772. total_prev_found++;
  1773. bytes += iter->key().size();
  1774. bytes += iter->value().size();
  1775. }
  1776. delete iter;
  1777. ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
  1778. SetPerfLevel(kDisable);
  1779. total_bytes += bytes;
  1780. };
  1781. for (int i = 0; i < 10; i++) {
  1782. threads.emplace_back(reader_func_next);
  1783. }
  1784. for (int i = 0; i < 15; i++) {
  1785. threads.emplace_back(reader_func_prev);
  1786. }
  1787. for (auto& t : threads) {
  1788. t.join();
  1789. }
  1790. ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
  1791. ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
  1792. (uint64_t)total_next_found);
  1793. ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
  1794. ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
  1795. (uint64_t)total_prev_found);
  1796. ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes);
  1797. }
  1798. TEST_P(DBIteratorTest, ReadAhead) {
  1799. Options options;
  1800. env_->count_random_reads_ = true;
  1801. options.env = env_;
  1802. options.disable_auto_compactions = true;
  1803. options.write_buffer_size = 4 << 20;
  1804. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1805. BlockBasedTableOptions table_options;
  1806. table_options.block_size = 1024;
  1807. table_options.no_block_cache = true;
  1808. options.table_factory.reset(new BlockBasedTableFactory(table_options));
  1809. Reopen(options);
  1810. std::string value(1024, 'a');
  1811. for (int i = 0; i < 100; i++) {
  1812. Put(Key(i), value);
  1813. }
  1814. ASSERT_OK(Flush());
  1815. MoveFilesToLevel(2);
  1816. for (int i = 0; i < 100; i++) {
  1817. Put(Key(i), value);
  1818. }
  1819. ASSERT_OK(Flush());
  1820. MoveFilesToLevel(1);
  1821. for (int i = 0; i < 100; i++) {
  1822. Put(Key(i), value);
  1823. }
  1824. ASSERT_OK(Flush());
  1825. #ifndef ROCKSDB_LITE
  1826. ASSERT_EQ("1,1,1", FilesPerLevel());
  1827. #endif // !ROCKSDB_LITE
  1828. env_->random_read_bytes_counter_ = 0;
  1829. options.statistics->setTickerCount(NO_FILE_OPENS, 0);
  1830. ReadOptions read_options;
  1831. auto* iter = NewIterator(read_options);
  1832. iter->SeekToFirst();
  1833. int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
  1834. size_t bytes_read = env_->random_read_bytes_counter_;
  1835. delete iter;
  1836. int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
  1837. env_->random_read_bytes_counter_ = 0;
  1838. options.statistics->setTickerCount(NO_FILE_OPENS, 0);
  1839. read_options.readahead_size = 1024 * 10;
  1840. iter = NewIterator(read_options);
  1841. iter->SeekToFirst();
  1842. int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
  1843. size_t bytes_read_readahead = env_->random_read_bytes_counter_;
  1844. delete iter;
  1845. int64_t num_file_closes_readahead =
  1846. TestGetTickerCount(options, NO_FILE_CLOSES);
  1847. ASSERT_EQ(num_file_opens, num_file_opens_readahead);
  1848. ASSERT_EQ(num_file_closes, num_file_closes_readahead);
  1849. ASSERT_GT(bytes_read_readahead, bytes_read);
  1850. ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
  1851. // Verify correctness.
  1852. iter = NewIterator(read_options);
  1853. int count = 0;
  1854. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  1855. ASSERT_EQ(value, iter->value());
  1856. count++;
  1857. }
  1858. ASSERT_EQ(100, count);
  1859. for (int i = 0; i < 100; i++) {
  1860. iter->Seek(Key(i));
  1861. ASSERT_EQ(value, iter->value());
  1862. }
  1863. delete iter;
  1864. }
  1865. // Insert a key, create a snapshot iterator, overwrite key lots of times,
  1866. // seek to a smaller key. Expect DBIter to fall back to a seek instead of
  1867. // going through all the overwrites linearly.
  1868. TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
  1869. Options options = CurrentOptions();
  1870. options.env = env_;
  1871. options.create_if_missing = true;
  1872. options.max_sequential_skip_in_iterations = 3;
  1873. options.prefix_extractor = nullptr;
  1874. options.write_buffer_size = 1 << 27; // big enough to avoid flush
  1875. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1876. DestroyAndReopen(options);
  1877. // Insert.
  1878. ASSERT_OK(Put("b", "0"));
  1879. // Create iterator.
  1880. ReadOptions ro;
  1881. std::unique_ptr<Iterator> iter(NewIterator(ro));
  1882. // Insert a lot.
  1883. for (int i = 0; i < 100; ++i) {
  1884. ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
  1885. }
  1886. #ifndef ROCKSDB_LITE
  1887. // Check that memtable wasn't flushed.
  1888. std::string val;
  1889. ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
  1890. EXPECT_EQ("0", val);
  1891. #endif
  1892. // Seek iterator to a smaller key.
  1893. get_perf_context()->Reset();
  1894. iter->Seek("a");
  1895. ASSERT_TRUE(iter->Valid());
  1896. EXPECT_EQ("b", iter->key().ToString());
  1897. EXPECT_EQ("0", iter->value().ToString());
  1898. // Check that the seek didn't do too much work.
  1899. // Checks are not tight, just make sure that everything is well below 100.
  1900. EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
  1901. EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
  1902. EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
  1903. EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
  1904. EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
  1905. // Check that iterator did something like what we expect.
  1906. EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
  1907. EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
  1908. EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
  1909. EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
  1910. EXPECT_EQ(1, options.statistics->getTickerCount(
  1911. NUMBER_OF_RESEEKS_IN_ITERATION));
  1912. }
  1913. TEST_P(DBIteratorTest, Refresh) {
  1914. ASSERT_OK(Put("x", "y"));
  1915. std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
  1916. iter->Seek(Slice("a"));
  1917. ASSERT_TRUE(iter->Valid());
  1918. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1919. iter->Next();
  1920. ASSERT_FALSE(iter->Valid());
  1921. ASSERT_OK(Put("c", "d"));
  1922. iter->Seek(Slice("a"));
  1923. ASSERT_TRUE(iter->Valid());
  1924. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1925. iter->Next();
  1926. ASSERT_FALSE(iter->Valid());
  1927. iter->Refresh();
  1928. iter->Seek(Slice("a"));
  1929. ASSERT_TRUE(iter->Valid());
  1930. ASSERT_EQ(iter->key().compare(Slice("c")), 0);
  1931. iter->Next();
  1932. ASSERT_TRUE(iter->Valid());
  1933. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1934. iter->Next();
  1935. ASSERT_FALSE(iter->Valid());
  1936. dbfull()->Flush(FlushOptions());
  1937. ASSERT_OK(Put("m", "n"));
  1938. iter->Seek(Slice("a"));
  1939. ASSERT_TRUE(iter->Valid());
  1940. ASSERT_EQ(iter->key().compare(Slice("c")), 0);
  1941. iter->Next();
  1942. ASSERT_TRUE(iter->Valid());
  1943. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1944. iter->Next();
  1945. ASSERT_FALSE(iter->Valid());
  1946. iter->Refresh();
  1947. iter->Seek(Slice("a"));
  1948. ASSERT_TRUE(iter->Valid());
  1949. ASSERT_EQ(iter->key().compare(Slice("c")), 0);
  1950. iter->Next();
  1951. ASSERT_TRUE(iter->Valid());
  1952. ASSERT_EQ(iter->key().compare(Slice("m")), 0);
  1953. iter->Next();
  1954. ASSERT_TRUE(iter->Valid());
  1955. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1956. iter->Next();
  1957. ASSERT_FALSE(iter->Valid());
  1958. iter.reset();
  1959. }
  1960. TEST_P(DBIteratorTest, RefreshWithSnapshot) {
  1961. ASSERT_OK(Put("x", "y"));
  1962. const Snapshot* snapshot = db_->GetSnapshot();
  1963. ReadOptions options;
  1964. options.snapshot = snapshot;
  1965. Iterator* iter = NewIterator(options);
  1966. iter->Seek(Slice("a"));
  1967. ASSERT_TRUE(iter->Valid());
  1968. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1969. iter->Next();
  1970. ASSERT_FALSE(iter->Valid());
  1971. ASSERT_OK(Put("c", "d"));
  1972. iter->Seek(Slice("a"));
  1973. ASSERT_TRUE(iter->Valid());
  1974. ASSERT_EQ(iter->key().compare(Slice("x")), 0);
  1975. iter->Next();
  1976. ASSERT_FALSE(iter->Valid());
  1977. Status s;
  1978. s = iter->Refresh();
  1979. ASSERT_TRUE(s.IsNotSupported());
  1980. db_->ReleaseSnapshot(snapshot);
  1981. delete iter;
  1982. }
  1983. TEST_P(DBIteratorTest, CreationFailure) {
  1984. SyncPoint::GetInstance()->SetCallBack(
  1985. "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
  1986. *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
  1987. });
  1988. SyncPoint::GetInstance()->EnableProcessing();
  1989. Iterator* iter = NewIterator(ReadOptions());
  1990. ASSERT_FALSE(iter->Valid());
  1991. ASSERT_TRUE(iter->status().IsCorruption());
  1992. delete iter;
  1993. }
  1994. TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
  1995. Options options = CurrentOptions();
  1996. options.max_sequential_skip_in_iterations = 3;
  1997. DestroyAndReopen(options);
  1998. // write a bunch of kvs to the database.
  1999. ASSERT_OK(Put("a", "1"));
  2000. ASSERT_OK(Put("y", "1"));
  2001. ASSERT_OK(Put("y1", "1"));
  2002. ASSERT_OK(Put("y2", "1"));
  2003. ASSERT_OK(Put("y3", "1"));
  2004. ASSERT_OK(Put("z", "1"));
  2005. ASSERT_OK(Flush());
  2006. ASSERT_OK(Put("a", "1"));
  2007. ASSERT_OK(Put("z", "1"));
  2008. ASSERT_OK(Put("bar", "1"));
  2009. ASSERT_OK(Put("foo", "1"));
  2010. std::string upper_bound = "x";
  2011. Slice ub_slice(upper_bound);
  2012. ReadOptions ro;
  2013. ro.iterate_upper_bound = &ub_slice;
  2014. ro.max_skippable_internal_keys = 1000;
  2015. Iterator* iter = NewIterator(ro);
  2016. iter->Seek("foo");
  2017. ASSERT_TRUE(iter->Valid());
  2018. ASSERT_EQ("foo", iter->key().ToString());
  2019. iter->Prev();
  2020. ASSERT_TRUE(iter->Valid());
  2021. ASSERT_OK(iter->status());
  2022. ASSERT_EQ("bar", iter->key().ToString());
  2023. delete iter;
  2024. }
  2025. TEST_P(DBIteratorTest, TableFilter) {
  2026. ASSERT_OK(Put("a", "1"));
  2027. dbfull()->Flush(FlushOptions());
  2028. ASSERT_OK(Put("b", "2"));
  2029. ASSERT_OK(Put("c", "3"));
  2030. dbfull()->Flush(FlushOptions());
  2031. ASSERT_OK(Put("d", "4"));
  2032. ASSERT_OK(Put("e", "5"));
  2033. ASSERT_OK(Put("f", "6"));
  2034. dbfull()->Flush(FlushOptions());
  2035. // Ensure the table_filter callback is called once for each table.
  2036. {
  2037. std::set<uint64_t> unseen{1, 2, 3};
  2038. ReadOptions opts;
  2039. opts.table_filter = [&](const TableProperties& props) {
  2040. auto it = unseen.find(props.num_entries);
  2041. if (it == unseen.end()) {
  2042. ADD_FAILURE() << "saw table properties with an unexpected "
  2043. << props.num_entries << " entries";
  2044. } else {
  2045. unseen.erase(it);
  2046. }
  2047. return true;
  2048. };
  2049. auto iter = NewIterator(opts);
  2050. iter->SeekToFirst();
  2051. ASSERT_EQ(IterStatus(iter), "a->1");
  2052. iter->Next();
  2053. ASSERT_EQ(IterStatus(iter), "b->2");
  2054. iter->Next();
  2055. ASSERT_EQ(IterStatus(iter), "c->3");
  2056. iter->Next();
  2057. ASSERT_EQ(IterStatus(iter), "d->4");
  2058. iter->Next();
  2059. ASSERT_EQ(IterStatus(iter), "e->5");
  2060. iter->Next();
  2061. ASSERT_EQ(IterStatus(iter), "f->6");
  2062. iter->Next();
  2063. ASSERT_FALSE(iter->Valid());
  2064. ASSERT_TRUE(unseen.empty());
  2065. delete iter;
  2066. }
  2067. // Ensure returning false in the table_filter hides the keys from that table
  2068. // during iteration.
  2069. {
  2070. ReadOptions opts;
  2071. opts.table_filter = [](const TableProperties& props) {
  2072. return props.num_entries != 2;
  2073. };
  2074. auto iter = NewIterator(opts);
  2075. iter->SeekToFirst();
  2076. ASSERT_EQ(IterStatus(iter), "a->1");
  2077. iter->Next();
  2078. ASSERT_EQ(IterStatus(iter), "d->4");
  2079. iter->Next();
  2080. ASSERT_EQ(IterStatus(iter), "e->5");
  2081. iter->Next();
  2082. ASSERT_EQ(IterStatus(iter), "f->6");
  2083. iter->Next();
  2084. ASSERT_FALSE(iter->Valid());
  2085. delete iter;
  2086. }
  2087. }
  2088. TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
  2089. Options options = CurrentOptions();
  2090. options.max_sequential_skip_in_iterations = 3;
  2091. DestroyAndReopen(options);
  2092. // write a bunch of kvs to the database.
  2093. ASSERT_OK(Put("a", "1"));
  2094. ASSERT_OK(Put("y", "1"));
  2095. ASSERT_OK(Put("z", "1"));
  2096. ASSERT_OK(Flush());
  2097. ASSERT_OK(Put("a", "1"));
  2098. ASSERT_OK(Put("z", "1"));
  2099. ASSERT_OK(Put("bar", "1"));
  2100. ASSERT_OK(Put("foo", "1"));
  2101. ASSERT_OK(Put("foo", "2"));
  2102. ASSERT_OK(Put("foo", "3"));
  2103. ASSERT_OK(Put("foo", "4"));
  2104. ASSERT_OK(Put("foo", "5"));
  2105. const Snapshot* snapshot = db_->GetSnapshot();
  2106. ASSERT_OK(Put("foo", "6"));
  2107. std::string upper_bound = "x";
  2108. Slice ub_slice(upper_bound);
  2109. ReadOptions ro;
  2110. ro.snapshot = snapshot;
  2111. ro.iterate_upper_bound = &ub_slice;
  2112. Iterator* iter = NewIterator(ro);
  2113. iter->SeekForPrev("goo");
  2114. ASSERT_TRUE(iter->Valid());
  2115. ASSERT_EQ("foo", iter->key().ToString());
  2116. iter->Prev();
  2117. ASSERT_TRUE(iter->Valid());
  2118. ASSERT_EQ("bar", iter->key().ToString());
  2119. delete iter;
  2120. db_->ReleaseSnapshot(snapshot);
  2121. }
  2122. TEST_P(DBIteratorTest, SkipStatistics) {
  2123. Options options = CurrentOptions();
  2124. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  2125. DestroyAndReopen(options);
  2126. int skip_count = 0;
  2127. // write a bunch of kvs to the database.
  2128. ASSERT_OK(Put("a", "1"));
  2129. ASSERT_OK(Put("b", "1"));
  2130. ASSERT_OK(Put("c", "1"));
  2131. ASSERT_OK(Flush());
  2132. ASSERT_OK(Put("d", "1"));
  2133. ASSERT_OK(Put("e", "1"));
  2134. ASSERT_OK(Put("f", "1"));
  2135. ASSERT_OK(Put("a", "2"));
  2136. ASSERT_OK(Put("b", "2"));
  2137. ASSERT_OK(Flush());
  2138. ASSERT_OK(Delete("d"));
  2139. ASSERT_OK(Delete("e"));
  2140. ASSERT_OK(Delete("f"));
  2141. Iterator* iter = NewIterator(ReadOptions());
  2142. int count = 0;
  2143. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  2144. ASSERT_OK(iter->status());
  2145. count++;
  2146. }
  2147. ASSERT_EQ(count, 3);
  2148. delete iter;
  2149. skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
  2150. ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
  2151. iter = NewIterator(ReadOptions());
  2152. count = 0;
  2153. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  2154. ASSERT_OK(iter->status());
  2155. count++;
  2156. }
  2157. ASSERT_EQ(count, 3);
  2158. delete iter;
  2159. skip_count += 8; // Same as above, but in reverse order
  2160. ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
  2161. ASSERT_OK(Put("aa", "1"));
  2162. ASSERT_OK(Put("ab", "1"));
  2163. ASSERT_OK(Put("ac", "1"));
  2164. ASSERT_OK(Put("ad", "1"));
  2165. ASSERT_OK(Flush());
  2166. ASSERT_OK(Delete("ab"));
  2167. ASSERT_OK(Delete("ac"));
  2168. ASSERT_OK(Delete("ad"));
  2169. ReadOptions ro;
  2170. Slice prefix("b");
  2171. ro.iterate_upper_bound = &prefix;
  2172. iter = NewIterator(ro);
  2173. count = 0;
  2174. for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
  2175. ASSERT_OK(iter->status());
  2176. count++;
  2177. }
  2178. ASSERT_EQ(count, 1);
  2179. delete iter;
  2180. skip_count += 6; // 3 deletes + 3 original keys
  2181. ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
  2182. iter = NewIterator(ro);
  2183. count = 0;
  2184. for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  2185. ASSERT_OK(iter->status());
  2186. count++;
  2187. }
  2188. ASSERT_EQ(count, 2);
  2189. delete iter;
  2190. // 3 deletes + 3 original keys + lower sequence of "a"
  2191. skip_count += 7;
  2192. ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
  2193. }
  2194. TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
  2195. Options options = CurrentOptions();
  2196. DestroyAndReopen(options);
  2197. ReadOptions ropts;
  2198. ropts.max_skippable_internal_keys = 2;
  2199. Put("1", "val_1");
  2200. // Add more tombstones than max_skippable_internal_keys so that Next() fails.
  2201. Delete("2");
  2202. Delete("3");
  2203. Delete("4");
  2204. Delete("5");
  2205. Put("6", "val_6");
  2206. std::unique_ptr<Iterator> iter(NewIterator(ropts));
  2207. iter->SeekToFirst();
  2208. ASSERT_TRUE(iter->Valid());
  2209. ASSERT_EQ(iter->key().ToString(), "1");
  2210. ASSERT_EQ(iter->value().ToString(), "val_1");
  2211. // This should fail as incomplete due to too many non-visible internal keys on
  2212. // the way to the next valid user key.
  2213. iter->Next();
  2214. ASSERT_TRUE(!iter->Valid());
  2215. ASSERT_TRUE(iter->status().IsIncomplete());
  2216. // Get the internal key at which Next() failed.
  2217. std::string prop_value;
  2218. ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
  2219. ASSERT_EQ("4", prop_value);
  2220. // Create a new iterator to seek to the internal key.
  2221. std::unique_ptr<Iterator> iter2(NewIterator(ropts));
  2222. iter2->Seek(prop_value);
  2223. ASSERT_TRUE(iter2->Valid());
  2224. ASSERT_OK(iter2->status());
  2225. ASSERT_EQ(iter2->key().ToString(), "6");
  2226. ASSERT_EQ(iter2->value().ToString(), "val_6");
  2227. }
  2228. // Reproduces a former bug where iterator would skip some records when DBIter
  2229. // re-seeks subiterator with Incomplete status.
  2230. TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
  2231. Options options = CurrentOptions();
  2232. BlockBasedTableOptions table_options;
  2233. // Make sure the sst file has more than one block.
  2234. table_options.flush_block_policy_factory =
  2235. std::make_shared<FlushBlockEveryKeyPolicyFactory>();
  2236. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  2237. DestroyAndReopen(options);
  2238. // Two records in sst file, each in its own block.
  2239. Put("b", "");
  2240. Put("d", "");
  2241. Flush();
  2242. // Create a nonblocking iterator before writing to memtable.
  2243. ReadOptions ropt;
  2244. ropt.read_tier = kBlockCacheTier;
  2245. std::unique_ptr<Iterator> iter(NewIterator(ropt));
  2246. // Overwrite a key in memtable many times to hit
  2247. // max_sequential_skip_in_iterations (which is 8 by default).
  2248. for (int i = 0; i < 20; ++i) {
  2249. Put("c", "");
  2250. }
  2251. // Load the second block in sst file into the block cache.
  2252. {
  2253. std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
  2254. iter2->Seek("d");
  2255. }
  2256. // Finally seek the nonblocking iterator.
  2257. iter->Seek("a");
  2258. // With the bug, the status used to be OK, and the iterator used to point to
  2259. // "d".
  2260. EXPECT_TRUE(iter->status().IsIncomplete());
  2261. }
  2262. TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
  2263. Put("a", "");
  2264. Put("b", "");
  2265. Flush();
  2266. ReadOptions ropt;
  2267. Slice ub = "b";
  2268. ropt.iterate_upper_bound = &ub;
  2269. std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
  2270. it->SeekForPrev("a");
  2271. ASSERT_TRUE(it->Valid());
  2272. ASSERT_OK(it->status());
  2273. ASSERT_EQ("a", it->key().ToString());
  2274. it->Next();
  2275. ASSERT_FALSE(it->Valid());
  2276. ASSERT_OK(it->status());
  2277. it->SeekForPrev("a");
  2278. ASSERT_OK(it->status());
  2279. ASSERT_TRUE(it->Valid());
  2280. ASSERT_EQ("a", it->key().ToString());
  2281. }
  2282. TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
  2283. Options options = CurrentOptions();
  2284. options.compression = CompressionType::kNoCompression;
  2285. BlockBasedTableOptions table_options;
  2286. table_options.block_size = 800;
  2287. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  2288. Reopen(options);
  2289. Random rnd(301);
  2290. std::string random_str = RandomString(&rnd, 180);
  2291. ASSERT_OK(Put("1", random_str));
  2292. ASSERT_OK(Put("2", random_str));
  2293. ASSERT_OK(Put("3", random_str));
  2294. ASSERT_OK(Put("4", random_str));
  2295. // A new block
  2296. ASSERT_OK(Put("5", random_str));
  2297. ASSERT_OK(Put("6", random_str));
  2298. ASSERT_OK(Put("7", random_str));
  2299. ASSERT_OK(Flush());
  2300. ASSERT_OK(Put("8", random_str));
  2301. ASSERT_OK(Put("9", random_str));
  2302. ASSERT_OK(Flush());
  2303. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  2304. int num_find_file_in_level = 0;
  2305. int num_idx_blk_seek = 0;
  2306. SyncPoint::GetInstance()->SetCallBack(
  2307. "LevelIterator::Seek:BeforeFindFile",
  2308. [&](void* /*arg*/) { num_find_file_in_level++; });
  2309. SyncPoint::GetInstance()->SetCallBack(
  2310. "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
  2311. SyncPoint::GetInstance()->EnableProcessing();
  2312. {
  2313. std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
  2314. iter->Seek("1");
  2315. ASSERT_TRUE(iter->Valid());
  2316. ASSERT_EQ(1, num_find_file_in_level);
  2317. ASSERT_EQ(1, num_idx_blk_seek);
  2318. iter->Seek("2");
  2319. ASSERT_TRUE(iter->Valid());
  2320. ASSERT_EQ(1, num_find_file_in_level);
  2321. ASSERT_EQ(1, num_idx_blk_seek);
  2322. iter->Seek("3");
  2323. ASSERT_TRUE(iter->Valid());
  2324. ASSERT_EQ(1, num_find_file_in_level);
  2325. ASSERT_EQ(1, num_idx_blk_seek);
  2326. iter->Next();
  2327. ASSERT_TRUE(iter->Valid());
  2328. ASSERT_EQ(1, num_find_file_in_level);
  2329. ASSERT_EQ(1, num_idx_blk_seek);
  2330. iter->Seek("5");
  2331. ASSERT_TRUE(iter->Valid());
  2332. ASSERT_EQ(1, num_find_file_in_level);
  2333. ASSERT_EQ(2, num_idx_blk_seek);
  2334. iter->Seek("6");
  2335. ASSERT_TRUE(iter->Valid());
  2336. ASSERT_EQ(1, num_find_file_in_level);
  2337. ASSERT_EQ(2, num_idx_blk_seek);
  2338. iter->Seek("7");
  2339. ASSERT_TRUE(iter->Valid());
  2340. ASSERT_EQ(1, num_find_file_in_level);
  2341. ASSERT_EQ(3, num_idx_blk_seek);
  2342. iter->Seek("8");
  2343. ASSERT_TRUE(iter->Valid());
  2344. ASSERT_EQ(2, num_find_file_in_level);
  2345. // Still re-seek because "8" is the boundary key, which has
  2346. // the same user key as the seek key.
  2347. ASSERT_EQ(4, num_idx_blk_seek);
  2348. iter->Seek("5");
  2349. ASSERT_TRUE(iter->Valid());
  2350. ASSERT_EQ(3, num_find_file_in_level);
  2351. ASSERT_EQ(5, num_idx_blk_seek);
  2352. iter->Next();
  2353. ASSERT_TRUE(iter->Valid());
  2354. ASSERT_EQ(3, num_find_file_in_level);
  2355. ASSERT_EQ(5, num_idx_blk_seek);
  2356. // Seek backward never triggers the index block seek to be skipped
  2357. iter->Seek("5");
  2358. ASSERT_TRUE(iter->Valid());
  2359. ASSERT_EQ(3, num_find_file_in_level);
  2360. ASSERT_EQ(6, num_idx_blk_seek);
  2361. }
  2362. SyncPoint::GetInstance()->DisableProcessing();
  2363. }
  2364. // MyRocks may change iterate bounds before seek. Simply test to make sure such
  2365. // usage doesn't break iterator.
  2366. TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
  2367. Options options = CurrentOptions();
  2368. options.compression = CompressionType::kNoCompression;
  2369. BlockBasedTableOptions table_options;
  2370. table_options.block_size = 100;
  2371. options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  2372. std::string value(50, 'v');
  2373. Reopen(options);
  2374. ASSERT_OK(Put("aaa", value));
  2375. ASSERT_OK(Flush());
  2376. ASSERT_OK(Put("bbb", "v"));
  2377. ASSERT_OK(Put("ccc", "v"));
  2378. ASSERT_OK(Put("ddd", "v"));
  2379. ASSERT_OK(Flush());
  2380. ASSERT_OK(Put("eee", "v"));
  2381. ASSERT_OK(Flush());
  2382. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  2383. std::string ub1 = "e";
  2384. std::string ub2 = "c";
  2385. Slice ub(ub1);
  2386. ReadOptions read_opts1;
  2387. read_opts1.iterate_upper_bound = &ub;
  2388. Iterator* iter = NewIterator(read_opts1);
  2389. // Seek and iterate accross block boundary.
  2390. iter->Seek("b");
  2391. ASSERT_TRUE(iter->Valid());
  2392. ASSERT_OK(iter->status());
  2393. ASSERT_EQ("bbb", iter->key());
  2394. ub = Slice(ub2);
  2395. iter->Seek("b");
  2396. ASSERT_TRUE(iter->Valid());
  2397. ASSERT_OK(iter->status());
  2398. ASSERT_EQ("bbb", iter->key());
  2399. iter->Next();
  2400. ASSERT_FALSE(iter->Valid());
  2401. ASSERT_OK(iter->status());
  2402. delete iter;
  2403. std::string lb1 = "a";
  2404. std::string lb2 = "c";
  2405. Slice lb(lb1);
  2406. ReadOptions read_opts2;
  2407. read_opts2.iterate_lower_bound = &lb;
  2408. iter = NewIterator(read_opts2);
  2409. iter->SeekForPrev("d");
  2410. ASSERT_TRUE(iter->Valid());
  2411. ASSERT_OK(iter->status());
  2412. ASSERT_EQ("ccc", iter->key());
  2413. lb = Slice(lb2);
  2414. iter->SeekForPrev("d");
  2415. ASSERT_TRUE(iter->Valid());
  2416. ASSERT_OK(iter->status());
  2417. ASSERT_EQ("ccc", iter->key());
  2418. iter->Prev();
  2419. ASSERT_FALSE(iter->Valid());
  2420. ASSERT_OK(iter->status());
  2421. delete iter;
  2422. }
  2423. TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
  2424. ASSERT_OK(Put("aaa", "v"));
  2425. ASSERT_OK(Put("bbb", "v"));
  2426. ASSERT_OK(Flush());
  2427. ASSERT_OK(Put("ccc", "v"));
  2428. ASSERT_OK(Put("ddd", "v"));
  2429. ASSERT_OK(Flush());
  2430. // Move both files to bottom level.
  2431. ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
  2432. Slice lower_bound("b");
  2433. ReadOptions read_opts;
  2434. read_opts.iterate_lower_bound = &lower_bound;
  2435. std::unique_ptr<Iterator> iter(NewIterator(read_opts));
  2436. iter->SeekForPrev("d");
  2437. ASSERT_TRUE(iter->Valid());
  2438. ASSERT_OK(iter->status());
  2439. ASSERT_EQ("ccc", iter->key());
  2440. iter->Prev();
  2441. ASSERT_TRUE(iter->Valid());
  2442. ASSERT_OK(iter->status());
  2443. ASSERT_EQ("bbb", iter->key());
  2444. iter->Prev();
  2445. ASSERT_FALSE(iter->Valid());
  2446. ASSERT_OK(iter->status());
  2447. }
  2448. INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
  2449. testing::Values(true, false));
  2450. // Tests how DBIter work with ReadCallback
  2451. class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
  2452. TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
  2453. class TestReadCallback : public ReadCallback {
  2454. public:
  2455. explicit TestReadCallback(SequenceNumber _max_visible_seq)
  2456. : ReadCallback(_max_visible_seq) {}
  2457. bool IsVisibleFullCheck(SequenceNumber seq) override {
  2458. return seq <= max_visible_seq_;
  2459. }
  2460. };
  2461. ASSERT_OK(Put("foo", "v1"));
  2462. ASSERT_OK(Put("foo", "v2"));
  2463. ASSERT_OK(Put("foo", "v3"));
  2464. ASSERT_OK(Put("a", "va"));
  2465. ASSERT_OK(Put("z", "vz"));
  2466. SequenceNumber seq1 = db_->GetLatestSequenceNumber();
  2467. TestReadCallback callback1(seq1);
  2468. ASSERT_OK(Put("foo", "v4"));
  2469. ASSERT_OK(Put("foo", "v5"));
  2470. ASSERT_OK(Put("bar", "v7"));
  2471. SequenceNumber seq2 = db_->GetLatestSequenceNumber();
  2472. auto* cfd =
  2473. reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
  2474. ->cfd();
  2475. // The iterator are suppose to see data before seq1.
  2476. Iterator* iter =
  2477. dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
  2478. // Seek
  2479. // The latest value of "foo" before seq1 is "v3"
  2480. iter->Seek("foo");
  2481. ASSERT_TRUE(iter->Valid());
  2482. ASSERT_OK(iter->status());
  2483. ASSERT_EQ("foo", iter->key());
  2484. ASSERT_EQ("v3", iter->value());
  2485. // "bar" is not visible to the iterator. It will move on to the next key
  2486. // "foo".
  2487. iter->Seek("bar");
  2488. ASSERT_TRUE(iter->Valid());
  2489. ASSERT_OK(iter->status());
  2490. ASSERT_EQ("foo", iter->key());
  2491. ASSERT_EQ("v3", iter->value());
  2492. // Next
  2493. // Seek to "a"
  2494. iter->Seek("a");
  2495. ASSERT_TRUE(iter->Valid());
  2496. ASSERT_OK(iter->status());
  2497. ASSERT_EQ("va", iter->value());
  2498. // "bar" is not visible to the iterator. It will move on to the next key
  2499. // "foo".
  2500. iter->Next();
  2501. ASSERT_TRUE(iter->Valid());
  2502. ASSERT_OK(iter->status());
  2503. ASSERT_EQ("foo", iter->key());
  2504. ASSERT_EQ("v3", iter->value());
  2505. // Prev
  2506. // Seek to "z"
  2507. iter->Seek("z");
  2508. ASSERT_TRUE(iter->Valid());
  2509. ASSERT_OK(iter->status());
  2510. ASSERT_EQ("vz", iter->value());
  2511. // The previous key is "foo", which is visible to the iterator.
  2512. iter->Prev();
  2513. ASSERT_TRUE(iter->Valid());
  2514. ASSERT_OK(iter->status());
  2515. ASSERT_EQ("foo", iter->key());
  2516. ASSERT_EQ("v3", iter->value());
  2517. // "bar" is not visible to the iterator. It will move on to the next key "a".
  2518. iter->Prev(); // skipping "bar"
  2519. ASSERT_TRUE(iter->Valid());
  2520. ASSERT_OK(iter->status());
  2521. ASSERT_EQ("a", iter->key());
  2522. ASSERT_EQ("va", iter->value());
  2523. // SeekForPrev
  2524. // The previous key is "foo", which is visible to the iterator.
  2525. iter->SeekForPrev("y");
  2526. ASSERT_TRUE(iter->Valid());
  2527. ASSERT_OK(iter->status());
  2528. ASSERT_EQ("foo", iter->key());
  2529. ASSERT_EQ("v3", iter->value());
  2530. // "bar" is not visible to the iterator. It will move on to the next key "a".
  2531. iter->SeekForPrev("bar");
  2532. ASSERT_TRUE(iter->Valid());
  2533. ASSERT_OK(iter->status());
  2534. ASSERT_EQ("a", iter->key());
  2535. ASSERT_EQ("va", iter->value());
  2536. delete iter;
  2537. // Prev beyond max_sequential_skip_in_iterations
  2538. uint64_t num_versions =
  2539. CurrentOptions().max_sequential_skip_in_iterations + 10;
  2540. for (uint64_t i = 0; i < num_versions; i++) {
  2541. ASSERT_OK(Put("bar", ToString(i)));
  2542. }
  2543. SequenceNumber seq3 = db_->GetLatestSequenceNumber();
  2544. TestReadCallback callback2(seq3);
  2545. ASSERT_OK(Put("bar", "v8"));
  2546. SequenceNumber seq4 = db_->GetLatestSequenceNumber();
  2547. // The iterator is suppose to see data before seq3.
  2548. iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
  2549. // Seek to "z", which is visible.
  2550. iter->Seek("z");
  2551. ASSERT_TRUE(iter->Valid());
  2552. ASSERT_OK(iter->status());
  2553. ASSERT_EQ("vz", iter->value());
  2554. // Previous key is "foo" and the last value "v5" is visible.
  2555. iter->Prev();
  2556. ASSERT_TRUE(iter->Valid());
  2557. ASSERT_OK(iter->status());
  2558. ASSERT_EQ("foo", iter->key());
  2559. ASSERT_EQ("v5", iter->value());
  2560. // Since the number of values of "bar" is more than
  2561. // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
  2562. // seek in forward direction. Here we test the fallback seek is correct.
  2563. // The last visible value should be (num_versions - 1), as "v8" is not
  2564. // visible.
  2565. iter->Prev();
  2566. ASSERT_TRUE(iter->Valid());
  2567. ASSERT_OK(iter->status());
  2568. ASSERT_EQ("bar", iter->key());
  2569. ASSERT_EQ(ToString(num_versions - 1), iter->value());
  2570. delete iter;
  2571. }
  2572. } // namespace ROCKSDB_NAMESPACE
  2573. int main(int argc, char** argv) {
  2574. ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  2575. ::testing::InitGoogleTest(&argc, argv);
  2576. return RUN_ALL_TESTS();
  2577. }