db_blob_basic_test.cc 78 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include <array>
  6. #include <sstream>
  7. #include <string>
  8. #include "cache/compressed_secondary_cache.h"
  9. #include "db/blob/blob_index.h"
  10. #include "db/blob/blob_log_format.h"
  11. #include "db/db_test_util.h"
  12. #include "db/db_with_timestamp_test_util.h"
  13. #include "port/stack_trace.h"
  14. #include "test_util/sync_point.h"
  15. #include "utilities/fault_injection_env.h"
  16. namespace ROCKSDB_NAMESPACE {
  17. class DBBlobBasicTest : public DBTestBase {
  18. protected:
  19. DBBlobBasicTest()
  20. : DBTestBase("db_blob_basic_test", /* env_do_fsync */ false) {}
  21. };
  22. TEST_F(DBBlobBasicTest, GetBlob) {
  23. Options options = GetDefaultOptions();
  24. options.enable_blob_files = true;
  25. options.min_blob_size = 0;
  26. Reopen(options);
  27. constexpr char key[] = "key";
  28. constexpr char blob_value[] = "blob_value";
  29. ASSERT_OK(Put(key, blob_value));
  30. ASSERT_OK(Flush());
  31. ASSERT_EQ(Get(key), blob_value);
  32. // Try again with no I/O allowed. The table and the necessary blocks should
  33. // already be in their respective caches; however, the blob itself can only be
  34. // read from the blob file, so the read should return Incomplete.
  35. ReadOptions read_options;
  36. read_options.read_tier = kBlockCacheTier;
  37. PinnableSlice result;
  38. ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result)
  39. .IsIncomplete());
  40. }
  41. TEST_F(DBBlobBasicTest, GetBlobFromCache) {
  42. Options options = GetDefaultOptions();
  43. LRUCacheOptions co;
  44. co.capacity = 2 << 20; // 2MB
  45. co.num_shard_bits = 2;
  46. co.metadata_charge_policy = kDontChargeCacheMetadata;
  47. auto backing_cache = NewLRUCache(co);
  48. options.enable_blob_files = true;
  49. options.blob_cache = backing_cache;
  50. BlockBasedTableOptions block_based_options;
  51. block_based_options.no_block_cache = false;
  52. block_based_options.block_cache = backing_cache;
  53. block_based_options.cache_index_and_filter_blocks = true;
  54. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  55. Reopen(options);
  56. constexpr char key[] = "key";
  57. constexpr char blob_value[] = "blob_value";
  58. ASSERT_OK(Put(key, blob_value));
  59. ASSERT_OK(Flush());
  60. ReadOptions read_options;
  61. read_options.fill_cache = false;
  62. {
  63. PinnableSlice result;
  64. read_options.read_tier = kReadAllTier;
  65. ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
  66. ASSERT_EQ(result, blob_value);
  67. result.Reset();
  68. read_options.read_tier = kBlockCacheTier;
  69. // Try again with no I/O allowed. Since we didn't re-fill the cache, the
  70. // blob itself can only be read from the blob file, so the read should
  71. // return Incomplete.
  72. ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result)
  73. .IsIncomplete());
  74. ASSERT_TRUE(result.empty());
  75. }
  76. read_options.fill_cache = true;
  77. {
  78. PinnableSlice result;
  79. read_options.read_tier = kReadAllTier;
  80. ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
  81. ASSERT_EQ(result, blob_value);
  82. result.Reset();
  83. read_options.read_tier = kBlockCacheTier;
  84. // Try again with no I/O allowed. The table and the necessary blocks/blobs
  85. // should already be in their respective caches.
  86. ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
  87. ASSERT_EQ(result, blob_value);
  88. }
  89. }
  90. TEST_F(DBBlobBasicTest, IterateBlobsFromCache) {
  91. Options options = GetDefaultOptions();
  92. LRUCacheOptions co;
  93. co.capacity = 2 << 20; // 2MB
  94. co.num_shard_bits = 2;
  95. co.metadata_charge_policy = kDontChargeCacheMetadata;
  96. auto backing_cache = NewLRUCache(co);
  97. options.enable_blob_files = true;
  98. options.blob_cache = backing_cache;
  99. BlockBasedTableOptions block_based_options;
  100. block_based_options.no_block_cache = false;
  101. block_based_options.block_cache = backing_cache;
  102. block_based_options.cache_index_and_filter_blocks = true;
  103. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  104. options.statistics = CreateDBStatistics();
  105. Reopen(options);
  106. int num_blobs = 5;
  107. std::vector<std::string> keys;
  108. std::vector<std::string> blobs;
  109. for (int i = 0; i < num_blobs; ++i) {
  110. keys.push_back("key" + std::to_string(i));
  111. blobs.push_back("blob" + std::to_string(i));
  112. ASSERT_OK(Put(keys[i], blobs[i]));
  113. }
  114. ASSERT_OK(Flush());
  115. ReadOptions read_options;
  116. {
  117. read_options.fill_cache = false;
  118. read_options.read_tier = kReadAllTier;
  119. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  120. ASSERT_OK(iter->status());
  121. int i = 0;
  122. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  123. ASSERT_OK(iter->status());
  124. ASSERT_EQ(iter->key().ToString(), keys[i]);
  125. ASSERT_EQ(iter->value().ToString(), blobs[i]);
  126. ++i;
  127. }
  128. ASSERT_OK(iter->status());
  129. ASSERT_EQ(i, num_blobs);
  130. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  131. }
  132. {
  133. read_options.fill_cache = false;
  134. read_options.read_tier = kBlockCacheTier;
  135. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  136. ASSERT_OK(iter->status());
  137. // Try again with no I/O allowed. Since we didn't re-fill the cache,
  138. // the blob itself can only be read from the blob file, so iter->Valid()
  139. // should be false.
  140. iter->SeekToFirst();
  141. ASSERT_NOK(iter->status());
  142. ASSERT_FALSE(iter->Valid());
  143. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  144. }
  145. {
  146. read_options.fill_cache = true;
  147. read_options.read_tier = kReadAllTier;
  148. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  149. ASSERT_OK(iter->status());
  150. // Read blobs from the file and refill the cache.
  151. int i = 0;
  152. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  153. ASSERT_OK(iter->status());
  154. ASSERT_EQ(iter->key().ToString(), keys[i]);
  155. ASSERT_EQ(iter->value().ToString(), blobs[i]);
  156. ++i;
  157. }
  158. ASSERT_OK(iter->status());
  159. ASSERT_EQ(i, num_blobs);
  160. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD),
  161. num_blobs);
  162. }
  163. {
  164. read_options.fill_cache = false;
  165. read_options.read_tier = kBlockCacheTier;
  166. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  167. ASSERT_OK(iter->status());
  168. // Try again with no I/O allowed. The table and the necessary blocks/blobs
  169. // should already be in their respective caches.
  170. int i = 0;
  171. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  172. ASSERT_OK(iter->status());
  173. ASSERT_EQ(iter->key().ToString(), keys[i]);
  174. ASSERT_EQ(iter->value().ToString(), blobs[i]);
  175. ++i;
  176. }
  177. ASSERT_OK(iter->status());
  178. ASSERT_EQ(i, num_blobs);
  179. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  180. }
  181. }
  182. TEST_F(DBBlobBasicTest, IterateBlobsFromCachePinning) {
  183. constexpr size_t min_blob_size = 6;
  184. Options options = GetDefaultOptions();
  185. LRUCacheOptions cache_options;
  186. cache_options.capacity = 2048;
  187. cache_options.num_shard_bits = 0;
  188. cache_options.metadata_charge_policy = kDontChargeCacheMetadata;
  189. options.blob_cache = NewLRUCache(cache_options);
  190. options.enable_blob_files = true;
  191. options.min_blob_size = min_blob_size;
  192. Reopen(options);
  193. // Put then iterate over three key-values. The second value is below the size
  194. // limit and is thus stored inline; the other two are stored separately as
  195. // blobs. We expect to have something pinned in the cache iff we are
  196. // positioned on a blob.
  197. constexpr char first_key[] = "first_key";
  198. constexpr char first_value[] = "long_value";
  199. static_assert(sizeof(first_value) - 1 >= min_blob_size,
  200. "first_value too short to be stored as blob");
  201. ASSERT_OK(Put(first_key, first_value));
  202. constexpr char second_key[] = "second_key";
  203. constexpr char second_value[] = "short";
  204. static_assert(sizeof(second_value) - 1 < min_blob_size,
  205. "second_value too long to be inlined");
  206. ASSERT_OK(Put(second_key, second_value));
  207. constexpr char third_key[] = "third_key";
  208. constexpr char third_value[] = "other_long_value";
  209. static_assert(sizeof(third_value) - 1 >= min_blob_size,
  210. "third_value too short to be stored as blob");
  211. ASSERT_OK(Put(third_key, third_value));
  212. ASSERT_OK(Flush());
  213. {
  214. ReadOptions read_options;
  215. read_options.fill_cache = true;
  216. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  217. iter->SeekToFirst();
  218. ASSERT_TRUE(iter->Valid());
  219. ASSERT_OK(iter->status());
  220. ASSERT_EQ(iter->key(), first_key);
  221. ASSERT_EQ(iter->value(), first_value);
  222. iter->Next();
  223. ASSERT_TRUE(iter->Valid());
  224. ASSERT_OK(iter->status());
  225. ASSERT_EQ(iter->key(), second_key);
  226. ASSERT_EQ(iter->value(), second_value);
  227. iter->Next();
  228. ASSERT_TRUE(iter->Valid());
  229. ASSERT_OK(iter->status());
  230. ASSERT_EQ(iter->key(), third_key);
  231. ASSERT_EQ(iter->value(), third_value);
  232. iter->Next();
  233. ASSERT_FALSE(iter->Valid());
  234. ASSERT_OK(iter->status());
  235. }
  236. {
  237. ReadOptions read_options;
  238. read_options.fill_cache = false;
  239. read_options.read_tier = kBlockCacheTier;
  240. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  241. iter->SeekToFirst();
  242. ASSERT_TRUE(iter->Valid());
  243. ASSERT_OK(iter->status());
  244. ASSERT_EQ(iter->key(), first_key);
  245. ASSERT_EQ(iter->value(), first_value);
  246. ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
  247. iter->Next();
  248. ASSERT_TRUE(iter->Valid());
  249. ASSERT_OK(iter->status());
  250. ASSERT_EQ(iter->key(), second_key);
  251. ASSERT_EQ(iter->value(), second_value);
  252. ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
  253. iter->Next();
  254. ASSERT_TRUE(iter->Valid());
  255. ASSERT_OK(iter->status());
  256. ASSERT_EQ(iter->key(), third_key);
  257. ASSERT_EQ(iter->value(), third_value);
  258. ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
  259. iter->Next();
  260. ASSERT_FALSE(iter->Valid());
  261. ASSERT_OK(iter->status());
  262. ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
  263. }
  264. {
  265. ReadOptions read_options;
  266. read_options.fill_cache = false;
  267. read_options.read_tier = kBlockCacheTier;
  268. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  269. iter->SeekToLast();
  270. ASSERT_TRUE(iter->Valid());
  271. ASSERT_OK(iter->status());
  272. ASSERT_EQ(iter->key(), third_key);
  273. ASSERT_EQ(iter->value(), third_value);
  274. ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
  275. iter->Prev();
  276. ASSERT_TRUE(iter->Valid());
  277. ASSERT_OK(iter->status());
  278. ASSERT_EQ(iter->key(), second_key);
  279. ASSERT_EQ(iter->value(), second_value);
  280. ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
  281. iter->Prev();
  282. ASSERT_TRUE(iter->Valid());
  283. ASSERT_OK(iter->status());
  284. ASSERT_EQ(iter->key(), first_key);
  285. ASSERT_EQ(iter->value(), first_value);
  286. ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);
  287. iter->Prev();
  288. ASSERT_FALSE(iter->Valid());
  289. ASSERT_OK(iter->status());
  290. ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
  291. }
  292. }
  293. TEST_F(DBBlobBasicTest, IterateBlobsAllowUnpreparedValue) {
  294. Options options = GetDefaultOptions();
  295. options.enable_blob_files = true;
  296. Reopen(options);
  297. constexpr size_t num_blobs = 5;
  298. std::vector<std::string> keys;
  299. std::vector<std::string> blobs;
  300. for (size_t i = 0; i < num_blobs; ++i) {
  301. keys.emplace_back("key" + std::to_string(i));
  302. blobs.emplace_back("blob" + std::to_string(i));
  303. ASSERT_OK(Put(keys[i], blobs[i]));
  304. }
  305. ASSERT_OK(Flush());
  306. ReadOptions read_options;
  307. read_options.allow_unprepared_value = true;
  308. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  309. {
  310. size_t i = 0;
  311. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  312. ASSERT_EQ(iter->key(), keys[i]);
  313. ASSERT_TRUE(iter->value().empty());
  314. ASSERT_OK(iter->status());
  315. ASSERT_TRUE(iter->PrepareValue());
  316. ASSERT_EQ(iter->key(), keys[i]);
  317. ASSERT_EQ(iter->value(), blobs[i]);
  318. ASSERT_OK(iter->status());
  319. ++i;
  320. }
  321. ASSERT_OK(iter->status());
  322. ASSERT_EQ(i, num_blobs);
  323. }
  324. {
  325. size_t i = 0;
  326. for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
  327. ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
  328. ASSERT_TRUE(iter->value().empty());
  329. ASSERT_OK(iter->status());
  330. ASSERT_TRUE(iter->PrepareValue());
  331. ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
  332. ASSERT_EQ(iter->value(), blobs[num_blobs - 1 - i]);
  333. ASSERT_OK(iter->status());
  334. ++i;
  335. }
  336. ASSERT_OK(iter->status());
  337. ASSERT_EQ(i, num_blobs);
  338. }
  339. {
  340. size_t i = 1;
  341. for (iter->Seek(keys[i]); iter->Valid(); iter->Next()) {
  342. ASSERT_EQ(iter->key(), keys[i]);
  343. ASSERT_TRUE(iter->value().empty());
  344. ASSERT_OK(iter->status());
  345. ASSERT_TRUE(iter->PrepareValue());
  346. ASSERT_EQ(iter->key(), keys[i]);
  347. ASSERT_EQ(iter->value(), blobs[i]);
  348. ASSERT_OK(iter->status());
  349. ++i;
  350. }
  351. ASSERT_OK(iter->status());
  352. ASSERT_EQ(i, num_blobs);
  353. }
  354. {
  355. size_t i = 1;
  356. for (iter->SeekForPrev(keys[num_blobs - 1 - i]); iter->Valid();
  357. iter->Prev()) {
  358. ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
  359. ASSERT_TRUE(iter->value().empty());
  360. ASSERT_OK(iter->status());
  361. ASSERT_TRUE(iter->PrepareValue());
  362. ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
  363. ASSERT_EQ(iter->value(), blobs[num_blobs - 1 - i]);
  364. ASSERT_OK(iter->status());
  365. ++i;
  366. }
  367. ASSERT_OK(iter->status());
  368. ASSERT_EQ(i, num_blobs);
  369. }
  370. }
  371. TEST_F(DBBlobBasicTest, MultiGetBlobs) {
  372. constexpr size_t min_blob_size = 6;
  373. Options options = GetDefaultOptions();
  374. options.enable_blob_files = true;
  375. options.min_blob_size = min_blob_size;
  376. Reopen(options);
  377. // Put then retrieve three key-values. The first value is below the size limit
  378. // and is thus stored inline; the other two are stored separately as blobs.
  379. constexpr size_t num_keys = 3;
  380. constexpr char first_key[] = "first_key";
  381. constexpr char first_value[] = "short";
  382. static_assert(sizeof(first_value) - 1 < min_blob_size,
  383. "first_value too long to be inlined");
  384. ASSERT_OK(Put(first_key, first_value));
  385. constexpr char second_key[] = "second_key";
  386. constexpr char second_value[] = "long_value";
  387. static_assert(sizeof(second_value) - 1 >= min_blob_size,
  388. "second_value too short to be stored as blob");
  389. ASSERT_OK(Put(second_key, second_value));
  390. constexpr char third_key[] = "third_key";
  391. constexpr char third_value[] = "other_long_value";
  392. static_assert(sizeof(third_value) - 1 >= min_blob_size,
  393. "third_value too short to be stored as blob");
  394. ASSERT_OK(Put(third_key, third_value));
  395. ASSERT_OK(Flush());
  396. ReadOptions read_options;
  397. std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
  398. {
  399. std::array<PinnableSlice, num_keys> values;
  400. std::array<Status, num_keys> statuses;
  401. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  402. keys.data(), values.data(), statuses.data());
  403. ASSERT_OK(statuses[0]);
  404. ASSERT_EQ(values[0], first_value);
  405. ASSERT_OK(statuses[1]);
  406. ASSERT_EQ(values[1], second_value);
  407. ASSERT_OK(statuses[2]);
  408. ASSERT_EQ(values[2], third_value);
  409. }
  410. // Try again with no I/O allowed. The table and the necessary blocks should
  411. // already be in their respective caches. The first (inlined) value should be
  412. // successfully read; however, the two blob values could only be read from the
  413. // blob file, so for those the read should return Incomplete.
  414. read_options.read_tier = kBlockCacheTier;
  415. {
  416. std::array<PinnableSlice, num_keys> values;
  417. std::array<Status, num_keys> statuses;
  418. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  419. keys.data(), values.data(), statuses.data());
  420. ASSERT_OK(statuses[0]);
  421. ASSERT_EQ(values[0], first_value);
  422. ASSERT_TRUE(statuses[1].IsIncomplete());
  423. ASSERT_TRUE(statuses[2].IsIncomplete());
  424. }
  425. }
  426. TEST_F(DBBlobBasicTest, MultiGetBlobsFromCache) {
  427. Options options = GetDefaultOptions();
  428. LRUCacheOptions co;
  429. co.capacity = 2 << 20; // 2MB
  430. co.num_shard_bits = 2;
  431. co.metadata_charge_policy = kDontChargeCacheMetadata;
  432. auto backing_cache = NewLRUCache(co);
  433. constexpr size_t min_blob_size = 6;
  434. options.min_blob_size = min_blob_size;
  435. options.create_if_missing = true;
  436. options.enable_blob_files = true;
  437. options.blob_cache = backing_cache;
  438. BlockBasedTableOptions block_based_options;
  439. block_based_options.no_block_cache = false;
  440. block_based_options.block_cache = backing_cache;
  441. block_based_options.cache_index_and_filter_blocks = true;
  442. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  443. DestroyAndReopen(options);
  444. // Put then retrieve three key-values. The first value is below the size limit
  445. // and is thus stored inline; the other two are stored separately as blobs.
  446. constexpr size_t num_keys = 3;
  447. constexpr char first_key[] = "first_key";
  448. constexpr char first_value[] = "short";
  449. static_assert(sizeof(first_value) - 1 < min_blob_size,
  450. "first_value too long to be inlined");
  451. ASSERT_OK(Put(first_key, first_value));
  452. constexpr char second_key[] = "second_key";
  453. constexpr char second_value[] = "long_value";
  454. static_assert(sizeof(second_value) - 1 >= min_blob_size,
  455. "second_value too short to be stored as blob");
  456. ASSERT_OK(Put(second_key, second_value));
  457. constexpr char third_key[] = "third_key";
  458. constexpr char third_value[] = "other_long_value";
  459. static_assert(sizeof(third_value) - 1 >= min_blob_size,
  460. "third_value too short to be stored as blob");
  461. ASSERT_OK(Put(third_key, third_value));
  462. ASSERT_OK(Flush());
  463. ReadOptions read_options;
  464. read_options.fill_cache = false;
  465. std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
  466. {
  467. std::array<PinnableSlice, num_keys> values;
  468. std::array<Status, num_keys> statuses;
  469. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  470. keys.data(), values.data(), statuses.data());
  471. ASSERT_OK(statuses[0]);
  472. ASSERT_EQ(values[0], first_value);
  473. ASSERT_OK(statuses[1]);
  474. ASSERT_EQ(values[1], second_value);
  475. ASSERT_OK(statuses[2]);
  476. ASSERT_EQ(values[2], third_value);
  477. }
  478. // Try again with no I/O allowed. The first (inlined) value should be
  479. // successfully read; however, the two blob values could only be read from the
  480. // blob file, so for those the read should return Incomplete.
  481. read_options.read_tier = kBlockCacheTier;
  482. {
  483. std::array<PinnableSlice, num_keys> values;
  484. std::array<Status, num_keys> statuses;
  485. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  486. keys.data(), values.data(), statuses.data());
  487. ASSERT_OK(statuses[0]);
  488. ASSERT_EQ(values[0], first_value);
  489. ASSERT_TRUE(statuses[1].IsIncomplete());
  490. ASSERT_TRUE(statuses[2].IsIncomplete());
  491. }
  492. // Fill the cache when reading blobs from the blob file.
  493. read_options.read_tier = kReadAllTier;
  494. read_options.fill_cache = true;
  495. {
  496. std::array<PinnableSlice, num_keys> values;
  497. std::array<Status, num_keys> statuses;
  498. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  499. keys.data(), values.data(), statuses.data());
  500. ASSERT_OK(statuses[0]);
  501. ASSERT_EQ(values[0], first_value);
  502. ASSERT_OK(statuses[1]);
  503. ASSERT_EQ(values[1], second_value);
  504. ASSERT_OK(statuses[2]);
  505. ASSERT_EQ(values[2], third_value);
  506. }
  507. // Try again with no I/O allowed. All blobs should be successfully read from
  508. // the cache.
  509. read_options.read_tier = kBlockCacheTier;
  510. {
  511. std::array<PinnableSlice, num_keys> values;
  512. std::array<Status, num_keys> statuses;
  513. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  514. keys.data(), values.data(), statuses.data());
  515. ASSERT_OK(statuses[0]);
  516. ASSERT_EQ(values[0], first_value);
  517. ASSERT_OK(statuses[1]);
  518. ASSERT_EQ(values[1], second_value);
  519. ASSERT_OK(statuses[2]);
  520. ASSERT_EQ(values[2], third_value);
  521. }
  522. }
  523. TEST_F(DBBlobBasicTest, MultiGetWithDirectIO) {
  524. Options options = GetDefaultOptions();
  525. // First, create an external SST file ["b"].
  526. const std::string file_path = dbname_ + "/test.sst";
  527. {
  528. SstFileWriter sst_file_writer(EnvOptions(), GetDefaultOptions());
  529. Status s = sst_file_writer.Open(file_path);
  530. ASSERT_OK(s);
  531. ASSERT_OK(sst_file_writer.Put("b", "b_value"));
  532. ASSERT_OK(sst_file_writer.Finish());
  533. }
  534. options.enable_blob_files = true;
  535. options.min_blob_size = 1000;
  536. options.use_direct_reads = true;
  537. options.allow_ingest_behind = true;
  538. // Open DB with fixed-prefix sst-partitioner so that compaction will cut
  539. // new table file when encountering a new key whose 1-byte prefix changes.
  540. constexpr size_t key_len = 1;
  541. options.sst_partitioner_factory =
  542. NewSstPartitionerFixedPrefixFactory(key_len);
  543. Status s = TryReopen(options);
  544. if (s.IsInvalidArgument()) {
  545. ROCKSDB_GTEST_SKIP("This test requires direct IO support");
  546. return;
  547. }
  548. ASSERT_OK(s);
  549. constexpr size_t num_keys = 3;
  550. constexpr size_t blob_size = 3000;
  551. constexpr char first_key[] = "a";
  552. const std::string first_blob(blob_size, 'a');
  553. ASSERT_OK(Put(first_key, first_blob));
  554. constexpr char second_key[] = "b";
  555. const std::string second_blob(2 * blob_size, 'b');
  556. ASSERT_OK(Put(second_key, second_blob));
  557. constexpr char third_key[] = "d";
  558. const std::string third_blob(blob_size, 'd');
  559. ASSERT_OK(Put(third_key, third_blob));
  560. // first_blob, second_blob and third_blob in the same blob file.
  561. // SST Blob file
  562. // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'|
  563. // | | | ^ ^ ^
  564. // | | | | | |
  565. // | | +---------|-------|--------+
  566. // | +-----------------|-------+
  567. // +-------------------------+
  568. ASSERT_OK(Flush());
  569. constexpr char fourth_key[] = "c";
  570. const std::string fourth_blob(blob_size, 'c');
  571. ASSERT_OK(Put(fourth_key, fourth_blob));
  572. // fourth_blob in another blob file.
  573. // SST Blob file SST Blob file
  574. // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| ["c"] |'cccc'|
  575. // | | | ^ ^ ^ | ^
  576. // | | | | | | | |
  577. // | | +---------|-------|--------+ +-------+
  578. // | +-----------------|-------+
  579. // +-------------------------+
  580. ASSERT_OK(Flush());
  581. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
  582. /*end=*/nullptr));
  583. // Due to the above sst partitioner, we get 4 L1 files. The blob files are
  584. // unchanged.
  585. // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
  586. // ^ ^ ^ ^
  587. // | | | |
  588. // L0 | | | |
  589. // L1 ["a"] ["b"] ["c"] | | ["d"] |
  590. // | | | | | |
  591. // | | +---------|-------|---------------+
  592. // | +-----------------|-------+
  593. // +-------------------------+
  594. ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
  595. {
  596. // Ingest the external SST file into bottommost level.
  597. std::vector<std::string> ext_files{file_path};
  598. IngestExternalFileOptions opts;
  599. opts.ingest_behind = true;
  600. ASSERT_OK(
  601. db_->IngestExternalFile(db_->DefaultColumnFamily(), ext_files, opts));
  602. }
  603. // Now the database becomes as follows.
  604. // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
  605. // ^ ^ ^ ^
  606. // | | | |
  607. // L0 | | | |
  608. // L1 ["a"] ["b"] ["c"] | | ["d"] |
  609. // | | | | | |
  610. // | | +---------|-------|---------------+
  611. // | +-----------------|-------+
  612. // +-------------------------+
  613. //
  614. // L6 ["b"]
  615. {
  616. // Compact ["b"] to bottommost level.
  617. Slice begin = Slice(second_key);
  618. Slice end = Slice(second_key);
  619. CompactRangeOptions cro;
  620. cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
  621. ASSERT_OK(db_->CompactRange(cro, &begin, &end));
  622. }
  623. // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
  624. // ^ ^ ^ ^
  625. // | | | |
  626. // L0 | | | |
  627. // L1 ["a"] ["c"] | | ["d"] |
  628. // | | | | |
  629. // | +---------|-------|---------------+
  630. // | +-----------------|-------+
  631. // +-------|-----------------+
  632. // |
  633. // L6 ["b"]
  634. ASSERT_EQ(3, NumTableFilesAtLevel(/*level=*/1));
  635. ASSERT_EQ(1, NumTableFilesAtLevel(/*level=*/6));
  636. bool called = false;
  637. SyncPoint::GetInstance()->ClearAllCallBacks();
  638. SyncPoint::GetInstance()->SetCallBack(
  639. "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* arg) {
  640. auto* aligned_reqs = static_cast<std::vector<FSReadRequest>*>(arg);
  641. assert(aligned_reqs);
  642. ASSERT_EQ(1, aligned_reqs->size());
  643. called = true;
  644. });
  645. SyncPoint::GetInstance()->EnableProcessing();
  646. std::array<Slice, num_keys> keys{{first_key, third_key, second_key}};
  647. {
  648. std::array<PinnableSlice, num_keys> values;
  649. std::array<Status, num_keys> statuses;
  650. // The MultiGet(), when constructing the KeyContexts, will process the keys
  651. // in such order: a, d, b. The reason is that ["a"] and ["d"] are in L1,
  652. // while ["b"] resides in L6.
  653. // Consequently, the original FSReadRequest list prepared by
  654. // Version::MultiGetblob() will be for "a", "d" and "b". It is unsorted as
  655. // follows:
  656. //
  657. // ["a", offset=30, len=3033],
  658. // ["d", offset=9096, len=3033],
  659. // ["b", offset=3063, len=6033]
  660. //
  661. // If we do not sort them before calling MultiRead() in DirectIO, then the
  662. // underlying IO merging logic will yield two requests.
  663. //
  664. // [offset=0, len=4096] (for "a")
  665. // [offset=0, len=12288] (result of merging the request for "d" and "b")
  666. //
  667. // We need to sort them in Version::MultiGetBlob() so that the underlying
  668. // IO merging logic in DirectIO mode works as expected. The correct
  669. // behavior will be one aligned request:
  670. //
  671. // [offset=0, len=12288]
  672. db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  673. keys.data(), values.data(), statuses.data());
  674. SyncPoint::GetInstance()->DisableProcessing();
  675. SyncPoint::GetInstance()->ClearAllCallBacks();
  676. ASSERT_TRUE(called);
  677. ASSERT_OK(statuses[0]);
  678. ASSERT_EQ(values[0], first_blob);
  679. ASSERT_OK(statuses[1]);
  680. ASSERT_EQ(values[1], third_blob);
  681. ASSERT_OK(statuses[2]);
  682. ASSERT_EQ(values[2], second_blob);
  683. }
  684. }
  685. TEST_F(DBBlobBasicTest, MultiGetBlobsFromMultipleFiles) {
  686. Options options = GetDefaultOptions();
  687. LRUCacheOptions co;
  688. co.capacity = 2 << 20; // 2MB
  689. co.num_shard_bits = 2;
  690. co.metadata_charge_policy = kDontChargeCacheMetadata;
  691. auto backing_cache = NewLRUCache(co);
  692. options.min_blob_size = 0;
  693. options.create_if_missing = true;
  694. options.enable_blob_files = true;
  695. options.blob_cache = backing_cache;
  696. BlockBasedTableOptions block_based_options;
  697. block_based_options.no_block_cache = false;
  698. block_based_options.block_cache = backing_cache;
  699. block_based_options.cache_index_and_filter_blocks = true;
  700. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  701. Reopen(options);
  702. constexpr size_t kNumBlobFiles = 3;
  703. constexpr size_t kNumBlobsPerFile = 3;
  704. constexpr size_t kNumKeys = kNumBlobsPerFile * kNumBlobFiles;
  705. std::vector<std::string> key_strs;
  706. std::vector<std::string> value_strs;
  707. for (size_t i = 0; i < kNumBlobFiles; ++i) {
  708. for (size_t j = 0; j < kNumBlobsPerFile; ++j) {
  709. std::string key = "key" + std::to_string(i) + "_" + std::to_string(j);
  710. std::string value =
  711. "value_as_blob" + std::to_string(i) + "_" + std::to_string(j);
  712. ASSERT_OK(Put(key, value));
  713. key_strs.push_back(key);
  714. value_strs.push_back(value);
  715. }
  716. ASSERT_OK(Flush());
  717. }
  718. assert(key_strs.size() == kNumKeys);
  719. std::array<Slice, kNumKeys> keys;
  720. for (size_t i = 0; i < keys.size(); ++i) {
  721. keys[i] = key_strs[i];
  722. }
  723. ReadOptions read_options;
  724. read_options.read_tier = kReadAllTier;
  725. read_options.fill_cache = false;
  726. {
  727. std::array<PinnableSlice, kNumKeys> values;
  728. std::array<Status, kNumKeys> statuses;
  729. db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
  730. keys.data(), values.data(), statuses.data());
  731. for (size_t i = 0; i < kNumKeys; ++i) {
  732. ASSERT_OK(statuses[i]);
  733. ASSERT_EQ(value_strs[i], values[i]);
  734. }
  735. }
  736. read_options.read_tier = kBlockCacheTier;
  737. {
  738. std::array<PinnableSlice, kNumKeys> values;
  739. std::array<Status, kNumKeys> statuses;
  740. db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
  741. keys.data(), values.data(), statuses.data());
  742. for (size_t i = 0; i < kNumKeys; ++i) {
  743. ASSERT_TRUE(statuses[i].IsIncomplete());
  744. ASSERT_TRUE(values[i].empty());
  745. }
  746. }
  747. read_options.read_tier = kReadAllTier;
  748. read_options.fill_cache = true;
  749. {
  750. std::array<PinnableSlice, kNumKeys> values;
  751. std::array<Status, kNumKeys> statuses;
  752. db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
  753. keys.data(), values.data(), statuses.data());
  754. for (size_t i = 0; i < kNumKeys; ++i) {
  755. ASSERT_OK(statuses[i]);
  756. ASSERT_EQ(value_strs[i], values[i]);
  757. }
  758. }
  759. read_options.read_tier = kBlockCacheTier;
  760. {
  761. std::array<PinnableSlice, kNumKeys> values;
  762. std::array<Status, kNumKeys> statuses;
  763. db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
  764. keys.data(), values.data(), statuses.data());
  765. for (size_t i = 0; i < kNumKeys; ++i) {
  766. ASSERT_OK(statuses[i]);
  767. ASSERT_EQ(value_strs[i], values[i]);
  768. }
  769. }
  770. }
  771. TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
  772. Options options = GetDefaultOptions();
  773. options.enable_blob_files = true;
  774. options.min_blob_size = 0;
  775. Reopen(options);
  776. constexpr char key[] = "key";
  777. constexpr char blob[] = "blob";
  778. ASSERT_OK(Put(key, blob));
  779. ASSERT_OK(Flush());
  780. SyncPoint::GetInstance()->SetCallBack(
  781. "Version::Get::TamperWithBlobIndex", [](void* arg) {
  782. Slice* const blob_index = static_cast<Slice*>(arg);
  783. assert(blob_index);
  784. assert(!blob_index->empty());
  785. blob_index->remove_prefix(1);
  786. });
  787. SyncPoint::GetInstance()->EnableProcessing();
  788. PinnableSlice result;
  789. ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
  790. .IsCorruption());
  791. SyncPoint::GetInstance()->DisableProcessing();
  792. SyncPoint::GetInstance()->ClearAllCallBacks();
  793. }
  794. TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
  795. Options options = GetDefaultOptions();
  796. options.enable_blob_files = true;
  797. options.min_blob_size = 0;
  798. options.create_if_missing = true;
  799. DestroyAndReopen(options);
  800. constexpr size_t kNumOfKeys = 3;
  801. std::array<std::string, kNumOfKeys> key_strs;
  802. std::array<std::string, kNumOfKeys> value_strs;
  803. std::array<Slice, kNumOfKeys + 1> keys;
  804. for (size_t i = 0; i < kNumOfKeys; ++i) {
  805. key_strs[i] = "foo" + std::to_string(i);
  806. value_strs[i] = "blob_value" + std::to_string(i);
  807. ASSERT_OK(Put(key_strs[i], value_strs[i]));
  808. keys[i] = key_strs[i];
  809. }
  810. constexpr char key[] = "key";
  811. constexpr char blob[] = "blob";
  812. ASSERT_OK(Put(key, blob));
  813. keys[kNumOfKeys] = key;
  814. ASSERT_OK(Flush());
  815. SyncPoint::GetInstance()->SetCallBack(
  816. "Version::MultiGet::TamperWithBlobIndex", [&key](void* arg) {
  817. KeyContext* const key_context = static_cast<KeyContext*>(arg);
  818. assert(key_context);
  819. assert(key_context->key);
  820. if (*(key_context->key) == key) {
  821. Slice* const blob_index = key_context->value;
  822. assert(blob_index);
  823. assert(!blob_index->empty());
  824. blob_index->remove_prefix(1);
  825. }
  826. });
  827. SyncPoint::GetInstance()->EnableProcessing();
  828. std::array<PinnableSlice, kNumOfKeys + 1> values;
  829. std::array<Status, kNumOfKeys + 1> statuses;
  830. db_->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys + 1,
  831. keys.data(), values.data(), statuses.data(),
  832. /*sorted_input=*/false);
  833. for (size_t i = 0; i < kNumOfKeys + 1; ++i) {
  834. if (i != kNumOfKeys) {
  835. ASSERT_OK(statuses[i]);
  836. ASSERT_EQ("blob_value" + std::to_string(i), values[i]);
  837. } else {
  838. ASSERT_TRUE(statuses[i].IsCorruption());
  839. }
  840. }
  841. SyncPoint::GetInstance()->DisableProcessing();
  842. SyncPoint::GetInstance()->ClearAllCallBacks();
  843. }
  844. TEST_F(DBBlobBasicTest, MultiGetBlob_ExceedSoftLimit) {
  845. Options options = GetDefaultOptions();
  846. options.enable_blob_files = true;
  847. options.min_blob_size = 0;
  848. Reopen(options);
  849. constexpr size_t kNumOfKeys = 3;
  850. std::array<std::string, kNumOfKeys> key_bufs;
  851. std::array<std::string, kNumOfKeys> value_bufs;
  852. std::array<Slice, kNumOfKeys> keys;
  853. for (size_t i = 0; i < kNumOfKeys; ++i) {
  854. key_bufs[i] = "foo" + std::to_string(i);
  855. value_bufs[i] = "blob_value" + std::to_string(i);
  856. ASSERT_OK(Put(key_bufs[i], value_bufs[i]));
  857. keys[i] = key_bufs[i];
  858. }
  859. ASSERT_OK(Flush());
  860. std::array<PinnableSlice, kNumOfKeys> values;
  861. std::array<Status, kNumOfKeys> statuses;
  862. ReadOptions read_opts;
  863. read_opts.value_size_soft_limit = 1;
  864. db_->MultiGet(read_opts, dbfull()->DefaultColumnFamily(), kNumOfKeys,
  865. keys.data(), values.data(), statuses.data(),
  866. /*sorted_input=*/true);
  867. for (const auto& s : statuses) {
  868. ASSERT_TRUE(s.IsAborted());
  869. }
  870. }
  871. TEST_F(DBBlobBasicTest, GetBlob_InlinedTTLIndex) {
  872. constexpr uint64_t min_blob_size = 10;
  873. Options options = GetDefaultOptions();
  874. options.enable_blob_files = true;
  875. options.min_blob_size = min_blob_size;
  876. Reopen(options);
  877. constexpr char key[] = "key";
  878. constexpr char blob[] = "short";
  879. static_assert(sizeof(short) - 1 < min_blob_size,
  880. "Blob too long to be inlined");
  881. // Fake an inlined TTL blob index.
  882. std::string blob_index;
  883. constexpr uint64_t expiration = 1234567890;
  884. BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);
  885. WriteBatch batch;
  886. ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
  887. ASSERT_OK(db_->Write(WriteOptions(), &batch));
  888. ASSERT_OK(Flush());
  889. PinnableSlice result;
  890. ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
  891. .IsCorruption());
  892. }
  893. TEST_F(DBBlobBasicTest, GetBlob_IndexWithInvalidFileNumber) {
  894. Options options = GetDefaultOptions();
  895. options.enable_blob_files = true;
  896. options.min_blob_size = 0;
  897. Reopen(options);
  898. constexpr char key[] = "key";
  899. // Fake a blob index referencing a non-existent blob file.
  900. std::string blob_index;
  901. constexpr uint64_t blob_file_number = 1000;
  902. constexpr uint64_t offset = 1234;
  903. constexpr uint64_t size = 5678;
  904. BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
  905. kNoCompression);
  906. WriteBatch batch;
  907. ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
  908. ASSERT_OK(db_->Write(WriteOptions(), &batch));
  909. ASSERT_OK(Flush());
  910. PinnableSlice result;
  911. ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
  912. .IsCorruption());
  913. }
  914. TEST_F(DBBlobBasicTest, GenerateIOTracing) {
  915. Options options = GetDefaultOptions();
  916. options.enable_blob_files = true;
  917. options.min_blob_size = 0;
  918. std::string trace_file = dbname_ + "/io_trace_file";
  919. Reopen(options);
  920. {
  921. // Create IO trace file
  922. std::unique_ptr<TraceWriter> trace_writer;
  923. ASSERT_OK(
  924. NewFileTraceWriter(env_, EnvOptions(), trace_file, &trace_writer));
  925. ASSERT_OK(db_->StartIOTrace(TraceOptions(), std::move(trace_writer)));
  926. constexpr char key[] = "key";
  927. constexpr char blob_value[] = "blob_value";
  928. ASSERT_OK(Put(key, blob_value));
  929. ASSERT_OK(Flush());
  930. ASSERT_EQ(Get(key), blob_value);
  931. ASSERT_OK(db_->EndIOTrace());
  932. ASSERT_OK(env_->FileExists(trace_file));
  933. }
  934. {
  935. // Parse trace file to check file operations related to blob files are
  936. // recorded.
  937. std::unique_ptr<TraceReader> trace_reader;
  938. ASSERT_OK(
  939. NewFileTraceReader(env_, EnvOptions(), trace_file, &trace_reader));
  940. IOTraceReader reader(std::move(trace_reader));
  941. IOTraceHeader header;
  942. ASSERT_OK(reader.ReadHeader(&header));
  943. ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
  944. ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));
  945. // Read records.
  946. int blob_files_op_count = 0;
  947. Status status;
  948. while (true) {
  949. IOTraceRecord record;
  950. status = reader.ReadIOOp(&record);
  951. if (!status.ok()) {
  952. break;
  953. }
  954. if (record.file_name.find("blob") != std::string::npos) {
  955. blob_files_op_count++;
  956. }
  957. }
  958. // Assuming blob files will have Append, Close and then Read operations.
  959. ASSERT_GT(blob_files_op_count, 2);
  960. }
  961. }
  962. TEST_F(DBBlobBasicTest, BestEffortsRecovery_MissingNewestBlobFile) {
  963. Options options = GetDefaultOptions();
  964. options.enable_blob_files = true;
  965. options.min_blob_size = 0;
  966. options.create_if_missing = true;
  967. Reopen(options);
  968. ASSERT_OK(dbfull()->DisableFileDeletions());
  969. constexpr int kNumTableFiles = 2;
  970. for (int i = 0; i < kNumTableFiles; ++i) {
  971. for (char ch = 'a'; ch != 'c'; ++ch) {
  972. std::string key(1, ch);
  973. ASSERT_OK(Put(key, "value" + std::to_string(i)));
  974. }
  975. ASSERT_OK(Flush());
  976. }
  977. Close();
  978. std::vector<std::string> files;
  979. ASSERT_OK(env_->GetChildren(dbname_, &files));
  980. std::string blob_file_path;
  981. uint64_t max_blob_file_num = kInvalidBlobFileNumber;
  982. for (const auto& fname : files) {
  983. uint64_t file_num = 0;
  984. FileType type;
  985. if (ParseFileName(fname, &file_num, /*info_log_name_prefix=*/"", &type) &&
  986. type == kBlobFile) {
  987. if (file_num > max_blob_file_num) {
  988. max_blob_file_num = file_num;
  989. blob_file_path = dbname_ + "/" + fname;
  990. }
  991. }
  992. }
  993. ASSERT_OK(env_->DeleteFile(blob_file_path));
  994. options.best_efforts_recovery = true;
  995. Reopen(options);
  996. std::string value;
  997. ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
  998. ASSERT_EQ("value" + std::to_string(kNumTableFiles - 2), value);
  999. }
  1000. TEST_F(DBBlobBasicTest, GetMergeBlobWithPut) {
  1001. Options options = GetDefaultOptions();
  1002. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1003. options.enable_blob_files = true;
  1004. options.min_blob_size = 0;
  1005. Reopen(options);
  1006. ASSERT_OK(Put("Key1", "v1"));
  1007. ASSERT_OK(Flush());
  1008. ASSERT_OK(Merge("Key1", "v2"));
  1009. ASSERT_OK(Flush());
  1010. ASSERT_OK(Merge("Key1", "v3"));
  1011. ASSERT_OK(Flush());
  1012. std::string value;
  1013. ASSERT_OK(db_->Get(ReadOptions(), "Key1", &value));
  1014. ASSERT_EQ(Get("Key1"), "v1,v2,v3");
  1015. }
  1016. TEST_F(DBBlobBasicTest, GetMergeBlobFromMemoryTier) {
  1017. Options options = GetDefaultOptions();
  1018. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1019. options.enable_blob_files = true;
  1020. options.min_blob_size = 0;
  1021. Reopen(options);
  1022. ASSERT_OK(Put(Key(0), "v1"));
  1023. ASSERT_OK(Flush());
  1024. ASSERT_OK(Merge(Key(0), "v2"));
  1025. ASSERT_OK(Flush());
  1026. // Regular `Get()` loads data block to cache.
  1027. std::string value;
  1028. ASSERT_OK(db_->Get(ReadOptions(), Key(0), &value));
  1029. ASSERT_EQ("v1,v2", value);
  1030. // Base value blob is still uncached, so an in-memory read will fail.
  1031. ReadOptions read_options;
  1032. read_options.read_tier = kBlockCacheTier;
  1033. ASSERT_TRUE(db_->Get(read_options, Key(0), &value).IsIncomplete());
  1034. }
  1035. TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) {
  1036. constexpr size_t num_keys = 3;
  1037. Options options = GetDefaultOptions();
  1038. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1039. options.enable_blob_files = true;
  1040. options.min_blob_size = 0;
  1041. Reopen(options);
  1042. ASSERT_OK(Put("Key0", "v0_0"));
  1043. ASSERT_OK(Put("Key1", "v1_0"));
  1044. ASSERT_OK(Put("Key2", "v2_0"));
  1045. ASSERT_OK(Flush());
  1046. ASSERT_OK(Merge("Key0", "v0_1"));
  1047. ASSERT_OK(Merge("Key1", "v1_1"));
  1048. ASSERT_OK(Flush());
  1049. ASSERT_OK(Merge("Key0", "v0_2"));
  1050. ASSERT_OK(Flush());
  1051. std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
  1052. std::array<PinnableSlice, num_keys> values;
  1053. std::array<Status, num_keys> statuses;
  1054. db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  1055. keys.data(), values.data(), statuses.data());
  1056. ASSERT_OK(statuses[0]);
  1057. ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");
  1058. ASSERT_OK(statuses[1]);
  1059. ASSERT_EQ(values[1], "v1_0,v1_1");
  1060. ASSERT_OK(statuses[2]);
  1061. ASSERT_EQ(values[2], "v2_0");
  1062. }
  1063. TEST_F(DBBlobBasicTest, Properties) {
  1064. Options options = GetDefaultOptions();
  1065. options.enable_blob_files = true;
  1066. options.min_blob_size = 0;
  1067. Reopen(options);
  1068. constexpr char key1[] = "key1";
  1069. constexpr size_t key1_size = sizeof(key1) - 1;
  1070. constexpr char key2[] = "key2";
  1071. constexpr size_t key2_size = sizeof(key2) - 1;
  1072. constexpr char key3[] = "key3";
  1073. constexpr size_t key3_size = sizeof(key3) - 1;
  1074. constexpr char blob[] = "00000000000000";
  1075. constexpr size_t blob_size = sizeof(blob) - 1;
  1076. constexpr char longer_blob[] = "00000000000000000000";
  1077. constexpr size_t longer_blob_size = sizeof(longer_blob) - 1;
  1078. ASSERT_OK(Put(key1, blob));
  1079. ASSERT_OK(Put(key2, longer_blob));
  1080. ASSERT_OK(Flush());
  1081. constexpr size_t first_blob_file_expected_size =
  1082. BlobLogHeader::kSize +
  1083. BlobLogRecord::CalculateAdjustmentForRecordHeader(key1_size) + blob_size +
  1084. BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
  1085. longer_blob_size + BlobLogFooter::kSize;
  1086. ASSERT_OK(Put(key3, blob));
  1087. ASSERT_OK(Flush());
  1088. constexpr size_t second_blob_file_expected_size =
  1089. BlobLogHeader::kSize +
  1090. BlobLogRecord::CalculateAdjustmentForRecordHeader(key3_size) + blob_size +
  1091. BlobLogFooter::kSize;
  1092. constexpr size_t total_expected_size =
  1093. first_blob_file_expected_size + second_blob_file_expected_size;
  1094. // Number of blob files
  1095. uint64_t num_blob_files = 0;
  1096. ASSERT_TRUE(
  1097. db_->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files));
  1098. ASSERT_EQ(num_blob_files, 2);
  1099. // Total size of live blob files
  1100. uint64_t live_blob_file_size = 0;
  1101. ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileSize,
  1102. &live_blob_file_size));
  1103. ASSERT_EQ(live_blob_file_size, total_expected_size);
  1104. // Total amount of garbage in live blob files
  1105. {
  1106. uint64_t live_blob_file_garbage_size = 0;
  1107. ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
  1108. &live_blob_file_garbage_size));
  1109. ASSERT_EQ(live_blob_file_garbage_size, 0);
  1110. }
  1111. // Total size of all blob files across all versions
  1112. // Note: this should be the same as above since we only have one
  1113. // version at this point.
  1114. uint64_t total_blob_file_size = 0;
  1115. ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
  1116. &total_blob_file_size));
  1117. ASSERT_EQ(total_blob_file_size, total_expected_size);
  1118. // Delete key2 to create some garbage
  1119. ASSERT_OK(Delete(key2));
  1120. ASSERT_OK(Flush());
  1121. constexpr Slice* begin = nullptr;
  1122. constexpr Slice* end = nullptr;
  1123. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
  1124. constexpr size_t expected_garbage_size =
  1125. BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
  1126. longer_blob_size;
  1127. constexpr double expected_space_amp =
  1128. static_cast<double>(total_expected_size) /
  1129. (total_expected_size - expected_garbage_size);
  1130. // Blob file stats
  1131. std::string blob_stats;
  1132. ASSERT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &blob_stats));
  1133. std::ostringstream oss;
  1134. oss << "Number of blob files: 2\nTotal size of blob files: "
  1135. << total_expected_size
  1136. << "\nTotal size of garbage in blob files: " << expected_garbage_size
  1137. << "\nBlob file space amplification: " << expected_space_amp << '\n';
  1138. ASSERT_EQ(blob_stats, oss.str());
  1139. // Total amount of garbage in live blob files
  1140. {
  1141. uint64_t live_blob_file_garbage_size = 0;
  1142. ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
  1143. &live_blob_file_garbage_size));
  1144. ASSERT_EQ(live_blob_file_garbage_size, expected_garbage_size);
  1145. }
  1146. }
  1147. TEST_F(DBBlobBasicTest, PropertiesMultiVersion) {
  1148. Options options = GetDefaultOptions();
  1149. options.enable_blob_files = true;
  1150. options.min_blob_size = 0;
  1151. Reopen(options);
  1152. constexpr char key1[] = "key1";
  1153. constexpr char key2[] = "key2";
  1154. constexpr char key3[] = "key3";
  1155. constexpr size_t key_size = sizeof(key1) - 1;
  1156. static_assert(sizeof(key2) - 1 == key_size, "unexpected size: key2");
  1157. static_assert(sizeof(key3) - 1 == key_size, "unexpected size: key3");
  1158. constexpr char blob[] = "0000000000";
  1159. constexpr size_t blob_size = sizeof(blob) - 1;
  1160. ASSERT_OK(Put(key1, blob));
  1161. ASSERT_OK(Flush());
  1162. ASSERT_OK(Put(key2, blob));
  1163. ASSERT_OK(Flush());
  1164. // Create an iterator to keep the current version alive
  1165. std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
  1166. ASSERT_OK(iter->status());
  1167. // Note: the Delete and subsequent compaction results in the first blob file
  1168. // not making it to the final version. (It is still part of the previous
  1169. // version kept alive by the iterator though.) On the other hand, the Put
  1170. // results in a third blob file.
  1171. ASSERT_OK(Delete(key1));
  1172. ASSERT_OK(Put(key3, blob));
  1173. ASSERT_OK(Flush());
  1174. constexpr Slice* begin = nullptr;
  1175. constexpr Slice* end = nullptr;
  1176. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
  1177. // Total size of all blob files across all versions: between the two versions,
  1178. // we should have three blob files of the same size with one blob each.
  1179. // The version kept alive by the iterator contains the first and the second
  1180. // blob file, while the final version contains the second and the third blob
  1181. // file. (The second blob file is thus shared by the two versions but should
  1182. // be counted only once.)
  1183. uint64_t total_blob_file_size = 0;
  1184. ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
  1185. &total_blob_file_size));
  1186. ASSERT_EQ(total_blob_file_size,
  1187. 3 * (BlobLogHeader::kSize +
  1188. BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
  1189. blob_size + BlobLogFooter::kSize));
  1190. }
  1191. class DBBlobBasicIOErrorTest : public DBBlobBasicTest,
  1192. public testing::WithParamInterface<std::string> {
  1193. protected:
  1194. DBBlobBasicIOErrorTest() : sync_point_(GetParam()) {
  1195. fault_injection_env_.reset(new FaultInjectionTestEnv(env_));
  1196. }
  1197. ~DBBlobBasicIOErrorTest() { Close(); }
  1198. std::unique_ptr<FaultInjectionTestEnv> fault_injection_env_;
  1199. std::string sync_point_;
  1200. };
  1201. class DBBlobBasicIOErrorMultiGetTest : public DBBlobBasicIOErrorTest {
  1202. public:
  1203. DBBlobBasicIOErrorMultiGetTest() : DBBlobBasicIOErrorTest() {}
  1204. };
  1205. INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorTest,
  1206. ::testing::ValuesIn(std::vector<std::string>{
  1207. "BlobFileReader::OpenFile:NewRandomAccessFile",
  1208. "BlobFileReader::GetBlob:ReadFromFile"}));
  1209. INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorMultiGetTest,
  1210. ::testing::ValuesIn(std::vector<std::string>{
  1211. "BlobFileReader::OpenFile:NewRandomAccessFile",
  1212. "BlobFileReader::MultiGetBlob:ReadFromFile"}));
  1213. TEST_P(DBBlobBasicIOErrorTest, GetBlob_IOError) {
  1214. Options options;
  1215. options.env = fault_injection_env_.get();
  1216. options.enable_blob_files = true;
  1217. options.min_blob_size = 0;
  1218. Reopen(options);
  1219. constexpr char key[] = "key";
  1220. constexpr char blob_value[] = "blob_value";
  1221. ASSERT_OK(Put(key, blob_value));
  1222. ASSERT_OK(Flush());
  1223. SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
  1224. fault_injection_env_->SetFilesystemActive(false,
  1225. Status::IOError(sync_point_));
  1226. });
  1227. SyncPoint::GetInstance()->EnableProcessing();
  1228. PinnableSlice result;
  1229. ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
  1230. .IsIOError());
  1231. SyncPoint::GetInstance()->DisableProcessing();
  1232. SyncPoint::GetInstance()->ClearAllCallBacks();
  1233. }
  1234. TEST_P(DBBlobBasicIOErrorMultiGetTest, MultiGetBlobs_IOError) {
  1235. Options options = GetDefaultOptions();
  1236. options.env = fault_injection_env_.get();
  1237. options.enable_blob_files = true;
  1238. options.min_blob_size = 0;
  1239. Reopen(options);
  1240. constexpr size_t num_keys = 2;
  1241. constexpr char first_key[] = "first_key";
  1242. constexpr char first_value[] = "first_value";
  1243. ASSERT_OK(Put(first_key, first_value));
  1244. constexpr char second_key[] = "second_key";
  1245. constexpr char second_value[] = "second_value";
  1246. ASSERT_OK(Put(second_key, second_value));
  1247. ASSERT_OK(Flush());
  1248. std::array<Slice, num_keys> keys{{first_key, second_key}};
  1249. std::array<PinnableSlice, num_keys> values;
  1250. std::array<Status, num_keys> statuses;
  1251. SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
  1252. fault_injection_env_->SetFilesystemActive(false,
  1253. Status::IOError(sync_point_));
  1254. });
  1255. SyncPoint::GetInstance()->EnableProcessing();
  1256. db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  1257. keys.data(), values.data(), statuses.data());
  1258. SyncPoint::GetInstance()->DisableProcessing();
  1259. SyncPoint::GetInstance()->ClearAllCallBacks();
  1260. ASSERT_TRUE(statuses[0].IsIOError());
  1261. ASSERT_TRUE(statuses[1].IsIOError());
  1262. }
  1263. TEST_P(DBBlobBasicIOErrorMultiGetTest, MultipleBlobFiles) {
  1264. Options options = GetDefaultOptions();
  1265. options.env = fault_injection_env_.get();
  1266. options.enable_blob_files = true;
  1267. options.min_blob_size = 0;
  1268. Reopen(options);
  1269. constexpr size_t num_keys = 2;
  1270. constexpr char key1[] = "key1";
  1271. constexpr char value1[] = "blob1";
  1272. ASSERT_OK(Put(key1, value1));
  1273. ASSERT_OK(Flush());
  1274. constexpr char key2[] = "key2";
  1275. constexpr char value2[] = "blob2";
  1276. ASSERT_OK(Put(key2, value2));
  1277. ASSERT_OK(Flush());
  1278. std::array<Slice, num_keys> keys{{key1, key2}};
  1279. std::array<PinnableSlice, num_keys> values;
  1280. std::array<Status, num_keys> statuses;
  1281. bool first_blob_file = true;
  1282. SyncPoint::GetInstance()->SetCallBack(
  1283. sync_point_, [&first_blob_file, this](void* /* arg */) {
  1284. if (first_blob_file) {
  1285. first_blob_file = false;
  1286. return;
  1287. }
  1288. fault_injection_env_->SetFilesystemActive(false,
  1289. Status::IOError(sync_point_));
  1290. });
  1291. SyncPoint::GetInstance()->EnableProcessing();
  1292. db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  1293. keys.data(), values.data(), statuses.data());
  1294. SyncPoint::GetInstance()->DisableProcessing();
  1295. SyncPoint::GetInstance()->ClearAllCallBacks();
  1296. ASSERT_OK(statuses[0]);
  1297. ASSERT_EQ(value1, values[0]);
  1298. ASSERT_TRUE(statuses[1].IsIOError());
  1299. }
  1300. TEST_F(DBBlobBasicTest, MultiGetFindTable_IOError) {
  1301. // Repro test for a specific bug where `MultiGet()` would fail to open a table
  1302. // in `FindTable()` and then proceed to return raw blob handles for the other
  1303. // keys.
  1304. Options options = GetDefaultOptions();
  1305. options.enable_blob_files = true;
  1306. options.min_blob_size = 0;
  1307. Reopen(options);
  1308. // Force no table cache so every read will preload the SST file.
  1309. dbfull()->TEST_table_cache()->SetCapacity(0);
  1310. constexpr size_t num_keys = 2;
  1311. constexpr char key1[] = "key1";
  1312. constexpr char value1[] = "blob1";
  1313. ASSERT_OK(Put(key1, value1));
  1314. ASSERT_OK(Flush());
  1315. constexpr char key2[] = "key2";
  1316. constexpr char value2[] = "blob2";
  1317. ASSERT_OK(Put(key2, value2));
  1318. ASSERT_OK(Flush());
  1319. std::atomic<int> num_files_opened = 0;
  1320. // This test would be more realistic if we injected an `IOError` from the
  1321. // `FileSystem`
  1322. SyncPoint::GetInstance()->SetCallBack(
  1323. "TableCache::MultiGet:FindTable", [&](void* status) {
  1324. num_files_opened++;
  1325. if (num_files_opened == 2) {
  1326. Status* s = static_cast<Status*>(status);
  1327. *s = Status::IOError();
  1328. }
  1329. });
  1330. SyncPoint::GetInstance()->EnableProcessing();
  1331. std::array<Slice, num_keys> keys{{key1, key2}};
  1332. std::array<PinnableSlice, num_keys> values;
  1333. std::array<Status, num_keys> statuses;
  1334. db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  1335. keys.data(), values.data(), statuses.data());
  1336. ASSERT_TRUE(statuses[0].IsIOError());
  1337. ASSERT_OK(statuses[1]);
  1338. ASSERT_EQ(value2, values[1]);
  1339. }
  1340. namespace {
  1341. class ReadBlobCompactionFilter : public CompactionFilter {
  1342. public:
  1343. ReadBlobCompactionFilter() = default;
  1344. const char* Name() const override {
  1345. return "rocksdb.compaction.filter.read.blob";
  1346. }
  1347. CompactionFilter::Decision FilterV2(
  1348. int /*level*/, const Slice& /*key*/, ValueType value_type,
  1349. const Slice& existing_value, std::string* new_value,
  1350. std::string* /*skip_until*/) const override {
  1351. if (value_type != CompactionFilter::ValueType::kValue) {
  1352. return CompactionFilter::Decision::kKeep;
  1353. }
  1354. assert(new_value);
  1355. new_value->assign(existing_value.data(), existing_value.size());
  1356. return CompactionFilter::Decision::kChangeValue;
  1357. }
  1358. };
  1359. } // anonymous namespace
  1360. TEST_P(DBBlobBasicIOErrorTest, CompactionFilterReadBlob_IOError) {
  1361. Options options = GetDefaultOptions();
  1362. options.env = fault_injection_env_.get();
  1363. options.enable_blob_files = true;
  1364. options.min_blob_size = 0;
  1365. options.create_if_missing = true;
  1366. std::unique_ptr<CompactionFilter> compaction_filter_guard(
  1367. new ReadBlobCompactionFilter);
  1368. options.compaction_filter = compaction_filter_guard.get();
  1369. DestroyAndReopen(options);
  1370. constexpr char key[] = "foo";
  1371. constexpr char blob_value[] = "foo_blob_value";
  1372. ASSERT_OK(Put(key, blob_value));
  1373. ASSERT_OK(Flush());
  1374. SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
  1375. fault_injection_env_->SetFilesystemActive(false,
  1376. Status::IOError(sync_point_));
  1377. });
  1378. SyncPoint::GetInstance()->EnableProcessing();
  1379. ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
  1380. /*end=*/nullptr)
  1381. .IsIOError());
  1382. SyncPoint::GetInstance()->DisableProcessing();
  1383. SyncPoint::GetInstance()->ClearAllCallBacks();
  1384. }
  1385. TEST_P(DBBlobBasicIOErrorTest, IterateBlobsAllowUnpreparedValue_IOError) {
  1386. Options options;
  1387. options.env = fault_injection_env_.get();
  1388. options.enable_blob_files = true;
  1389. Reopen(options);
  1390. constexpr char key[] = "key";
  1391. constexpr char blob_value[] = "blob_value";
  1392. ASSERT_OK(Put(key, blob_value));
  1393. ASSERT_OK(Flush());
  1394. SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
  1395. fault_injection_env_->SetFilesystemActive(false,
  1396. Status::IOError(sync_point_));
  1397. });
  1398. SyncPoint::GetInstance()->EnableProcessing();
  1399. ReadOptions read_options;
  1400. read_options.allow_unprepared_value = true;
  1401. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1402. iter->SeekToFirst();
  1403. ASSERT_TRUE(iter->Valid());
  1404. ASSERT_EQ(iter->key(), key);
  1405. ASSERT_TRUE(iter->value().empty());
  1406. ASSERT_OK(iter->status());
  1407. ASSERT_FALSE(iter->PrepareValue());
  1408. ASSERT_FALSE(iter->Valid());
  1409. ASSERT_TRUE(iter->status().IsIOError());
  1410. SyncPoint::GetInstance()->DisableProcessing();
  1411. SyncPoint::GetInstance()->ClearAllCallBacks();
  1412. }
  1413. TEST_F(DBBlobBasicTest, WarmCacheWithBlobsDuringFlush) {
  1414. Options options = GetDefaultOptions();
  1415. LRUCacheOptions co;
  1416. co.capacity = 1 << 25;
  1417. co.num_shard_bits = 2;
  1418. co.metadata_charge_policy = kDontChargeCacheMetadata;
  1419. auto backing_cache = NewLRUCache(co);
  1420. options.blob_cache = backing_cache;
  1421. BlockBasedTableOptions block_based_options;
  1422. block_based_options.no_block_cache = false;
  1423. block_based_options.block_cache = backing_cache;
  1424. block_based_options.cache_index_and_filter_blocks = true;
  1425. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  1426. options.enable_blob_files = true;
  1427. options.create_if_missing = true;
  1428. options.disable_auto_compactions = true;
  1429. options.enable_blob_garbage_collection = true;
  1430. options.blob_garbage_collection_age_cutoff = 1.0;
  1431. options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
  1432. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1433. DestroyAndReopen(options);
  1434. constexpr size_t kNumBlobs = 10;
  1435. constexpr size_t kValueSize = 100;
  1436. std::string value(kValueSize, 'a');
  1437. for (size_t i = 1; i <= kNumBlobs; i++) {
  1438. ASSERT_OK(Put(std::to_string(i), value));
  1439. ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
  1440. ASSERT_OK(Flush());
  1441. ASSERT_EQ(i * 2, options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
  1442. ASSERT_EQ(value, Get(std::to_string(i)));
  1443. ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
  1444. ASSERT_EQ(0, options.statistics->getTickerCount(BLOB_DB_CACHE_MISS));
  1445. ASSERT_EQ(i * 2, options.statistics->getTickerCount(BLOB_DB_CACHE_HIT));
  1446. }
  1447. // Verify compaction not counted
  1448. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
  1449. /*end=*/nullptr));
  1450. EXPECT_EQ(kNumBlobs * 2,
  1451. options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
  1452. }
  1453. TEST_F(DBBlobBasicTest, DynamicallyWarmCacheDuringFlush) {
  1454. Options options = GetDefaultOptions();
  1455. LRUCacheOptions co;
  1456. co.capacity = 1 << 25;
  1457. co.num_shard_bits = 2;
  1458. co.metadata_charge_policy = kDontChargeCacheMetadata;
  1459. auto backing_cache = NewLRUCache(co);
  1460. options.blob_cache = backing_cache;
  1461. BlockBasedTableOptions block_based_options;
  1462. block_based_options.no_block_cache = false;
  1463. block_based_options.block_cache = backing_cache;
  1464. block_based_options.cache_index_and_filter_blocks = true;
  1465. options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
  1466. options.enable_blob_files = true;
  1467. options.create_if_missing = true;
  1468. options.disable_auto_compactions = true;
  1469. options.enable_blob_garbage_collection = true;
  1470. options.blob_garbage_collection_age_cutoff = 1.0;
  1471. options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
  1472. options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  1473. DestroyAndReopen(options);
  1474. constexpr size_t kNumBlobs = 10;
  1475. constexpr size_t kValueSize = 100;
  1476. std::string value(kValueSize, 'a');
  1477. for (size_t i = 1; i <= 5; i++) {
  1478. ASSERT_OK(Put(std::to_string(i), value));
  1479. ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
  1480. ASSERT_OK(Flush());
  1481. ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
  1482. ASSERT_EQ(value, Get(std::to_string(i)));
  1483. ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
  1484. ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
  1485. ASSERT_EQ(0,
  1486. options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS));
  1487. ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT));
  1488. }
  1489. ASSERT_OK(dbfull()->SetOptions({{"prepopulate_blob_cache", "kDisable"}}));
  1490. for (size_t i = 6; i <= kNumBlobs; i++) {
  1491. ASSERT_OK(Put(std::to_string(i), value));
  1492. ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
  1493. ASSERT_OK(Flush());
  1494. ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
  1495. ASSERT_EQ(value, Get(std::to_string(i)));
  1496. ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
  1497. ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
  1498. ASSERT_EQ(2,
  1499. options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS));
  1500. ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT));
  1501. }
  1502. // Verify compaction not counted
  1503. ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
  1504. /*end=*/nullptr));
  1505. EXPECT_EQ(0, options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
  1506. }
  1507. TEST_F(DBBlobBasicTest, WarmCacheWithBlobsSecondary) {
  1508. CompressedSecondaryCacheOptions secondary_cache_opts;
  1509. secondary_cache_opts.capacity = 1 << 20;
  1510. secondary_cache_opts.num_shard_bits = 0;
  1511. secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  1512. secondary_cache_opts.compression_type = kNoCompression;
  1513. LRUCacheOptions primary_cache_opts;
  1514. primary_cache_opts.capacity = 1024;
  1515. primary_cache_opts.num_shard_bits = 0;
  1516. primary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  1517. primary_cache_opts.secondary_cache =
  1518. NewCompressedSecondaryCache(secondary_cache_opts);
  1519. Options options = GetDefaultOptions();
  1520. options.create_if_missing = true;
  1521. options.statistics = CreateDBStatistics();
  1522. options.enable_blob_files = true;
  1523. options.blob_cache = NewLRUCache(primary_cache_opts);
  1524. options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
  1525. DestroyAndReopen(options);
  1526. // Note: only one of the two blobs fit in the primary cache at any given time.
  1527. constexpr char first_key[] = "foo";
  1528. constexpr size_t first_blob_size = 512;
  1529. const std::string first_blob(first_blob_size, 'a');
  1530. constexpr char second_key[] = "bar";
  1531. constexpr size_t second_blob_size = 768;
  1532. const std::string second_blob(second_blob_size, 'b');
  1533. // First blob is inserted into primary cache during flush.
  1534. ASSERT_OK(Put(first_key, first_blob));
  1535. ASSERT_OK(Flush());
  1536. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 1);
  1537. // Second blob is inserted into primary cache during flush,
  1538. // First blob is evicted but only a dummy handle is inserted into secondary
  1539. // cache.
  1540. ASSERT_OK(Put(second_key, second_blob));
  1541. ASSERT_OK(Flush());
  1542. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 1);
  1543. // First blob is inserted into primary cache.
  1544. // Second blob is evicted but only a dummy handle is inserted into secondary
  1545. // cache.
  1546. ASSERT_EQ(Get(first_key), first_blob);
  1547. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 1);
  1548. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 0);
  1549. ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
  1550. 0);
  1551. // Second blob is inserted into primary cache,
  1552. // First blob is evicted and is inserted into secondary cache.
  1553. ASSERT_EQ(Get(second_key), second_blob);
  1554. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 1);
  1555. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 0);
  1556. ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
  1557. 0);
  1558. // First blob's dummy item is inserted into primary cache b/c of lookup.
  1559. // Second blob is still in primary cache.
  1560. ASSERT_EQ(Get(first_key), first_blob);
  1561. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 0);
  1562. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 1);
  1563. ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
  1564. 1);
  1565. // First blob's item is inserted into primary cache b/c of lookup.
  1566. // Second blob is evicted and inserted into secondary cache.
  1567. ASSERT_EQ(Get(first_key), first_blob);
  1568. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 0);
  1569. ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 1);
  1570. ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
  1571. 1);
  1572. }
  1573. TEST_F(DBBlobBasicTest, GetEntityBlob) {
  1574. Options options = GetDefaultOptions();
  1575. options.enable_blob_files = true;
  1576. options.min_blob_size = 0;
  1577. Reopen(options);
  1578. constexpr char key[] = "key";
  1579. constexpr char blob_value[] = "blob_value";
  1580. constexpr char other_key[] = "other_key";
  1581. constexpr char other_blob_value[] = "other_blob_value";
  1582. ASSERT_OK(Put(key, blob_value));
  1583. ASSERT_OK(Put(other_key, other_blob_value));
  1584. ASSERT_OK(Flush());
  1585. WideColumns expected_columns{{kDefaultWideColumnName, blob_value}};
  1586. WideColumns other_expected_columns{
  1587. {kDefaultWideColumnName, other_blob_value}};
  1588. {
  1589. PinnableWideColumns result;
  1590. ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), key,
  1591. &result));
  1592. ASSERT_EQ(result.columns(), expected_columns);
  1593. }
  1594. {
  1595. PinnableWideColumns result;
  1596. ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
  1597. other_key, &result));
  1598. ASSERT_EQ(result.columns(), other_expected_columns);
  1599. }
  1600. {
  1601. constexpr size_t num_keys = 2;
  1602. std::array<Slice, num_keys> keys{{key, other_key}};
  1603. std::array<PinnableWideColumns, num_keys> results;
  1604. std::array<Status, num_keys> statuses;
  1605. db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
  1606. keys.data(), results.data(), statuses.data());
  1607. ASSERT_OK(statuses[0]);
  1608. ASSERT_EQ(results[0].columns(), expected_columns);
  1609. ASSERT_OK(statuses[1]);
  1610. ASSERT_EQ(results[1].columns(), other_expected_columns);
  1611. }
  1612. }
  1613. class DBBlobWithTimestampTest : public DBBasicTestWithTimestampBase {
  1614. protected:
  1615. DBBlobWithTimestampTest()
  1616. : DBBasicTestWithTimestampBase("db_blob_with_timestamp_test") {}
  1617. };
  1618. TEST_F(DBBlobWithTimestampTest, GetBlob) {
  1619. Options options = GetDefaultOptions();
  1620. options.create_if_missing = true;
  1621. options.enable_blob_files = true;
  1622. options.min_blob_size = 0;
  1623. const size_t kTimestampSize = Timestamp(0, 0).size();
  1624. TestComparator test_cmp(kTimestampSize);
  1625. options.comparator = &test_cmp;
  1626. DestroyAndReopen(options);
  1627. WriteOptions write_opts;
  1628. const std::string ts = Timestamp(1, 0);
  1629. constexpr char key[] = "key";
  1630. constexpr char blob_value[] = "blob_value";
  1631. ASSERT_OK(db_->Put(write_opts, key, ts, blob_value));
  1632. ASSERT_OK(Flush());
  1633. const std::string read_ts = Timestamp(2, 0);
  1634. Slice read_ts_slice(read_ts);
  1635. ReadOptions read_opts;
  1636. read_opts.timestamp = &read_ts_slice;
  1637. std::string value;
  1638. ASSERT_OK(db_->Get(read_opts, key, &value));
  1639. ASSERT_EQ(value, blob_value);
  1640. }
  1641. TEST_F(DBBlobWithTimestampTest, MultiGetBlobs) {
  1642. constexpr size_t min_blob_size = 6;
  1643. Options options = GetDefaultOptions();
  1644. options.enable_blob_files = true;
  1645. options.min_blob_size = min_blob_size;
  1646. options.create_if_missing = true;
  1647. const size_t kTimestampSize = Timestamp(0, 0).size();
  1648. TestComparator test_cmp(kTimestampSize);
  1649. options.comparator = &test_cmp;
  1650. DestroyAndReopen(options);
  1651. // Put then retrieve three key-values. The first value is below the size limit
  1652. // and is thus stored inline; the other two are stored separately as blobs.
  1653. constexpr size_t num_keys = 3;
  1654. constexpr char first_key[] = "first_key";
  1655. constexpr char first_value[] = "short";
  1656. static_assert(sizeof(first_value) - 1 < min_blob_size,
  1657. "first_value too long to be inlined");
  1658. DestroyAndReopen(options);
  1659. WriteOptions write_opts;
  1660. const std::string ts = Timestamp(1, 0);
  1661. ASSERT_OK(db_->Put(write_opts, first_key, ts, first_value));
  1662. constexpr char second_key[] = "second_key";
  1663. constexpr char second_value[] = "long_value";
  1664. static_assert(sizeof(second_value) - 1 >= min_blob_size,
  1665. "second_value too short to be stored as blob");
  1666. ASSERT_OK(db_->Put(write_opts, second_key, ts, second_value));
  1667. constexpr char third_key[] = "third_key";
  1668. constexpr char third_value[] = "other_long_value";
  1669. static_assert(sizeof(third_value) - 1 >= min_blob_size,
  1670. "third_value too short to be stored as blob");
  1671. ASSERT_OK(db_->Put(write_opts, third_key, ts, third_value));
  1672. ASSERT_OK(Flush());
  1673. ReadOptions read_options;
  1674. const std::string read_ts = Timestamp(2, 0);
  1675. Slice read_ts_slice(read_ts);
  1676. read_options.timestamp = &read_ts_slice;
  1677. std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
  1678. {
  1679. std::array<PinnableSlice, num_keys> values;
  1680. std::array<Status, num_keys> statuses;
  1681. db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
  1682. keys.data(), values.data(), statuses.data());
  1683. ASSERT_OK(statuses[0]);
  1684. ASSERT_EQ(values[0], first_value);
  1685. ASSERT_OK(statuses[1]);
  1686. ASSERT_EQ(values[1], second_value);
  1687. ASSERT_OK(statuses[2]);
  1688. ASSERT_EQ(values[2], third_value);
  1689. }
  1690. }
  1691. TEST_F(DBBlobWithTimestampTest, GetMergeBlobWithPut) {
  1692. Options options = GetDefaultOptions();
  1693. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1694. options.enable_blob_files = true;
  1695. options.min_blob_size = 0;
  1696. options.create_if_missing = true;
  1697. const size_t kTimestampSize = Timestamp(0, 0).size();
  1698. TestComparator test_cmp(kTimestampSize);
  1699. options.comparator = &test_cmp;
  1700. DestroyAndReopen(options);
  1701. WriteOptions write_opts;
  1702. const std::string ts = Timestamp(1, 0);
  1703. ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1"));
  1704. ASSERT_OK(Flush());
  1705. ASSERT_OK(
  1706. db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v2"));
  1707. ASSERT_OK(Flush());
  1708. ASSERT_OK(
  1709. db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v3"));
  1710. ASSERT_OK(Flush());
  1711. std::string value;
  1712. const std::string read_ts = Timestamp(2, 0);
  1713. Slice read_ts_slice(read_ts);
  1714. ReadOptions read_opts;
  1715. read_opts.timestamp = &read_ts_slice;
  1716. ASSERT_OK(db_->Get(read_opts, "Key1", &value));
  1717. ASSERT_EQ(value, "v1,v2,v3");
  1718. }
  1719. TEST_F(DBBlobWithTimestampTest, MultiGetMergeBlobWithPut) {
  1720. constexpr size_t num_keys = 3;
  1721. Options options = GetDefaultOptions();
  1722. options.merge_operator = MergeOperators::CreateStringAppendOperator();
  1723. options.enable_blob_files = true;
  1724. options.min_blob_size = 0;
  1725. options.create_if_missing = true;
  1726. const size_t kTimestampSize = Timestamp(0, 0).size();
  1727. TestComparator test_cmp(kTimestampSize);
  1728. options.comparator = &test_cmp;
  1729. DestroyAndReopen(options);
  1730. WriteOptions write_opts;
  1731. const std::string ts = Timestamp(1, 0);
  1732. ASSERT_OK(db_->Put(write_opts, "Key0", ts, "v0_0"));
  1733. ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1_0"));
  1734. ASSERT_OK(db_->Put(write_opts, "Key2", ts, "v2_0"));
  1735. ASSERT_OK(Flush());
  1736. ASSERT_OK(
  1737. db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_1"));
  1738. ASSERT_OK(
  1739. db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v1_1"));
  1740. ASSERT_OK(Flush());
  1741. ASSERT_OK(
  1742. db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_2"));
  1743. ASSERT_OK(Flush());
  1744. const std::string read_ts = Timestamp(2, 0);
  1745. Slice read_ts_slice(read_ts);
  1746. ReadOptions read_opts;
  1747. read_opts.timestamp = &read_ts_slice;
  1748. std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
  1749. std::array<PinnableSlice, num_keys> values;
  1750. std::array<Status, num_keys> statuses;
  1751. db_->MultiGet(read_opts, db_->DefaultColumnFamily(), num_keys, keys.data(),
  1752. values.data(), statuses.data());
  1753. ASSERT_OK(statuses[0]);
  1754. ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");
  1755. ASSERT_OK(statuses[1]);
  1756. ASSERT_EQ(values[1], "v1_0,v1_1");
  1757. ASSERT_OK(statuses[2]);
  1758. ASSERT_EQ(values[2], "v2_0");
  1759. }
  1760. TEST_F(DBBlobWithTimestampTest, IterateBlobs) {
  1761. Options options = GetDefaultOptions();
  1762. options.enable_blob_files = true;
  1763. options.create_if_missing = true;
  1764. const size_t kTimestampSize = Timestamp(0, 0).size();
  1765. TestComparator test_cmp(kTimestampSize);
  1766. options.comparator = &test_cmp;
  1767. DestroyAndReopen(options);
  1768. int num_blobs = 5;
  1769. std::vector<std::string> keys;
  1770. std::vector<std::string> blobs;
  1771. WriteOptions write_opts;
  1772. std::vector<std::string> write_timestamps = {Timestamp(1, 0),
  1773. Timestamp(2, 0)};
  1774. // For each key in ["key0", ... "keyi", ...], write two versions:
  1775. // Timestamp(1, 0), "blobi0"
  1776. // Timestamp(2, 0), "blobi1"
  1777. for (int i = 0; i < num_blobs; i++) {
  1778. keys.push_back("key" + std::to_string(i));
  1779. blobs.push_back("blob" + std::to_string(i));
  1780. for (size_t j = 0; j < write_timestamps.size(); j++) {
  1781. ASSERT_OK(db_->Put(write_opts, keys[i], write_timestamps[j],
  1782. blobs[i] + std::to_string(j)));
  1783. }
  1784. }
  1785. ASSERT_OK(Flush());
  1786. ReadOptions read_options;
  1787. std::vector<std::string> read_timestamps = {Timestamp(0, 0), Timestamp(3, 0)};
  1788. Slice ts_upper_bound(read_timestamps[1]);
  1789. read_options.timestamp = &ts_upper_bound;
  1790. auto check_iter_entry =
  1791. [](const Iterator* iter, const std::string& expected_key,
  1792. const std::string& expected_ts, const std::string& expected_value,
  1793. bool key_is_internal = true) {
  1794. ASSERT_OK(iter->status());
  1795. if (key_is_internal) {
  1796. std::string expected_ukey_and_ts;
  1797. expected_ukey_and_ts.assign(expected_key.data(), expected_key.size());
  1798. expected_ukey_and_ts.append(expected_ts.data(), expected_ts.size());
  1799. ParsedInternalKey parsed_ikey;
  1800. ASSERT_OK(ParseInternalKey(iter->key(), &parsed_ikey,
  1801. true /* log_err_key */));
  1802. ASSERT_EQ(parsed_ikey.user_key, expected_ukey_and_ts);
  1803. } else {
  1804. ASSERT_EQ(iter->key(), expected_key);
  1805. }
  1806. ASSERT_EQ(iter->timestamp(), expected_ts);
  1807. ASSERT_EQ(iter->value(), expected_value);
  1808. };
  1809. // Forward iterating one version of each key, get in this order:
  1810. // [("key0", Timestamp(2, 0), "blob01"),
  1811. // ("key1", Timestamp(2, 0), "blob11")...]
  1812. {
  1813. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1814. ASSERT_OK(iter->status());
  1815. iter->SeekToFirst();
  1816. for (int i = 0; i < num_blobs; i++) {
  1817. check_iter_entry(iter.get(), keys[i], write_timestamps[1],
  1818. blobs[i] + std::to_string(1), /*key_is_internal*/ false);
  1819. iter->Next();
  1820. }
  1821. }
  1822. // Forward iteration, then reverse to backward.
  1823. {
  1824. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1825. ASSERT_OK(iter->status());
  1826. iter->SeekToFirst();
  1827. for (int i = 0; i < num_blobs * 2 - 1; i++) {
  1828. if (i < num_blobs) {
  1829. check_iter_entry(iter.get(), keys[i], write_timestamps[1],
  1830. blobs[i] + std::to_string(1),
  1831. /*key_is_internal*/ false);
  1832. if (i != num_blobs - 1) {
  1833. iter->Next();
  1834. }
  1835. } else {
  1836. if (i != num_blobs) {
  1837. check_iter_entry(iter.get(), keys[num_blobs * 2 - 1 - i],
  1838. write_timestamps[1],
  1839. blobs[num_blobs * 2 - 1 - i] + std::to_string(1),
  1840. /*key_is_internal*/ false);
  1841. }
  1842. iter->Prev();
  1843. }
  1844. }
  1845. }
  1846. // Backward iterating one versions of each key, get in this order:
  1847. // [("key4", Timestamp(2, 0), "blob41"),
  1848. // ("key3", Timestamp(2, 0), "blob31")...]
  1849. {
  1850. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1851. ASSERT_OK(iter->status());
  1852. iter->SeekToLast();
  1853. for (int i = 0; i < num_blobs; i++) {
  1854. check_iter_entry(iter.get(), keys[num_blobs - 1 - i], write_timestamps[1],
  1855. blobs[num_blobs - 1 - i] + std::to_string(1),
  1856. /*key_is_internal*/ false);
  1857. iter->Prev();
  1858. }
  1859. ASSERT_OK(iter->status());
  1860. }
  1861. // Backward iteration, then reverse to forward.
  1862. {
  1863. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1864. ASSERT_OK(iter->status());
  1865. iter->SeekToLast();
  1866. for (int i = 0; i < num_blobs * 2 - 1; i++) {
  1867. if (i < num_blobs) {
  1868. check_iter_entry(iter.get(), keys[num_blobs - 1 - i],
  1869. write_timestamps[1],
  1870. blobs[num_blobs - 1 - i] + std::to_string(1),
  1871. /*key_is_internal*/ false);
  1872. if (i != num_blobs - 1) {
  1873. iter->Prev();
  1874. }
  1875. } else {
  1876. if (i != num_blobs) {
  1877. check_iter_entry(iter.get(), keys[i - num_blobs], write_timestamps[1],
  1878. blobs[i - num_blobs] + std::to_string(1),
  1879. /*key_is_internal*/ false);
  1880. }
  1881. iter->Next();
  1882. }
  1883. }
  1884. }
  1885. Slice ts_lower_bound(read_timestamps[0]);
  1886. read_options.iter_start_ts = &ts_lower_bound;
  1887. // Forward iterating multiple versions of the same key, get in this order:
  1888. // [("key0", Timestamp(2, 0), "blob01"),
  1889. // ("key0", Timestamp(1, 0), "blob00"),
  1890. // ("key1", Timestamp(2, 0), "blob11")...]
  1891. {
  1892. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1893. ASSERT_OK(iter->status());
  1894. iter->SeekToFirst();
  1895. for (int i = 0; i < num_blobs; i++) {
  1896. for (size_t j = write_timestamps.size(); j > 0; --j) {
  1897. check_iter_entry(iter.get(), keys[i], write_timestamps[j - 1],
  1898. blobs[i] + std::to_string(j - 1));
  1899. iter->Next();
  1900. }
  1901. }
  1902. ASSERT_OK(iter->status());
  1903. }
  1904. // Backward iterating multiple versions of the same key, get in this order:
  1905. // [("key4", Timestamp(1, 0), "blob00"),
  1906. // ("key4", Timestamp(2, 0), "blob01"),
  1907. // ("key3", Timestamp(1, 0), "blob10")...]
  1908. {
  1909. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1910. ASSERT_OK(iter->status());
  1911. iter->SeekToLast();
  1912. for (int i = num_blobs; i > 0; i--) {
  1913. for (size_t j = 0; j < write_timestamps.size(); j++) {
  1914. check_iter_entry(iter.get(), keys[i - 1], write_timestamps[j],
  1915. blobs[i - 1] + std::to_string(j));
  1916. iter->Prev();
  1917. }
  1918. }
  1919. ASSERT_OK(iter->status());
  1920. }
  1921. int upper_bound_idx = num_blobs - 2;
  1922. int lower_bound_idx = 1;
  1923. Slice upper_bound_slice(keys[upper_bound_idx]);
  1924. Slice lower_bound_slice(keys[lower_bound_idx]);
  1925. read_options.iterate_upper_bound = &upper_bound_slice;
  1926. read_options.iterate_lower_bound = &lower_bound_slice;
  1927. // Forward iteration with upper and lower bound.
  1928. {
  1929. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1930. ASSERT_OK(iter->status());
  1931. iter->SeekToFirst();
  1932. for (int i = lower_bound_idx; i < upper_bound_idx; i++) {
  1933. for (size_t j = write_timestamps.size(); j > 0; --j) {
  1934. check_iter_entry(iter.get(), keys[i], write_timestamps[j - 1],
  1935. blobs[i] + std::to_string(j - 1));
  1936. iter->Next();
  1937. }
  1938. }
  1939. ASSERT_OK(iter->status());
  1940. }
  1941. // Backward iteration with upper and lower bound.
  1942. {
  1943. std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
  1944. ASSERT_OK(iter->status());
  1945. iter->SeekToLast();
  1946. for (int i = upper_bound_idx; i > lower_bound_idx; i--) {
  1947. for (size_t j = 0; j < write_timestamps.size(); j++) {
  1948. check_iter_entry(iter.get(), keys[i - 1], write_timestamps[j],
  1949. blobs[i - 1] + std::to_string(j));
  1950. iter->Prev();
  1951. }
  1952. }
  1953. ASSERT_OK(iter->status());
  1954. }
  1955. }
  1956. } // namespace ROCKSDB_NAMESPACE
  1957. int main(int argc, char** argv) {
  1958. ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  1959. ::testing::InitGoogleTest(&argc, argv);
  1960. RegisterCustomObjects(argc, argv);
  1961. return RUN_ALL_TESTS();
  1962. }