| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- #include <array>
- #include <sstream>
- #include <string>
- #include "cache/compressed_secondary_cache.h"
- #include "db/blob/blob_index.h"
- #include "db/blob/blob_log_format.h"
- #include "db/db_test_util.h"
- #include "db/db_with_timestamp_test_util.h"
- #include "port/stack_trace.h"
- #include "test_util/sync_point.h"
- #include "utilities/fault_injection_env.h"
- namespace ROCKSDB_NAMESPACE {
// Test fixture for basic integrated BlobDB functionality. Opens each test's
// database under "db_blob_basic_test"; fsync is disabled in the test env for
// speed.
class DBBlobBasicTest : public DBTestBase {
 protected:
  DBBlobBasicTest()
      : DBTestBase("db_blob_basic_test", /* env_do_fsync */ false) {}
};
- TEST_F(DBBlobBasicTest, GetBlob) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Flush());
- ASSERT_EQ(Get(key), blob_value);
- // Try again with no I/O allowed. The table and the necessary blocks should
- // already be in their respective caches; however, the blob itself can only be
- // read from the blob file, so the read should return Incomplete.
- ReadOptions read_options;
- read_options.read_tier = kBlockCacheTier;
- PinnableSlice result;
- ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result)
- .IsIncomplete());
- }
// Verify that a Get with fill_cache=false does not populate the blob cache
// while fill_cache=true does, probing each case with a follow-up no-I/O
// (kBlockCacheTier) read.
TEST_F(DBBlobBasicTest, GetBlobFromCache) {
  Options options = GetDefaultOptions();

  // One LRU cache shared between the block cache and the blob cache.
  LRUCacheOptions co;
  co.capacity = 2 << 20;  // 2MB
  co.num_shard_bits = 2;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  auto backing_cache = NewLRUCache(co);

  options.enable_blob_files = true;
  options.blob_cache = backing_cache;

  BlockBasedTableOptions block_based_options;
  block_based_options.no_block_cache = false;
  block_based_options.block_cache = backing_cache;
  block_based_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));

  Reopen(options);

  constexpr char key[] = "key";
  constexpr char blob_value[] = "blob_value";

  ASSERT_OK(Put(key, blob_value));
  ASSERT_OK(Flush());

  ReadOptions read_options;

  read_options.fill_cache = false;

  {
    PinnableSlice result;

    // Read from the blob file without inserting the blob into the cache.
    read_options.read_tier = kReadAllTier;
    ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
    ASSERT_EQ(result, blob_value);

    result.Reset();
    read_options.read_tier = kBlockCacheTier;

    // Try again with no I/O allowed. Since we didn't re-fill the cache, the
    // blob itself can only be read from the blob file, so the read should
    // return Incomplete.
    ASSERT_TRUE(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result)
                    .IsIncomplete());
    ASSERT_TRUE(result.empty());
  }

  read_options.fill_cache = true;

  {
    PinnableSlice result;

    // This read inserts the blob into the cache.
    read_options.read_tier = kReadAllTier;
    ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
    ASSERT_EQ(result, blob_value);

    result.Reset();
    read_options.read_tier = kBlockCacheTier;

    // Try again with no I/O allowed. The table and the necessary blocks/blobs
    // should already be in their respective caches.
    ASSERT_OK(db_->Get(read_options, db_->DefaultColumnFamily(), key, &result));
    ASSERT_EQ(result, blob_value);
  }
}
// Verify blob cache population during iteration: scanning with
// fill_cache=false leaves the blob cache empty (so a subsequent no-I/O scan
// fails), while scanning with fill_cache=true inserts one entry per blob (so
// a subsequent no-I/O scan succeeds). Cache insertions are observed via the
// BLOB_DB_CACHE_ADD ticker, which is reset after each check.
TEST_F(DBBlobBasicTest, IterateBlobsFromCache) {
  Options options = GetDefaultOptions();

  // One LRU cache shared between the block cache and the blob cache.
  LRUCacheOptions co;
  co.capacity = 2 << 20;  // 2MB
  co.num_shard_bits = 2;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  auto backing_cache = NewLRUCache(co);

  options.enable_blob_files = true;
  options.blob_cache = backing_cache;

  BlockBasedTableOptions block_based_options;
  block_based_options.no_block_cache = false;
  block_based_options.block_cache = backing_cache;
  block_based_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));

  // Statistics are needed to observe blob cache insertions below.
  options.statistics = CreateDBStatistics();

  Reopen(options);

  int num_blobs = 5;
  std::vector<std::string> keys;
  std::vector<std::string> blobs;

  for (int i = 0; i < num_blobs; ++i) {
    keys.push_back("key" + std::to_string(i));
    blobs.push_back("blob" + std::to_string(i));
    ASSERT_OK(Put(keys[i], blobs[i]));
  }
  ASSERT_OK(Flush());

  ReadOptions read_options;

  {
    // Full scan without populating the blob cache; no insertions expected.
    read_options.fill_cache = false;
    read_options.read_tier = kReadAllTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());

    int i = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      ASSERT_EQ(iter->key().ToString(), keys[i]);
      ASSERT_EQ(iter->value().ToString(), blobs[i]);
      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);

    ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  }

  {
    read_options.fill_cache = false;
    read_options.read_tier = kBlockCacheTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());

    // Try again with no I/O allowed. Since we didn't re-fill the cache,
    // the blob itself can only be read from the blob file, so iter->Valid()
    // should be false.
    iter->SeekToFirst();
    ASSERT_NOK(iter->status());
    ASSERT_FALSE(iter->Valid());

    ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  }

  {
    read_options.fill_cache = true;
    read_options.read_tier = kReadAllTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());

    // Read blobs from the file and refill the cache.
    int i = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      ASSERT_EQ(iter->key().ToString(), keys[i]);
      ASSERT_EQ(iter->value().ToString(), blobs[i]);
      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);

    // One cache insertion per blob read from the blob file.
    ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD),
              num_blobs);
  }

  {
    read_options.fill_cache = false;
    read_options.read_tier = kBlockCacheTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());

    // Try again with no I/O allowed. The table and the necessary blocks/blobs
    // should already be in their respective caches.
    int i = 0;
    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_OK(iter->status());
      ASSERT_EQ(iter->key().ToString(), keys[i]);
      ASSERT_EQ(iter->value().ToString(), blobs[i]);
      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);

    ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 0);
  }
}
// Verify that the iterator pins a blob cache entry exactly while it is
// positioned on a blob-backed value: pinned usage is non-zero on the two
// blob values, and zero both on the inlined value and once the iterator is
// exhausted. Checked in forward and reverse iteration order.
TEST_F(DBBlobBasicTest, IterateBlobsFromCachePinning) {
  constexpr size_t min_blob_size = 6;

  Options options = GetDefaultOptions();

  LRUCacheOptions cache_options;
  cache_options.capacity = 2048;
  cache_options.num_shard_bits = 0;
  cache_options.metadata_charge_policy = kDontChargeCacheMetadata;

  options.blob_cache = NewLRUCache(cache_options);
  options.enable_blob_files = true;
  options.min_blob_size = min_blob_size;

  Reopen(options);

  // Put then iterate over three key-values. The second value is below the size
  // limit and is thus stored inline; the other two are stored separately as
  // blobs. We expect to have something pinned in the cache iff we are
  // positioned on a blob.
  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "long_value";
  static_assert(sizeof(first_value) - 1 >= min_blob_size,
                "first_value too short to be stored as blob");
  ASSERT_OK(Put(first_key, first_value));

  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "short";
  static_assert(sizeof(second_value) - 1 < min_blob_size,
                "second_value too long to be inlined");
  ASSERT_OK(Put(second_key, second_value));

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "other_long_value";
  static_assert(sizeof(third_value) - 1 >= min_blob_size,
                "third_value too short to be stored as blob");
  ASSERT_OK(Put(third_key, third_value));

  ASSERT_OK(Flush());

  {
    // First pass: read everything with fill_cache=true so both blobs end up
    // in the blob cache for the no-I/O passes below.
    ReadOptions read_options;
    read_options.fill_cache = true;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));

    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), first_key);
    ASSERT_EQ(iter->value(), first_value);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), second_key);
    ASSERT_EQ(iter->value(), second_value);

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), third_key);
    ASSERT_EQ(iter->value(), third_value);

    iter->Next();
    ASSERT_FALSE(iter->Valid());
    ASSERT_OK(iter->status());
  }

  {
    // Forward scan served purely from the cache; check pinned usage at each
    // iterator position.
    ReadOptions read_options;
    read_options.fill_cache = false;
    read_options.read_tier = kBlockCacheTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));

    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), first_key);
    ASSERT_EQ(iter->value(), first_value);
    ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);  // positioned on blob

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), second_key);
    ASSERT_EQ(iter->value(), second_value);
    ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);  // inlined value

    iter->Next();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), third_key);
    ASSERT_EQ(iter->value(), third_value);
    ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);  // positioned on blob

    iter->Next();
    ASSERT_FALSE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);  // iterator exhausted
  }

  {
    // Same checks, scanning backwards.
    ReadOptions read_options;
    read_options.fill_cache = false;
    read_options.read_tier = kBlockCacheTier;

    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));

    iter->SeekToLast();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), third_key);
    ASSERT_EQ(iter->value(), third_value);
    ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);

    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), second_key);
    ASSERT_EQ(iter->value(), second_value);
    ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);

    iter->Prev();
    ASSERT_TRUE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(iter->key(), first_key);
    ASSERT_EQ(iter->value(), first_value);
    ASSERT_GT(options.blob_cache->GetPinnedUsage(), 0);

    iter->Prev();
    ASSERT_FALSE(iter->Valid());
    ASSERT_OK(iter->status());
    ASSERT_EQ(options.blob_cache->GetPinnedUsage(), 0);
  }
}
// Verify allow_unprepared_value semantics for blob values: after positioning,
// value() stays empty until PrepareValue() is called, which loads the blob
// and makes the value available without disturbing the key. Exercised via
// SeekToFirst/Next, SeekToLast/Prev, Seek, and SeekForPrev.
TEST_F(DBBlobBasicTest, IterateBlobsAllowUnpreparedValue) {
  Options options = GetDefaultOptions();
  options.enable_blob_files = true;

  Reopen(options);

  constexpr size_t num_blobs = 5;
  std::vector<std::string> keys;
  std::vector<std::string> blobs;

  for (size_t i = 0; i < num_blobs; ++i) {
    keys.emplace_back("key" + std::to_string(i));
    blobs.emplace_back("blob" + std::to_string(i));
    ASSERT_OK(Put(keys[i], blobs[i]));
  }
  ASSERT_OK(Flush());

  ReadOptions read_options;
  read_options.allow_unprepared_value = true;

  std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));

  {
    // Forward scan over all keys.
    size_t i = 0;

    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
      ASSERT_EQ(iter->key(), keys[i]);
      ASSERT_TRUE(iter->value().empty());  // blob not loaded yet
      ASSERT_OK(iter->status());

      ASSERT_TRUE(iter->PrepareValue());  // loads the blob

      ASSERT_EQ(iter->key(), keys[i]);
      ASSERT_EQ(iter->value(), blobs[i]);
      ASSERT_OK(iter->status());

      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);
  }

  {
    // Backward scan over all keys.
    size_t i = 0;

    for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
      ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
      ASSERT_TRUE(iter->value().empty());
      ASSERT_OK(iter->status());

      ASSERT_TRUE(iter->PrepareValue());

      ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
      ASSERT_EQ(iter->value(), blobs[num_blobs - 1 - i]);
      ASSERT_OK(iter->status());

      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);
  }

  {
    // Forward scan starting from the second key.
    size_t i = 1;

    for (iter->Seek(keys[i]); iter->Valid(); iter->Next()) {
      ASSERT_EQ(iter->key(), keys[i]);
      ASSERT_TRUE(iter->value().empty());
      ASSERT_OK(iter->status());

      ASSERT_TRUE(iter->PrepareValue());

      ASSERT_EQ(iter->key(), keys[i]);
      ASSERT_EQ(iter->value(), blobs[i]);
      ASSERT_OK(iter->status());

      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);
  }

  {
    // Backward scan starting from the second-to-last key.
    size_t i = 1;

    for (iter->SeekForPrev(keys[num_blobs - 1 - i]); iter->Valid();
         iter->Prev()) {
      ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
      ASSERT_TRUE(iter->value().empty());
      ASSERT_OK(iter->status());

      ASSERT_TRUE(iter->PrepareValue());

      ASSERT_EQ(iter->key(), keys[num_blobs - 1 - i]);
      ASSERT_EQ(iter->value(), blobs[num_blobs - 1 - i]);
      ASSERT_OK(iter->status());

      ++i;
    }
    ASSERT_OK(iter->status());
    ASSERT_EQ(i, num_blobs);
  }
}
- TEST_F(DBBlobBasicTest, MultiGetBlobs) {
- constexpr size_t min_blob_size = 6;
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = min_blob_size;
- Reopen(options);
- // Put then retrieve three key-values. The first value is below the size limit
- // and is thus stored inline; the other two are stored separately as blobs.
- constexpr size_t num_keys = 3;
- constexpr char first_key[] = "first_key";
- constexpr char first_value[] = "short";
- static_assert(sizeof(first_value) - 1 < min_blob_size,
- "first_value too long to be inlined");
- ASSERT_OK(Put(first_key, first_value));
- constexpr char second_key[] = "second_key";
- constexpr char second_value[] = "long_value";
- static_assert(sizeof(second_value) - 1 >= min_blob_size,
- "second_value too short to be stored as blob");
- ASSERT_OK(Put(second_key, second_value));
- constexpr char third_key[] = "third_key";
- constexpr char third_value[] = "other_long_value";
- static_assert(sizeof(third_value) - 1 >= min_blob_size,
- "third_value too short to be stored as blob");
- ASSERT_OK(Put(third_key, third_value));
- ASSERT_OK(Flush());
- ReadOptions read_options;
- std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
- {
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(values[0], first_value);
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(values[1], second_value);
- ASSERT_OK(statuses[2]);
- ASSERT_EQ(values[2], third_value);
- }
- // Try again with no I/O allowed. The table and the necessary blocks should
- // already be in their respective caches. The first (inlined) value should be
- // successfully read; however, the two blob values could only be read from the
- // blob file, so for those the read should return Incomplete.
- read_options.read_tier = kBlockCacheTier;
- {
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(values[0], first_value);
- ASSERT_TRUE(statuses[1].IsIncomplete());
- ASSERT_TRUE(statuses[2].IsIncomplete());
- }
- }
// MultiGet against the blob cache in four phases: (1) read everything with
// fill_cache=false, (2) confirm a no-I/O MultiGet only serves the inlined
// value, (3) re-read with fill_cache=true to populate the blob cache,
// (4) confirm a no-I/O MultiGet now serves all values from the caches.
TEST_F(DBBlobBasicTest, MultiGetBlobsFromCache) {
  Options options = GetDefaultOptions();

  // One LRU cache shared between the block cache and the blob cache.
  LRUCacheOptions co;
  co.capacity = 2 << 20;  // 2MB
  co.num_shard_bits = 2;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  auto backing_cache = NewLRUCache(co);

  constexpr size_t min_blob_size = 6;
  options.min_blob_size = min_blob_size;
  options.create_if_missing = true;
  options.enable_blob_files = true;
  options.blob_cache = backing_cache;

  BlockBasedTableOptions block_based_options;
  block_based_options.no_block_cache = false;
  block_based_options.block_cache = backing_cache;
  block_based_options.cache_index_and_filter_blocks = true;
  options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));

  DestroyAndReopen(options);

  // Put then retrieve three key-values. The first value is below the size limit
  // and is thus stored inline; the other two are stored separately as blobs.
  constexpr size_t num_keys = 3;

  constexpr char first_key[] = "first_key";
  constexpr char first_value[] = "short";
  static_assert(sizeof(first_value) - 1 < min_blob_size,
                "first_value too long to be inlined");
  ASSERT_OK(Put(first_key, first_value));

  constexpr char second_key[] = "second_key";
  constexpr char second_value[] = "long_value";
  static_assert(sizeof(second_value) - 1 >= min_blob_size,
                "second_value too short to be stored as blob");
  ASSERT_OK(Put(second_key, second_value));

  constexpr char third_key[] = "third_key";
  constexpr char third_value[] = "other_long_value";
  static_assert(sizeof(third_value) - 1 >= min_blob_size,
                "third_value too short to be stored as blob");
  ASSERT_OK(Put(third_key, third_value));

  ASSERT_OK(Flush());

  ReadOptions read_options;
  read_options.fill_cache = false;

  std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};

  {
    // Read everything without populating the blob cache.
    std::array<PinnableSlice, num_keys> values;
    std::array<Status, num_keys> statuses;

    db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
                  keys.data(), values.data(), statuses.data());

    ASSERT_OK(statuses[0]);
    ASSERT_EQ(values[0], first_value);
    ASSERT_OK(statuses[1]);
    ASSERT_EQ(values[1], second_value);
    ASSERT_OK(statuses[2]);
    ASSERT_EQ(values[2], third_value);
  }

  // Try again with no I/O allowed. The first (inlined) value should be
  // successfully read; however, the two blob values could only be read from the
  // blob file, so for those the read should return Incomplete.
  read_options.read_tier = kBlockCacheTier;

  {
    std::array<PinnableSlice, num_keys> values;
    std::array<Status, num_keys> statuses;

    db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
                  keys.data(), values.data(), statuses.data());

    ASSERT_OK(statuses[0]);
    ASSERT_EQ(values[0], first_value);
    ASSERT_TRUE(statuses[1].IsIncomplete());
    ASSERT_TRUE(statuses[2].IsIncomplete());
  }

  // Fill the cache when reading blobs from the blob file.
  read_options.read_tier = kReadAllTier;
  read_options.fill_cache = true;

  {
    std::array<PinnableSlice, num_keys> values;
    std::array<Status, num_keys> statuses;

    db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
                  keys.data(), values.data(), statuses.data());

    ASSERT_OK(statuses[0]);
    ASSERT_EQ(values[0], first_value);
    ASSERT_OK(statuses[1]);
    ASSERT_EQ(values[1], second_value);
    ASSERT_OK(statuses[2]);
    ASSERT_EQ(values[2], third_value);
  }

  // Try again with no I/O allowed. All blobs should be successfully read from
  // the cache.
  read_options.read_tier = kBlockCacheTier;

  {
    std::array<PinnableSlice, num_keys> values;
    std::array<Status, num_keys> statuses;

    db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
                  keys.data(), values.data(), statuses.data());

    ASSERT_OK(statuses[0]);
    ASSERT_EQ(values[0], first_value);
    ASSERT_OK(statuses[1]);
    ASSERT_EQ(values[1], second_value);
    ASSERT_OK(statuses[2]);
    ASSERT_EQ(values[2], third_value);
  }
}
- TEST_F(DBBlobBasicTest, MultiGetWithDirectIO) {
- Options options = GetDefaultOptions();
- // First, create an external SST file ["b"].
- const std::string file_path = dbname_ + "/test.sst";
- {
- SstFileWriter sst_file_writer(EnvOptions(), GetDefaultOptions());
- Status s = sst_file_writer.Open(file_path);
- ASSERT_OK(s);
- ASSERT_OK(sst_file_writer.Put("b", "b_value"));
- ASSERT_OK(sst_file_writer.Finish());
- }
- options.enable_blob_files = true;
- options.min_blob_size = 1000;
- options.use_direct_reads = true;
- options.allow_ingest_behind = true;
- // Open DB with fixed-prefix sst-partitioner so that compaction will cut
- // new table file when encountering a new key whose 1-byte prefix changes.
- constexpr size_t key_len = 1;
- options.sst_partitioner_factory =
- NewSstPartitionerFixedPrefixFactory(key_len);
- Status s = TryReopen(options);
- if (s.IsInvalidArgument()) {
- ROCKSDB_GTEST_SKIP("This test requires direct IO support");
- return;
- }
- ASSERT_OK(s);
- constexpr size_t num_keys = 3;
- constexpr size_t blob_size = 3000;
- constexpr char first_key[] = "a";
- const std::string first_blob(blob_size, 'a');
- ASSERT_OK(Put(first_key, first_blob));
- constexpr char second_key[] = "b";
- const std::string second_blob(2 * blob_size, 'b');
- ASSERT_OK(Put(second_key, second_blob));
- constexpr char third_key[] = "d";
- const std::string third_blob(blob_size, 'd');
- ASSERT_OK(Put(third_key, third_blob));
- // first_blob, second_blob and third_blob in the same blob file.
- // SST Blob file
- // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'|
- // | | | ^ ^ ^
- // | | | | | |
- // | | +---------|-------|--------+
- // | +-----------------|-------+
- // +-------------------------+
- ASSERT_OK(Flush());
- constexpr char fourth_key[] = "c";
- const std::string fourth_blob(blob_size, 'c');
- ASSERT_OK(Put(fourth_key, fourth_blob));
- // fourth_blob in another blob file.
- // SST Blob file SST Blob file
- // L0 ["a", "b", "d"] |'aaaa', 'bbbb', 'dddd'| ["c"] |'cccc'|
- // | | | ^ ^ ^ | ^
- // | | | | | | | |
- // | | +---------|-------|--------+ +-------+
- // | +-----------------|-------+
- // +-------------------------+
- ASSERT_OK(Flush());
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
- /*end=*/nullptr));
- // Due to the above sst partitioner, we get 4 L1 files. The blob files are
- // unchanged.
- // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
- // ^ ^ ^ ^
- // | | | |
- // L0 | | | |
- // L1 ["a"] ["b"] ["c"] | | ["d"] |
- // | | | | | |
- // | | +---------|-------|---------------+
- // | +-----------------|-------+
- // +-------------------------+
- ASSERT_EQ(4, NumTableFilesAtLevel(/*level=*/1));
- {
- // Ingest the external SST file into bottommost level.
- std::vector<std::string> ext_files{file_path};
- IngestExternalFileOptions opts;
- opts.ingest_behind = true;
- ASSERT_OK(
- db_->IngestExternalFile(db_->DefaultColumnFamily(), ext_files, opts));
- }
- // Now the database becomes as follows.
- // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
- // ^ ^ ^ ^
- // | | | |
- // L0 | | | |
- // L1 ["a"] ["b"] ["c"] | | ["d"] |
- // | | | | | |
- // | | +---------|-------|---------------+
- // | +-----------------|-------+
- // +-------------------------+
- //
- // L6 ["b"]
- {
- // Compact ["b"] to bottommost level.
- Slice begin = Slice(second_key);
- Slice end = Slice(second_key);
- CompactRangeOptions cro;
- cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
- ASSERT_OK(db_->CompactRange(cro, &begin, &end));
- }
- // |'aaaa', 'bbbb', 'dddd'| |'cccc'|
- // ^ ^ ^ ^
- // | | | |
- // L0 | | | |
- // L1 ["a"] ["c"] | | ["d"] |
- // | | | | |
- // | +---------|-------|---------------+
- // | +-----------------|-------+
- // +-------|-----------------+
- // |
- // L6 ["b"]
- ASSERT_EQ(3, NumTableFilesAtLevel(/*level=*/1));
- ASSERT_EQ(1, NumTableFilesAtLevel(/*level=*/6));
- bool called = false;
- SyncPoint::GetInstance()->ClearAllCallBacks();
- SyncPoint::GetInstance()->SetCallBack(
- "RandomAccessFileReader::MultiRead:AlignedReqs", [&](void* arg) {
- auto* aligned_reqs = static_cast<std::vector<FSReadRequest>*>(arg);
- assert(aligned_reqs);
- ASSERT_EQ(1, aligned_reqs->size());
- called = true;
- });
- SyncPoint::GetInstance()->EnableProcessing();
- std::array<Slice, num_keys> keys{{first_key, third_key, second_key}};
- {
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- // The MultiGet(), when constructing the KeyContexts, will process the keys
- // in such order: a, d, b. The reason is that ["a"] and ["d"] are in L1,
- // while ["b"] resides in L6.
- // Consequently, the original FSReadRequest list prepared by
- // Version::MultiGetBlob() will be for "a", "d" and "b". It is unsorted as
- // follows:
- //
- // ["a", offset=30, len=3033],
- // ["d", offset=9096, len=3033],
- // ["b", offset=3063, len=6033]
- //
- // If we do not sort them before calling MultiRead() in DirectIO, then the
- // underlying IO merging logic will yield two requests.
- //
- // [offset=0, len=4096] (for "a")
- // [offset=0, len=12288] (result of merging the request for "d" and "b")
- //
- // We need to sort them in Version::MultiGetBlob() so that the underlying
- // IO merging logic in DirectIO mode works as expected. The correct
- // behavior will be one aligned request:
- //
- // [offset=0, len=12288]
- db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- ASSERT_TRUE(called);
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(values[0], first_blob);
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(values[1], third_blob);
- ASSERT_OK(statuses[2]);
- ASSERT_EQ(values[2], second_blob);
- }
- }
- // Verifies MultiGet() across blobs spread over several blob files, exercising
- // all four combinations of read_tier (kReadAllTier / kBlockCacheTier) and
- // fill_cache, with blob cache and block cache sharing one backing LRU cache.
- TEST_F(DBBlobBasicTest, MultiGetBlobsFromMultipleFiles) {
- Options options = GetDefaultOptions();
- LRUCacheOptions co;
- co.capacity = 2 << 20; // 2MB
- co.num_shard_bits = 2;
- co.metadata_charge_policy = kDontChargeCacheMetadata;
- auto backing_cache = NewLRUCache(co);
- options.min_blob_size = 0;
- options.create_if_missing = true;
- options.enable_blob_files = true;
- options.blob_cache = backing_cache;
- BlockBasedTableOptions block_based_options;
- block_based_options.no_block_cache = false;
- block_based_options.block_cache = backing_cache;
- block_based_options.cache_index_and_filter_blocks = true;
- options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
- Reopen(options);
- constexpr size_t kNumBlobFiles = 3;
- constexpr size_t kNumBlobsPerFile = 3;
- constexpr size_t kNumKeys = kNumBlobsPerFile * kNumBlobFiles;
- std::vector<std::string> key_strs;
- std::vector<std::string> value_strs;
- // Each Flush() below produces one SST and one blob file, so the blobs end
- // up distributed over kNumBlobFiles distinct blob files.
- for (size_t i = 0; i < kNumBlobFiles; ++i) {
- for (size_t j = 0; j < kNumBlobsPerFile; ++j) {
- std::string key = "key" + std::to_string(i) + "_" + std::to_string(j);
- std::string value =
- "value_as_blob" + std::to_string(i) + "_" + std::to_string(j);
- ASSERT_OK(Put(key, value));
- key_strs.push_back(key);
- value_strs.push_back(value);
- }
- ASSERT_OK(Flush());
- }
- assert(key_strs.size() == kNumKeys);
- std::array<Slice, kNumKeys> keys;
- for (size_t i = 0; i < keys.size(); ++i) {
- keys[i] = key_strs[i];
- }
- ReadOptions read_options;
- // Pass 1: read everything from storage without populating the cache.
- read_options.read_tier = kReadAllTier;
- read_options.fill_cache = false;
- {
- std::array<PinnableSlice, kNumKeys> values;
- std::array<Status, kNumKeys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
- keys.data(), values.data(), statuses.data());
- for (size_t i = 0; i < kNumKeys; ++i) {
- ASSERT_OK(statuses[i]);
- ASSERT_EQ(value_strs[i], values[i]);
- }
- }
- // Pass 2: cache-only reads must all be Incomplete since nothing was cached.
- read_options.read_tier = kBlockCacheTier;
- {
- std::array<PinnableSlice, kNumKeys> values;
- std::array<Status, kNumKeys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
- keys.data(), values.data(), statuses.data());
- for (size_t i = 0; i < kNumKeys; ++i) {
- ASSERT_TRUE(statuses[i].IsIncomplete());
- ASSERT_TRUE(values[i].empty());
- }
- }
- // Pass 3: read from storage again, this time warming the cache.
- read_options.read_tier = kReadAllTier;
- read_options.fill_cache = true;
- {
- std::array<PinnableSlice, kNumKeys> values;
- std::array<Status, kNumKeys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
- keys.data(), values.data(), statuses.data());
- for (size_t i = 0; i < kNumKeys; ++i) {
- ASSERT_OK(statuses[i]);
- ASSERT_EQ(value_strs[i], values[i]);
- }
- }
- // Pass 4: cache-only reads now succeed because pass 3 filled the cache.
- read_options.read_tier = kBlockCacheTier;
- {
- std::array<PinnableSlice, kNumKeys> values;
- std::array<Status, kNumKeys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), kNumKeys,
- keys.data(), values.data(), statuses.data());
- for (size_t i = 0; i < kNumKeys; ++i) {
- ASSERT_OK(statuses[i]);
- ASSERT_EQ(value_strs[i], values[i]);
- }
- }
- }
- // Verifies that Get() surfaces Status::Corruption when the stored blob index
- // is tampered with (first byte stripped via a SyncPoint callback).
- TEST_F(DBBlobBasicTest, GetBlob_CorruptIndex) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob[] = "blob";
- ASSERT_OK(Put(key, blob));
- ASSERT_OK(Flush());
- SyncPoint::GetInstance()->SetCallBack(
- "Version::Get::TamperWithBlobIndex", [](void* arg) {
- Slice* const blob_index = static_cast<Slice*>(arg);
- assert(blob_index);
- assert(!blob_index->empty());
- // Dropping the leading byte invalidates the encoded blob index.
- blob_index->remove_prefix(1);
- });
- SyncPoint::GetInstance()->EnableProcessing();
- PinnableSlice result;
- ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
- .IsCorruption());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- // Verifies that MultiGet() reports Corruption only for the key whose blob
- // index was tampered with, while the other keys still read back correctly.
- TEST_F(DBBlobBasicTest, MultiGetBlob_CorruptIndex) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- options.create_if_missing = true;
- DestroyAndReopen(options);
- constexpr size_t kNumOfKeys = 3;
- std::array<std::string, kNumOfKeys> key_strs;
- std::array<std::string, kNumOfKeys> value_strs;
- // One extra slot for the key whose index will be corrupted.
- std::array<Slice, kNumOfKeys + 1> keys;
- for (size_t i = 0; i < kNumOfKeys; ++i) {
- key_strs[i] = "foo" + std::to_string(i);
- value_strs[i] = "blob_value" + std::to_string(i);
- ASSERT_OK(Put(key_strs[i], value_strs[i]));
- keys[i] = key_strs[i];
- }
- constexpr char key[] = "key";
- constexpr char blob[] = "blob";
- ASSERT_OK(Put(key, blob));
- keys[kNumOfKeys] = key;
- ASSERT_OK(Flush());
- SyncPoint::GetInstance()->SetCallBack(
- "Version::MultiGet::TamperWithBlobIndex", [&key](void* arg) {
- KeyContext* const key_context = static_cast<KeyContext*>(arg);
- assert(key_context);
- assert(key_context->key);
- // Only corrupt the blob index of the designated key.
- if (*(key_context->key) == key) {
- Slice* const blob_index = key_context->value;
- assert(blob_index);
- assert(!blob_index->empty());
- blob_index->remove_prefix(1);
- }
- });
- SyncPoint::GetInstance()->EnableProcessing();
- std::array<PinnableSlice, kNumOfKeys + 1> values;
- std::array<Status, kNumOfKeys + 1> statuses;
- db_->MultiGet(ReadOptions(), dbfull()->DefaultColumnFamily(), kNumOfKeys + 1,
- keys.data(), values.data(), statuses.data(),
- /*sorted_input=*/false);
- for (size_t i = 0; i < kNumOfKeys + 1; ++i) {
- if (i != kNumOfKeys) {
- ASSERT_OK(statuses[i]);
- ASSERT_EQ("blob_value" + std::to_string(i), values[i]);
- } else {
- ASSERT_TRUE(statuses[i].IsCorruption());
- }
- }
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- // Verifies that MultiGet() aborts all lookups when the cumulative value size
- // exceeds ReadOptions::value_size_soft_limit (set to 1 byte here).
- TEST_F(DBBlobBasicTest, MultiGetBlob_ExceedSoftLimit) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr size_t kNumOfKeys = 3;
- std::array<std::string, kNumOfKeys> key_bufs;
- std::array<std::string, kNumOfKeys> value_bufs;
- std::array<Slice, kNumOfKeys> keys;
- for (size_t i = 0; i < kNumOfKeys; ++i) {
- key_bufs[i] = "foo" + std::to_string(i);
- value_bufs[i] = "blob_value" + std::to_string(i);
- ASSERT_OK(Put(key_bufs[i], value_bufs[i]));
- keys[i] = key_bufs[i];
- }
- ASSERT_OK(Flush());
- std::array<PinnableSlice, kNumOfKeys> values;
- std::array<Status, kNumOfKeys> statuses;
- ReadOptions read_opts;
- // Any blob value is larger than 1 byte, so every lookup must abort.
- read_opts.value_size_soft_limit = 1;
- db_->MultiGet(read_opts, dbfull()->DefaultColumnFamily(), kNumOfKeys,
- keys.data(), values.data(), statuses.data(),
- /*sorted_input=*/true);
- for (const auto& s : statuses) {
- ASSERT_TRUE(s.IsAborted());
- }
- }
- // Verifies that Get() treats an inlined-TTL blob index as Corruption: the
- // non-TTL read path does not support TTL blob indexes, so a fabricated one
- // written through WriteBatchInternal::PutBlobIndex must fail the read.
- TEST_F(DBBlobBasicTest, GetBlob_InlinedTTLIndex) {
- constexpr uint64_t min_blob_size = 10;
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = min_blob_size;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob[] = "short";
- // BUGFIX: the original asserted on sizeof(short) -- the 2-byte integer
- // type -- instead of the "short" string literal. It passed by accident
- // (2 - 1 < 10); the intent is that the blob's length stays below
- // min_blob_size so the value is eligible for inlining.
- static_assert(sizeof(blob) - 1 < min_blob_size,
- "Blob too long to be inlined");
- // Fake an inlined TTL blob index.
- std::string blob_index;
- constexpr uint64_t expiration = 1234567890;
- BlobIndex::EncodeInlinedTTL(&blob_index, expiration, blob);
- WriteBatch batch;
- ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
- ASSERT_OK(db_->Write(WriteOptions(), &batch));
- ASSERT_OK(Flush());
- PinnableSlice result;
- ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
- .IsCorruption());
- }
- // Verifies that Get() returns Corruption when a blob index references a
- // blob file number that does not exist in the current version.
- TEST_F(DBBlobBasicTest, GetBlob_IndexWithInvalidFileNumber) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key[] = "key";
- // Fake a blob index referencing a non-existent blob file.
- std::string blob_index;
- constexpr uint64_t blob_file_number = 1000;
- constexpr uint64_t offset = 1234;
- constexpr uint64_t size = 5678;
- BlobIndex::EncodeBlob(&blob_index, blob_file_number, offset, size,
- kNoCompression);
- WriteBatch batch;
- ASSERT_OK(WriteBatchInternal::PutBlobIndex(&batch, 0, key, blob_index));
- ASSERT_OK(db_->Write(WriteOptions(), &batch));
- ASSERT_OK(Flush());
- PinnableSlice result;
- ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
- .IsCorruption());
- }
- // Verifies that blob file I/O (write during flush, read during Get) is
- // captured by the IO tracing facility and can be parsed back from the
- // trace file.
- TEST_F(DBBlobBasicTest, GenerateIOTracing) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- std::string trace_file = dbname_ + "/io_trace_file";
- Reopen(options);
- {
- // Create IO trace file
- std::unique_ptr<TraceWriter> trace_writer;
- ASSERT_OK(
- NewFileTraceWriter(env_, EnvOptions(), trace_file, &trace_writer));
- ASSERT_OK(db_->StartIOTrace(TraceOptions(), std::move(trace_writer)));
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Flush());
- ASSERT_EQ(Get(key), blob_value);
- ASSERT_OK(db_->EndIOTrace());
- ASSERT_OK(env_->FileExists(trace_file));
- }
- {
- // Parse trace file to check file operations related to blob files are
- // recorded.
- std::unique_ptr<TraceReader> trace_reader;
- ASSERT_OK(
- NewFileTraceReader(env_, EnvOptions(), trace_file, &trace_reader));
- IOTraceReader reader(std::move(trace_reader));
- IOTraceHeader header;
- ASSERT_OK(reader.ReadHeader(&header));
- ASSERT_EQ(kMajorVersion, static_cast<int>(header.rocksdb_major_version));
- ASSERT_EQ(kMinorVersion, static_cast<int>(header.rocksdb_minor_version));
- // Read records.
- int blob_files_op_count = 0;
- Status status;
- // ReadIOOp() returns non-OK at end of trace, terminating the loop.
- while (true) {
- IOTraceRecord record;
- status = reader.ReadIOOp(&record);
- if (!status.ok()) {
- break;
- }
- if (record.file_name.find("blob") != std::string::npos) {
- blob_files_op_count++;
- }
- }
- // Assuming blob files will have Append, Close and then Read operations.
- ASSERT_GT(blob_files_op_count, 2);
- }
- }
- // Verifies that best-efforts recovery falls back to the version preceding
- // the newest (deleted) blob file: after removing the highest-numbered blob
- // file, reads observe the values from the earlier flush.
- TEST_F(DBBlobBasicTest, BestEffortsRecovery_MissingNewestBlobFile) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- options.create_if_missing = true;
- Reopen(options);
- // Prevent background deletion so all blob files remain on disk.
- ASSERT_OK(dbfull()->DisableFileDeletions());
- constexpr int kNumTableFiles = 2;
- for (int i = 0; i < kNumTableFiles; ++i) {
- for (char ch = 'a'; ch != 'c'; ++ch) {
- std::string key(1, ch);
- ASSERT_OK(Put(key, "value" + std::to_string(i)));
- }
- ASSERT_OK(Flush());
- }
- Close();
- std::vector<std::string> files;
- ASSERT_OK(env_->GetChildren(dbname_, &files));
- std::string blob_file_path;
- uint64_t max_blob_file_num = kInvalidBlobFileNumber;
- // Locate the newest (highest-numbered) blob file.
- for (const auto& fname : files) {
- uint64_t file_num = 0;
- FileType type;
- if (ParseFileName(fname, &file_num, /*info_log_name_prefix=*/"", &type) &&
- type == kBlobFile) {
- if (file_num > max_blob_file_num) {
- max_blob_file_num = file_num;
- blob_file_path = dbname_ + "/" + fname;
- }
- }
- }
- // NOTE(review): assumes at least one blob file was found; otherwise
- // blob_file_path is empty and DeleteFile would fail the assertion.
- ASSERT_OK(env_->DeleteFile(blob_file_path));
- options.best_efforts_recovery = true;
- Reopen(options);
- std::string value;
- ASSERT_OK(db_->Get(ReadOptions(), "a", &value));
- // Recovery rolled back to the first flush: "value0" for kNumTableFiles == 2.
- ASSERT_EQ("value" + std::to_string(kNumTableFiles - 2), value);
- }
- // Verifies that Get() resolves a merge chain whose base value is a blob:
- // Put("v1") followed by two Merges yields the appended result.
- TEST_F(DBBlobBasicTest, GetMergeBlobWithPut) {
- Options options = GetDefaultOptions();
- options.merge_operator = MergeOperators::CreateStringAppendOperator();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- ASSERT_OK(Put("Key1", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Merge("Key1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Merge("Key1", "v3"));
- ASSERT_OK(Flush());
- std::string value;
- // First Get checks the call succeeds; the assertion below re-reads via the
- // helper and checks the merged result.
- ASSERT_OK(db_->Get(ReadOptions(), "Key1", &value));
- ASSERT_EQ(Get("Key1"), "v1,v2,v3");
- }
- // Verifies that a cache-only (kBlockCacheTier) read of a merge chain fails
- // with Incomplete when the base blob value is not in the blob cache, even
- // though a prior regular Get() cached the data blocks.
- TEST_F(DBBlobBasicTest, GetMergeBlobFromMemoryTier) {
- Options options = GetDefaultOptions();
- options.merge_operator = MergeOperators::CreateStringAppendOperator();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- ASSERT_OK(Put(Key(0), "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Merge(Key(0), "v2"));
- ASSERT_OK(Flush());
- // Regular `Get()` loads data block to cache.
- std::string value;
- ASSERT_OK(db_->Get(ReadOptions(), Key(0), &value));
- ASSERT_EQ("v1,v2", value);
- // Base value blob is still uncached, so an in-memory read will fail.
- ReadOptions read_options;
- read_options.read_tier = kBlockCacheTier;
- ASSERT_TRUE(db_->Get(read_options, Key(0), &value).IsIncomplete());
- }
- // Verifies that MultiGet() resolves merge chains of different lengths per
- // key (2 merges / 1 merge / plain put) when base values are blobs.
- TEST_F(DBBlobBasicTest, MultiGetMergeBlobWithPut) {
- constexpr size_t num_keys = 3;
- Options options = GetDefaultOptions();
- options.merge_operator = MergeOperators::CreateStringAppendOperator();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- ASSERT_OK(Put("Key0", "v0_0"));
- ASSERT_OK(Put("Key1", "v1_0"));
- ASSERT_OK(Put("Key2", "v2_0"));
- ASSERT_OK(Flush());
- ASSERT_OK(Merge("Key0", "v0_1"));
- ASSERT_OK(Merge("Key1", "v1_1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Merge("Key0", "v0_2"));
- ASSERT_OK(Flush());
- std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(values[1], "v1_0,v1_1");
- ASSERT_OK(statuses[2]);
- ASSERT_EQ(values[2], "v2_0");
- }
- // Verifies the blob-related DB properties (file count, live/total sizes,
- // garbage size, and the formatted kBlobStats string) against sizes computed
- // from the blob log header/record/footer formulas, before and after creating
- // garbage via a Delete + compaction.
- TEST_F(DBBlobBasicTest, Properties) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key1[] = "key1";
- constexpr size_t key1_size = sizeof(key1) - 1;
- constexpr char key2[] = "key2";
- constexpr size_t key2_size = sizeof(key2) - 1;
- constexpr char key3[] = "key3";
- constexpr size_t key3_size = sizeof(key3) - 1;
- constexpr char blob[] = "00000000000000";
- constexpr size_t blob_size = sizeof(blob) - 1;
- constexpr char longer_blob[] = "00000000000000000000";
- constexpr size_t longer_blob_size = sizeof(longer_blob) - 1;
- ASSERT_OK(Put(key1, blob));
- ASSERT_OK(Put(key2, longer_blob));
- ASSERT_OK(Flush());
- // Expected on-disk size of blob file #1 (two records).
- constexpr size_t first_blob_file_expected_size =
- BlobLogHeader::kSize +
- BlobLogRecord::CalculateAdjustmentForRecordHeader(key1_size) + blob_size +
- BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
- longer_blob_size + BlobLogFooter::kSize;
- ASSERT_OK(Put(key3, blob));
- ASSERT_OK(Flush());
- // Expected on-disk size of blob file #2 (one record).
- constexpr size_t second_blob_file_expected_size =
- BlobLogHeader::kSize +
- BlobLogRecord::CalculateAdjustmentForRecordHeader(key3_size) + blob_size +
- BlobLogFooter::kSize;
- constexpr size_t total_expected_size =
- first_blob_file_expected_size + second_blob_file_expected_size;
- // Number of blob files
- uint64_t num_blob_files = 0;
- ASSERT_TRUE(
- db_->GetIntProperty(DB::Properties::kNumBlobFiles, &num_blob_files));
- ASSERT_EQ(num_blob_files, 2);
- // Total size of live blob files
- uint64_t live_blob_file_size = 0;
- ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileSize,
- &live_blob_file_size));
- ASSERT_EQ(live_blob_file_size, total_expected_size);
- // Total amount of garbage in live blob files
- {
- uint64_t live_blob_file_garbage_size = 0;
- ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
- &live_blob_file_garbage_size));
- ASSERT_EQ(live_blob_file_garbage_size, 0);
- }
- // Total size of all blob files across all versions
- // Note: this should be the same as above since we only have one
- // version at this point.
- uint64_t total_blob_file_size = 0;
- ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
- &total_blob_file_size));
- ASSERT_EQ(total_blob_file_size, total_expected_size);
- // Delete key2 to create some garbage
- ASSERT_OK(Delete(key2));
- ASSERT_OK(Flush());
- constexpr Slice* begin = nullptr;
- constexpr Slice* end = nullptr;
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
- // The deleted key2 record becomes garbage in blob file #1.
- constexpr size_t expected_garbage_size =
- BlobLogRecord::CalculateAdjustmentForRecordHeader(key2_size) +
- longer_blob_size;
- constexpr double expected_space_amp =
- static_cast<double>(total_expected_size) /
- (total_expected_size - expected_garbage_size);
- // Blob file stats
- std::string blob_stats;
- ASSERT_TRUE(db_->GetProperty(DB::Properties::kBlobStats, &blob_stats));
- std::ostringstream oss;
- oss << "Number of blob files: 2\nTotal size of blob files: "
- << total_expected_size
- << "\nTotal size of garbage in blob files: " << expected_garbage_size
- << "\nBlob file space amplification: " << expected_space_amp << '\n';
- ASSERT_EQ(blob_stats, oss.str());
- // Total amount of garbage in live blob files
- {
- uint64_t live_blob_file_garbage_size = 0;
- ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kLiveBlobFileGarbageSize,
- &live_blob_file_garbage_size));
- ASSERT_EQ(live_blob_file_garbage_size, expected_garbage_size);
- }
- }
- // Verifies kTotalBlobFileSize deduplicates blob files shared between
- // versions: an iterator pins an old version while a Delete + Put + compaction
- // creates a new one; the shared middle blob file must be counted once.
- TEST_F(DBBlobBasicTest, PropertiesMultiVersion) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key1[] = "key1";
- constexpr char key2[] = "key2";
- constexpr char key3[] = "key3";
- constexpr size_t key_size = sizeof(key1) - 1;
- static_assert(sizeof(key2) - 1 == key_size, "unexpected size: key2");
- static_assert(sizeof(key3) - 1 == key_size, "unexpected size: key3");
- constexpr char blob[] = "0000000000";
- constexpr size_t blob_size = sizeof(blob) - 1;
- ASSERT_OK(Put(key1, blob));
- ASSERT_OK(Flush());
- ASSERT_OK(Put(key2, blob));
- ASSERT_OK(Flush());
- // Create an iterator to keep the current version alive
- std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));
- ASSERT_OK(iter->status());
- // Note: the Delete and subsequent compaction results in the first blob file
- // not making it to the final version. (It is still part of the previous
- // version kept alive by the iterator though.) On the other hand, the Put
- // results in a third blob file.
- ASSERT_OK(Delete(key1));
- ASSERT_OK(Put(key3, blob));
- ASSERT_OK(Flush());
- constexpr Slice* begin = nullptr;
- constexpr Slice* end = nullptr;
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), begin, end));
- // Total size of all blob files across all versions: between the two versions,
- // we should have three blob files of the same size with one blob each.
- // The version kept alive by the iterator contains the first and the second
- // blob file, while the final version contains the second and the third blob
- // file. (The second blob file is thus shared by the two versions but should
- // be counted only once.)
- uint64_t total_blob_file_size = 0;
- ASSERT_TRUE(db_->GetIntProperty(DB::Properties::kTotalBlobFileSize,
- &total_blob_file_size));
- ASSERT_EQ(total_blob_file_size,
- 3 * (BlobLogHeader::kSize +
- BlobLogRecord::CalculateAdjustmentForRecordHeader(key_size) +
- blob_size + BlobLogFooter::kSize));
- }
- // Parameterized fixture: the string parameter names the SyncPoint at which
- // a fault-injection Env starts returning IOError, letting each test inject
- // failures at a specific point in the blob read path.
- class DBBlobBasicIOErrorTest : public DBBlobBasicTest,
- public testing::WithParamInterface<std::string> {
- protected:
- DBBlobBasicIOErrorTest() : sync_point_(GetParam()) {
- fault_injection_env_.reset(new FaultInjectionTestEnv(env_));
- }
- ~DBBlobBasicIOErrorTest() { Close(); }
- // Env wrapper that can be switched to fail all filesystem operations.
- std::unique_ptr<FaultInjectionTestEnv> fault_injection_env_;
- // Name of the SyncPoint where the failure is injected.
- std::string sync_point_;
- };
- // Same fixture as DBBlobBasicIOErrorTest; separate type so the MultiGet
- // tests can be instantiated with MultiGet-specific sync points.
- class DBBlobBasicIOErrorMultiGetTest : public DBBlobBasicIOErrorTest {
- public:
- DBBlobBasicIOErrorMultiGetTest() : DBBlobBasicIOErrorTest() {}
- };
- // Instantiate the IO-error fixtures: failures are injected either when the
- // blob file is opened or when the blob payload is read.
- INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorTest,
- ::testing::ValuesIn(std::vector<std::string>{
- "BlobFileReader::OpenFile:NewRandomAccessFile",
- "BlobFileReader::GetBlob:ReadFromFile"}));
- INSTANTIATE_TEST_CASE_P(DBBlobBasicTest, DBBlobBasicIOErrorMultiGetTest,
- ::testing::ValuesIn(std::vector<std::string>{
- "BlobFileReader::OpenFile:NewRandomAccessFile",
- "BlobFileReader::MultiGetBlob:ReadFromFile"}));
- // Verifies that Get() surfaces IOError when the filesystem fails at the
- // parameterized sync point during the blob read.
- TEST_P(DBBlobBasicIOErrorTest, GetBlob_IOError) {
- // NOTE(review): uses default-constructed Options rather than
- // GetDefaultOptions() like most tests in this file -- confirm intentional.
- Options options;
- options.env = fault_injection_env_.get();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Flush());
- // Deactivate the filesystem once execution reaches the sync point.
- SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
- fault_injection_env_->SetFilesystemActive(false,
- Status::IOError(sync_point_));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- PinnableSlice result;
- ASSERT_TRUE(db_->Get(ReadOptions(), db_->DefaultColumnFamily(), key, &result)
- .IsIOError());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- // Verifies that MultiGet() reports IOError for every key when the blob file
- // read fails (both keys share a single blob file here).
- TEST_P(DBBlobBasicIOErrorMultiGetTest, MultiGetBlobs_IOError) {
- Options options = GetDefaultOptions();
- options.env = fault_injection_env_.get();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr size_t num_keys = 2;
- constexpr char first_key[] = "first_key";
- constexpr char first_value[] = "first_value";
- ASSERT_OK(Put(first_key, first_value));
- constexpr char second_key[] = "second_key";
- constexpr char second_value[] = "second_value";
- ASSERT_OK(Put(second_key, second_value));
- ASSERT_OK(Flush());
- std::array<Slice, num_keys> keys{{first_key, second_key}};
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- // Deactivate the filesystem once execution reaches the sync point.
- SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
- fault_injection_env_->SetFilesystemActive(false,
- Status::IOError(sync_point_));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- ASSERT_TRUE(statuses[0].IsIOError());
- ASSERT_TRUE(statuses[1].IsIOError());
- }
- // Verifies partial failure in MultiGet() across two blob files: the first
- // file is read successfully, then the filesystem is deactivated so reading
- // the second file fails with IOError while the first key's result stands.
- TEST_P(DBBlobBasicIOErrorMultiGetTest, MultipleBlobFiles) {
- Options options = GetDefaultOptions();
- options.env = fault_injection_env_.get();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr size_t num_keys = 2;
- constexpr char key1[] = "key1";
- constexpr char value1[] = "blob1";
- ASSERT_OK(Put(key1, value1));
- ASSERT_OK(Flush());
- constexpr char key2[] = "key2";
- constexpr char value2[] = "blob2";
- ASSERT_OK(Put(key2, value2));
- ASSERT_OK(Flush());
- std::array<Slice, num_keys> keys{{key1, key2}};
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- // Let the first blob file operation through; fail from the second on.
- bool first_blob_file = true;
- SyncPoint::GetInstance()->SetCallBack(
- sync_point_, [&first_blob_file, this](void* /* arg */) {
- if (first_blob_file) {
- first_blob_file = false;
- return;
- }
- fault_injection_env_->SetFilesystemActive(false,
- Status::IOError(sync_point_));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(value1, values[0]);
- ASSERT_TRUE(statuses[1].IsIOError());
- }
- // Repro test for a specific bug where `MultiGet()` would fail to open a table
- // in `FindTable()` and then proceed to return raw blob handles for the other
- // keys.
- TEST_F(DBBlobBasicTest, MultiGetFindTable_IOError) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- // Force no table cache so every read will preload the SST file.
- dbfull()->TEST_table_cache()->SetCapacity(0);
- constexpr size_t num_keys = 2;
- constexpr char key1[] = "key1";
- constexpr char value1[] = "blob1";
- ASSERT_OK(Put(key1, value1));
- ASSERT_OK(Flush());
- constexpr char key2[] = "key2";
- constexpr char value2[] = "blob2";
- ASSERT_OK(Put(key2, value2));
- ASSERT_OK(Flush());
- std::atomic<int> num_files_opened = 0;
- // This test would be more realistic if we injected an `IOError` from the
- // `FileSystem`
- SyncPoint::GetInstance()->SetCallBack(
- "TableCache::MultiGet:FindTable", [&](void* status) {
- num_files_opened++;
- // Fail only the second table open; keys are visited newest-file-first,
- // so the second open corresponds to key1's (older) SST.
- if (num_files_opened == 2) {
- Status* s = static_cast<Status*>(status);
- *s = Status::IOError();
- }
- });
- SyncPoint::GetInstance()->EnableProcessing();
- std::array<Slice, num_keys> keys{{key1, key2}};
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- db_->MultiGet(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- ASSERT_TRUE(statuses[0].IsIOError());
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(value2, values[1]);
- }
- namespace {
- // Compaction filter that rewrites every plain value with an identical copy.
- // Copying the value forces compaction to resolve (read) blob values, which
- // is what the IO-error test below needs to trigger a blob file read.
- class ReadBlobCompactionFilter : public CompactionFilter {
- public:
- ReadBlobCompactionFilter() = default;
- const char* Name() const override {
- return "rocksdb.compaction.filter.read.blob";
- }
- CompactionFilter::Decision FilterV2(
- int /*level*/, const Slice& /*key*/, ValueType value_type,
- const Slice& existing_value, std::string* new_value,
- std::string* /*skip_until*/) const override {
- // Only plain values are rewritten; merge operands etc. pass through.
- if (value_type != CompactionFilter::ValueType::kValue) {
- return CompactionFilter::Decision::kKeep;
- }
- assert(new_value);
- new_value->assign(existing_value.data(), existing_value.size());
- return CompactionFilter::Decision::kChangeValue;
- }
- };
- } // anonymous namespace
- // Verifies that CompactRange() surfaces IOError when a compaction filter
- // forces blob reads and the blob file read fails at the injected sync point.
- TEST_P(DBBlobBasicIOErrorTest, CompactionFilterReadBlob_IOError) {
- Options options = GetDefaultOptions();
- options.env = fault_injection_env_.get();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- options.create_if_missing = true;
- std::unique_ptr<CompactionFilter> compaction_filter_guard(
- new ReadBlobCompactionFilter);
- options.compaction_filter = compaction_filter_guard.get();
- DestroyAndReopen(options);
- constexpr char key[] = "foo";
- constexpr char blob_value[] = "foo_blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Flush());
- // Deactivate the filesystem once execution reaches the sync point.
- SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
- fault_injection_env_->SetFilesystemActive(false,
- Status::IOError(sync_point_));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_TRUE(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
- /*end=*/nullptr)
- .IsIOError());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- // Verifies lazy blob loading with allow_unprepared_value: iteration itself
- // succeeds with an empty value, but PrepareValue() fails with IOError and
- // invalidates the iterator when the blob read is faulted.
- TEST_P(DBBlobBasicIOErrorTest, IterateBlobsAllowUnpreparedValue_IOError) {
- Options options;
- options.env = fault_injection_env_.get();
- options.enable_blob_files = true;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Flush());
- // Deactivate the filesystem once execution reaches the sync point.
- SyncPoint::GetInstance()->SetCallBack(sync_point_, [this](void* /* arg */) {
- fault_injection_env_->SetFilesystemActive(false,
- Status::IOError(sync_point_));
- });
- SyncPoint::GetInstance()->EnableProcessing();
- ReadOptions read_options;
- read_options.allow_unprepared_value = true;
- std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
- iter->SeekToFirst();
- // The blob is not read yet, so positioning succeeds with an empty value.
- ASSERT_TRUE(iter->Valid());
- ASSERT_EQ(iter->key(), key);
- ASSERT_TRUE(iter->value().empty());
- ASSERT_OK(iter->status());
- // PrepareValue() triggers the blob read, which hits the injected IOError.
- ASSERT_FALSE(iter->PrepareValue());
- ASSERT_FALSE(iter->Valid());
- ASSERT_TRUE(iter->status().IsIOError());
- SyncPoint::GetInstance()->DisableProcessing();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- }
- // Verifies prepopulate_blob_cache = kFlushOnly: every flushed blob is added
- // to the blob cache (BLOB_DB_CACHE_ADD), subsequent Gets are pure cache hits,
- // and compaction does not add further cache entries.
- TEST_F(DBBlobBasicTest, WarmCacheWithBlobsDuringFlush) {
- Options options = GetDefaultOptions();
- LRUCacheOptions co;
- co.capacity = 1 << 25;
- co.num_shard_bits = 2;
- co.metadata_charge_policy = kDontChargeCacheMetadata;
- auto backing_cache = NewLRUCache(co);
- options.blob_cache = backing_cache;
- BlockBasedTableOptions block_based_options;
- block_based_options.no_block_cache = false;
- block_based_options.block_cache = backing_cache;
- block_based_options.cache_index_and_filter_blocks = true;
- options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
- options.enable_blob_files = true;
- options.create_if_missing = true;
- options.disable_auto_compactions = true;
- options.enable_blob_garbage_collection = true;
- options.blob_garbage_collection_age_cutoff = 1.0;
- options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- DestroyAndReopen(options);
- constexpr size_t kNumBlobs = 10;
- constexpr size_t kValueSize = 100;
- std::string value(kValueSize, 'a');
- for (size_t i = 1; i <= kNumBlobs; i++) {
- ASSERT_OK(Put(std::to_string(i), value));
- ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
- ASSERT_OK(Flush());
- // Two blobs per flush, cumulative across iterations.
- ASSERT_EQ(i * 2, options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
- ASSERT_EQ(value, Get(std::to_string(i)));
- ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
- ASSERT_EQ(0, options.statistics->getTickerCount(BLOB_DB_CACHE_MISS));
- ASSERT_EQ(i * 2, options.statistics->getTickerCount(BLOB_DB_CACHE_HIT));
- }
- // Verify compaction not counted
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
- /*end=*/nullptr));
- EXPECT_EQ(kNumBlobs * 2,
- options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
- }
- // Verifies that prepopulate_blob_cache can be toggled at runtime via
- // SetOptions(): with kFlushOnly, Gets hit the warmed cache; after switching
- // to kDisable, flushes no longer warm the cache and Gets miss then add.
- TEST_F(DBBlobBasicTest, DynamicallyWarmCacheDuringFlush) {
- Options options = GetDefaultOptions();
- LRUCacheOptions co;
- co.capacity = 1 << 25;
- co.num_shard_bits = 2;
- co.metadata_charge_policy = kDontChargeCacheMetadata;
- auto backing_cache = NewLRUCache(co);
- options.blob_cache = backing_cache;
- BlockBasedTableOptions block_based_options;
- block_based_options.no_block_cache = false;
- block_based_options.block_cache = backing_cache;
- block_based_options.cache_index_and_filter_blocks = true;
- options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
- options.enable_blob_files = true;
- options.create_if_missing = true;
- options.disable_auto_compactions = true;
- options.enable_blob_garbage_collection = true;
- options.blob_garbage_collection_age_cutoff = 1.0;
- options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;
- options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
- DestroyAndReopen(options);
- constexpr size_t kNumBlobs = 10;
- constexpr size_t kValueSize = 100;
- std::string value(kValueSize, 'a');
- // Phase 1 (kFlushOnly): flush warms the cache, Gets are pure hits.
- for (size_t i = 1; i <= 5; i++) {
- ASSERT_OK(Put(std::to_string(i), value));
- ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
- ASSERT_OK(Flush());
- ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
- ASSERT_EQ(value, Get(std::to_string(i)));
- ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
- ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
- ASSERT_EQ(0,
- options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS));
- ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT));
- }
- ASSERT_OK(dbfull()->SetOptions({{"prepopulate_blob_cache", "kDisable"}}));
- // Phase 2 (kDisable): flush no longer warms the cache, Gets miss then add.
- for (size_t i = 6; i <= kNumBlobs; i++) {
- ASSERT_OK(Put(std::to_string(i), value));
- ASSERT_OK(Put(std::to_string(i + kNumBlobs), value)); // Add some overlap
- ASSERT_OK(Flush());
- ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
- ASSERT_EQ(value, Get(std::to_string(i)));
- ASSERT_EQ(value, Get(std::to_string(i + kNumBlobs)));
- ASSERT_EQ(2, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD));
- ASSERT_EQ(2,
- options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS));
- ASSERT_EQ(0, options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT));
- }
- // Verify compaction not counted
- ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
- /*end=*/nullptr));
- EXPECT_EQ(0, options.statistics->getTickerCount(BLOB_DB_CACHE_ADD));
- }
TEST_F(DBBlobBasicTest, WarmCacheWithBlobsSecondary) {
  // Exercises blob cache warming with a compressed secondary cache layered
  // behind a deliberately tiny (1 KB) LRU primary cache, and verifies how
  // blobs shuttle between the two tiers across flushes and reads.
  CompressedSecondaryCacheOptions secondary_cache_opts;
  secondary_cache_opts.capacity = 1 << 20;
  secondary_cache_opts.num_shard_bits = 0;
  secondary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  secondary_cache_opts.compression_type = kNoCompression;

  // Primary cache is sized so that only one of the two blobs fits at a time.
  LRUCacheOptions primary_cache_opts;
  primary_cache_opts.capacity = 1024;
  primary_cache_opts.num_shard_bits = 0;
  primary_cache_opts.metadata_charge_policy = kDontChargeCacheMetadata;
  primary_cache_opts.secondary_cache =
      NewCompressedSecondaryCache(secondary_cache_opts);

  Options options = GetDefaultOptions();
  options.create_if_missing = true;
  options.statistics = CreateDBStatistics();
  options.enable_blob_files = true;
  options.blob_cache = NewLRUCache(primary_cache_opts);
  options.prepopulate_blob_cache = PrepopulateBlobCache::kFlushOnly;

  DestroyAndReopen(options);

  // Note: only one of the two blobs fit in the primary cache at any given
  // time.
  constexpr char first_key[] = "foo";
  constexpr size_t first_blob_size = 512;
  const std::string first_blob(first_blob_size, 'a');

  constexpr char second_key[] = "bar";
  constexpr size_t second_blob_size = 768;
  const std::string second_blob(second_blob_size, 'b');

  // First blob is inserted into primary cache during flush.
  ASSERT_OK(Put(first_key, first_blob));
  ASSERT_OK(Flush());
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 1);

  // Second blob is inserted into primary cache during flush,
  // First blob is evicted but only a dummy handle is inserted into secondary
  // cache.
  ASSERT_OK(Put(second_key, second_blob));
  ASSERT_OK(Flush());
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_ADD), 1);

  // First blob is inserted into primary cache.
  // Second blob is evicted but only a dummy handle is inserted into secondary
  // cache.
  ASSERT_EQ(Get(first_key), first_blob);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 1);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 0);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
            0);

  // Second blob is inserted into primary cache,
  // First blob is evicted and is inserted into secondary cache.
  ASSERT_EQ(Get(second_key), second_blob);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 1);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 0);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
            0);

  // First blob's dummy item is inserted into primary cache b/c of lookup.
  // Second blob is still in primary cache.
  ASSERT_EQ(Get(first_key), first_blob);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 0);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 1);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
            1);

  // First blob's item is inserted into primary cache b/c of lookup.
  // Second blob is evicted and inserted into secondary cache.
  ASSERT_EQ(Get(first_key), first_blob);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_MISS), 0);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(BLOB_DB_CACHE_HIT), 1);
  ASSERT_EQ(options.statistics->getAndResetTickerCount(SECONDARY_CACHE_HITS),
            1);
}
- TEST_F(DBBlobBasicTest, GetEntityBlob) {
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- Reopen(options);
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- constexpr char other_key[] = "other_key";
- constexpr char other_blob_value[] = "other_blob_value";
- ASSERT_OK(Put(key, blob_value));
- ASSERT_OK(Put(other_key, other_blob_value));
- ASSERT_OK(Flush());
- WideColumns expected_columns{{kDefaultWideColumnName, blob_value}};
- WideColumns other_expected_columns{
- {kDefaultWideColumnName, other_blob_value}};
- {
- PinnableWideColumns result;
- ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(), key,
- &result));
- ASSERT_EQ(result.columns(), expected_columns);
- }
- {
- PinnableWideColumns result;
- ASSERT_OK(db_->GetEntity(ReadOptions(), db_->DefaultColumnFamily(),
- other_key, &result));
- ASSERT_EQ(result.columns(), other_expected_columns);
- }
- {
- constexpr size_t num_keys = 2;
- std::array<Slice, num_keys> keys{{key, other_key}};
- std::array<PinnableWideColumns, num_keys> results;
- std::array<Status, num_keys> statuses;
- db_->MultiGetEntity(ReadOptions(), db_->DefaultColumnFamily(), num_keys,
- keys.data(), results.data(), statuses.data());
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(results[0].columns(), expected_columns);
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(results[1].columns(), other_expected_columns);
- }
- }
// Test fixture for blob tests exercising user-defined timestamps; runs each
// test against a dedicated database directory.
class DBBlobWithTimestampTest : public DBBasicTestWithTimestampBase {
 protected:
  DBBlobWithTimestampTest()
      : DBBasicTestWithTimestampBase("db_blob_with_timestamp_test") {}
};
- TEST_F(DBBlobWithTimestampTest, GetBlob) {
- Options options = GetDefaultOptions();
- options.create_if_missing = true;
- options.enable_blob_files = true;
- options.min_blob_size = 0;
- const size_t kTimestampSize = Timestamp(0, 0).size();
- TestComparator test_cmp(kTimestampSize);
- options.comparator = &test_cmp;
- DestroyAndReopen(options);
- WriteOptions write_opts;
- const std::string ts = Timestamp(1, 0);
- constexpr char key[] = "key";
- constexpr char blob_value[] = "blob_value";
- ASSERT_OK(db_->Put(write_opts, key, ts, blob_value));
- ASSERT_OK(Flush());
- const std::string read_ts = Timestamp(2, 0);
- Slice read_ts_slice(read_ts);
- ReadOptions read_opts;
- read_opts.timestamp = &read_ts_slice;
- std::string value;
- ASSERT_OK(db_->Get(read_opts, key, &value));
- ASSERT_EQ(value, blob_value);
- }
- TEST_F(DBBlobWithTimestampTest, MultiGetBlobs) {
- constexpr size_t min_blob_size = 6;
- Options options = GetDefaultOptions();
- options.enable_blob_files = true;
- options.min_blob_size = min_blob_size;
- options.create_if_missing = true;
- const size_t kTimestampSize = Timestamp(0, 0).size();
- TestComparator test_cmp(kTimestampSize);
- options.comparator = &test_cmp;
- DestroyAndReopen(options);
- // Put then retrieve three key-values. The first value is below the size limit
- // and is thus stored inline; the other two are stored separately as blobs.
- constexpr size_t num_keys = 3;
- constexpr char first_key[] = "first_key";
- constexpr char first_value[] = "short";
- static_assert(sizeof(first_value) - 1 < min_blob_size,
- "first_value too long to be inlined");
- DestroyAndReopen(options);
- WriteOptions write_opts;
- const std::string ts = Timestamp(1, 0);
- ASSERT_OK(db_->Put(write_opts, first_key, ts, first_value));
- constexpr char second_key[] = "second_key";
- constexpr char second_value[] = "long_value";
- static_assert(sizeof(second_value) - 1 >= min_blob_size,
- "second_value too short to be stored as blob");
- ASSERT_OK(db_->Put(write_opts, second_key, ts, second_value));
- constexpr char third_key[] = "third_key";
- constexpr char third_value[] = "other_long_value";
- static_assert(sizeof(third_value) - 1 >= min_blob_size,
- "third_value too short to be stored as blob");
- ASSERT_OK(db_->Put(write_opts, third_key, ts, third_value));
- ASSERT_OK(Flush());
- ReadOptions read_options;
- const std::string read_ts = Timestamp(2, 0);
- Slice read_ts_slice(read_ts);
- read_options.timestamp = &read_ts_slice;
- std::array<Slice, num_keys> keys{{first_key, second_key, third_key}};
- {
- std::array<PinnableSlice, num_keys> values;
- std::array<Status, num_keys> statuses;
- db_->MultiGet(read_options, db_->DefaultColumnFamily(), num_keys,
- keys.data(), values.data(), statuses.data());
- ASSERT_OK(statuses[0]);
- ASSERT_EQ(values[0], first_value);
- ASSERT_OK(statuses[1]);
- ASSERT_EQ(values[1], second_value);
- ASSERT_OK(statuses[2]);
- ASSERT_EQ(values[2], third_value);
- }
- }
TEST_F(DBBlobWithTimestampTest, GetMergeBlobWithPut) {
  // Verifies that merge operands layered over a blob-stored base value are
  // combined correctly when reading at a later timestamp. Each write is
  // flushed individually so the base value and both operands land in
  // separate files.
  Options options = GetDefaultOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  options.enable_blob_files = true;
  options.min_blob_size = 0;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  DestroyAndReopen(options);

  WriteOptions write_opts;
  const std::string ts = Timestamp(1, 0);
  // Base value followed by two merge operands, all at the same timestamp.
  ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1"));
  ASSERT_OK(Flush());
  ASSERT_OK(
      db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v2"));
  ASSERT_OK(Flush());
  ASSERT_OK(
      db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v3"));
  ASSERT_OK(Flush());

  std::string value;
  const std::string read_ts = Timestamp(2, 0);
  Slice read_ts_slice(read_ts);
  ReadOptions read_opts;
  read_opts.timestamp = &read_ts_slice;
  // The string-append operator joins base value and operands with commas.
  ASSERT_OK(db_->Get(read_opts, "Key1", &value));
  ASSERT_EQ(value, "v1,v2,v3");
}
TEST_F(DBBlobWithTimestampTest, MultiGetMergeBlobWithPut) {
  // MultiGet over three keys that carry a blob base value plus a varying
  // number of merge operands (two, one, and zero respectively), verifying
  // that merges resolve per key when reading at a later timestamp.
  constexpr size_t num_keys = 3;

  Options options = GetDefaultOptions();
  options.merge_operator = MergeOperators::CreateStringAppendOperator();
  options.enable_blob_files = true;
  options.min_blob_size = 0;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  DestroyAndReopen(options);

  WriteOptions write_opts;
  const std::string ts = Timestamp(1, 0);
  // Base values for all three keys in the first file.
  ASSERT_OK(db_->Put(write_opts, "Key0", ts, "v0_0"));
  ASSERT_OK(db_->Put(write_opts, "Key1", ts, "v1_0"));
  ASSERT_OK(db_->Put(write_opts, "Key2", ts, "v2_0"));
  ASSERT_OK(Flush());
  // First merge operand for Key0 and Key1 in the second file.
  ASSERT_OK(
      db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_1"));
  ASSERT_OK(
      db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key1", ts, "v1_1"));
  ASSERT_OK(Flush());
  // Second merge operand for Key0 alone in the third file.
  ASSERT_OK(
      db_->Merge(write_opts, db_->DefaultColumnFamily(), "Key0", ts, "v0_2"));
  ASSERT_OK(Flush());

  const std::string read_ts = Timestamp(2, 0);
  Slice read_ts_slice(read_ts);
  ReadOptions read_opts;
  read_opts.timestamp = &read_ts_slice;
  std::array<Slice, num_keys> keys{{"Key0", "Key1", "Key2"}};
  std::array<PinnableSlice, num_keys> values;
  std::array<Status, num_keys> statuses;
  db_->MultiGet(read_opts, db_->DefaultColumnFamily(), num_keys, keys.data(),
                values.data(), statuses.data());

  ASSERT_OK(statuses[0]);
  ASSERT_EQ(values[0], "v0_0,v0_1,v0_2");
  ASSERT_OK(statuses[1]);
  ASSERT_EQ(values[1], "v1_0,v1_1");
  ASSERT_OK(statuses[2]);
  ASSERT_EQ(values[2], "v2_0");
}
TEST_F(DBBlobWithTimestampTest, IterateBlobs) {
  // Exercises iterator traversal over blob values with user-defined
  // timestamps: forward/backward scans, mid-scan direction reversal,
  // multi-version iteration via iter_start_ts, and iterate bounds.
  Options options = GetDefaultOptions();
  options.enable_blob_files = true;
  options.create_if_missing = true;
  const size_t kTimestampSize = Timestamp(0, 0).size();
  TestComparator test_cmp(kTimestampSize);
  options.comparator = &test_cmp;
  DestroyAndReopen(options);

  int num_blobs = 5;
  std::vector<std::string> keys;
  std::vector<std::string> blobs;

  WriteOptions write_opts;
  std::vector<std::string> write_timestamps = {Timestamp(1, 0),
                                               Timestamp(2, 0)};

  // For each key in ["key0", ... "keyi", ...], write two versions:
  // Timestamp(1, 0), "blobi0"
  // Timestamp(2, 0), "blobi1"
  for (int i = 0; i < num_blobs; i++) {
    keys.push_back("key" + std::to_string(i));
    blobs.push_back("blob" + std::to_string(i));
    for (size_t j = 0; j < write_timestamps.size(); j++) {
      ASSERT_OK(db_->Put(write_opts, keys[i], write_timestamps[j],
                         blobs[i] + std::to_string(j)));
    }
  }
  ASSERT_OK(Flush());

  ReadOptions read_options;
  std::vector<std::string> read_timestamps = {Timestamp(0, 0), Timestamp(3, 0)};
  Slice ts_upper_bound(read_timestamps[1]);
  read_options.timestamp = &ts_upper_bound;

  // Helper that validates one iterator entry. When iter_start_ts is set
  // (multi-version iteration), iter->key() returns an internal key, so the
  // expected user key + timestamp is checked via ParseInternalKey; otherwise
  // the user key is compared directly.
  auto check_iter_entry =
      [](const Iterator* iter, const std::string& expected_key,
         const std::string& expected_ts, const std::string& expected_value,
         bool key_is_internal = true) {
        ASSERT_OK(iter->status());
        if (key_is_internal) {
          std::string expected_ukey_and_ts;
          expected_ukey_and_ts.assign(expected_key.data(), expected_key.size());
          expected_ukey_and_ts.append(expected_ts.data(), expected_ts.size());

          ParsedInternalKey parsed_ikey;
          ASSERT_OK(ParseInternalKey(iter->key(), &parsed_ikey,
                                     true /* log_err_key */));
          ASSERT_EQ(parsed_ikey.user_key, expected_ukey_and_ts);
        } else {
          ASSERT_EQ(iter->key(), expected_key);
        }
        ASSERT_EQ(iter->timestamp(), expected_ts);
        ASSERT_EQ(iter->value(), expected_value);
      };

  // Forward iterating one version of each key, get in this order:
  // [("key0", Timestamp(2, 0), "blob01"),
  //  ("key1", Timestamp(2, 0), "blob11")...]
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    for (int i = 0; i < num_blobs; i++) {
      check_iter_entry(iter.get(), keys[i], write_timestamps[1],
                       blobs[i] + std::to_string(1), /*key_is_internal*/ false);
      iter->Next();
    }
  }

  // Forward iteration, then reverse to backward.
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    // First num_blobs steps walk forward to the last key; the remaining
    // steps walk back down. The two boundary indices (i == num_blobs - 1 and
    // i == num_blobs) revisit the last entry around the turn, hence the
    // skipped Next()/check at those points.
    for (int i = 0; i < num_blobs * 2 - 1; i++) {
      if (i < num_blobs) {
        check_iter_entry(iter.get(), keys[i], write_timestamps[1],
                         blobs[i] + std::to_string(1),
                         /*key_is_internal*/ false);
        if (i != num_blobs - 1) {
          iter->Next();
        }
      } else {
        if (i != num_blobs) {
          check_iter_entry(iter.get(), keys[num_blobs * 2 - 1 - i],
                           write_timestamps[1],
                           blobs[num_blobs * 2 - 1 - i] + std::to_string(1),
                           /*key_is_internal*/ false);
        }
        iter->Prev();
      }
    }
  }

  // Backward iterating one versions of each key, get in this order:
  // [("key4", Timestamp(2, 0), "blob41"),
  //  ("key3", Timestamp(2, 0), "blob31")...]
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToLast();
    for (int i = 0; i < num_blobs; i++) {
      check_iter_entry(iter.get(), keys[num_blobs - 1 - i], write_timestamps[1],
                       blobs[num_blobs - 1 - i] + std::to_string(1),
                       /*key_is_internal*/ false);
      iter->Prev();
    }
    ASSERT_OK(iter->status());
  }

  // Backward iteration, then reverse to forward.
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToLast();
    // Mirror of the forward-then-reverse case above, starting from the last
    // key and turning around at the first.
    for (int i = 0; i < num_blobs * 2 - 1; i++) {
      if (i < num_blobs) {
        check_iter_entry(iter.get(), keys[num_blobs - 1 - i],
                         write_timestamps[1],
                         blobs[num_blobs - 1 - i] + std::to_string(1),
                         /*key_is_internal*/ false);
        if (i != num_blobs - 1) {
          iter->Prev();
        }
      } else {
        if (i != num_blobs) {
          check_iter_entry(iter.get(), keys[i - num_blobs], write_timestamps[1],
                           blobs[i - num_blobs] + std::to_string(1),
                           /*key_is_internal*/ false);
        }
        iter->Next();
      }
    }
  }

  // Setting iter_start_ts makes the iterator expose all versions within the
  // [iter_start_ts, timestamp] range rather than just the latest visible one.
  Slice ts_lower_bound(read_timestamps[0]);
  read_options.iter_start_ts = &ts_lower_bound;

  // Forward iterating multiple versions of the same key, get in this order:
  // [("key0", Timestamp(2, 0), "blob01"),
  //  ("key0", Timestamp(1, 0), "blob00"),
  //  ("key1", Timestamp(2, 0), "blob11")...]
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    for (int i = 0; i < num_blobs; i++) {
      for (size_t j = write_timestamps.size(); j > 0; --j) {
        check_iter_entry(iter.get(), keys[i], write_timestamps[j - 1],
                         blobs[i] + std::to_string(j - 1));
        iter->Next();
      }
    }
    ASSERT_OK(iter->status());
  }

  // Backward iterating multiple versions of the same key, get in this order:
  // [("key4", Timestamp(1, 0), "blob00"),
  //  ("key4", Timestamp(2, 0), "blob01"),
  //  ("key3", Timestamp(1, 0), "blob10")...]
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToLast();
    for (int i = num_blobs; i > 0; i--) {
      for (size_t j = 0; j < write_timestamps.size(); j++) {
        check_iter_entry(iter.get(), keys[i - 1], write_timestamps[j],
                         blobs[i - 1] + std::to_string(j));
        iter->Prev();
      }
    }
    ASSERT_OK(iter->status());
  }

  int upper_bound_idx = num_blobs - 2;
  int lower_bound_idx = 1;
  Slice upper_bound_slice(keys[upper_bound_idx]);
  Slice lower_bound_slice(keys[lower_bound_idx]);
  read_options.iterate_upper_bound = &upper_bound_slice;
  read_options.iterate_lower_bound = &lower_bound_slice;

  // Forward iteration with upper and lower bound.
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToFirst();
    for (int i = lower_bound_idx; i < upper_bound_idx; i++) {
      for (size_t j = write_timestamps.size(); j > 0; --j) {
        check_iter_entry(iter.get(), keys[i], write_timestamps[j - 1],
                         blobs[i] + std::to_string(j - 1));
        iter->Next();
      }
    }
    ASSERT_OK(iter->status());
  }

  // Backward iteration with upper and lower bound.
  {
    std::unique_ptr<Iterator> iter(db_->NewIterator(read_options));
    ASSERT_OK(iter->status());
    iter->SeekToLast();
    for (int i = upper_bound_idx; i > lower_bound_idx; i--) {
      for (size_t j = 0; j < write_timestamps.size(); j++) {
        check_iter_entry(iter.get(), keys[i - 1], write_timestamps[j],
                         blobs[i - 1] + std::to_string(j));
        iter->Prev();
      }
    }
    ASSERT_OK(iter->status());
  }
}
- } // namespace ROCKSDB_NAMESPACE
// Test driver: install a stack trace handler for better crash diagnostics,
// initialize gtest, register project-specific test objects, then run all
// registered test cases.
int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  RegisterCustomObjects(argc, argv);
  return RUN_ALL_TESTS();
}
|