| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include "db/db_test_util.h"
- #include "db/write_thread.h"
- #include "port/stack_trace.h"
- namespace ROCKSDB_NAMESPACE {
- class DBWriteBufferManagerTest : public DBTestBase,
- public testing::WithParamInterface<bool> {
- public:
- DBWriteBufferManagerTest()
- : DBTestBase("db_write_buffer_manager_test", /*env_do_fsync=*/false) {}
- bool cost_cache_;
- };
- TEST_P(DBWriteBufferManagerTest, SharedBufferAcrossCFs1) {
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(3));
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(0));
- // Write to "Default", "cf2" and "cf3".
- ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
- ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));
- // WriteBufferManager::buffer_size_ has exceeded after the previous write is
- // completed.
- // This make sures write will go through and if stall was in effect, it will
- // end.
- ASSERT_OK(Put(0, Key(2), DummyString(1), wo));
- }
- // Test Single DB with multiple writer threads get blocked when
- // WriteBufferManager execeeds buffer_size_ and flush is waiting to be
- // finished.
- TEST_P(DBWriteBufferManagerTest, SharedWriteBufferAcrossCFs2) {
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(3));
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(0));
- // Write to "Default", "cf2" and "cf3". No flush will be triggered.
- ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
- ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));
- // WriteBufferManager::buffer_size_ has exceeded after the previous write is
- // completed.
- std::unordered_set<WriteThread::Writer*> w_set;
- std::vector<port::Thread> threads;
- int wait_count_db = 0;
- int num_writers = 4;
- InstrumentedMutex mutex;
- InstrumentedCondVar cv(&mutex);
- std::atomic<int> thread_num(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
- "DBImpl::BackgroundCallFlush:start"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WBMStallInterface::BlockDB", [&](void*) {
- InstrumentedMutexLock lock(&mutex);
- wait_count_db++;
- cv.SignalAll();
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WriteThread::WriteStall::Wait", [&](void* arg) {
- InstrumentedMutexLock lock(&mutex);
- WriteThread::Writer* w = static_cast<WriteThread::Writer*>(arg);
- w_set.insert(w);
- // Allow the flush to continue if all writer threads are blocked.
- if (w_set.size() == (unsigned long)num_writers) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- bool s = true;
- std::function<void(int)> writer = [&](int cf) {
- int a = thread_num.fetch_add(1);
- std::string key = "foo" + std::to_string(a);
- Status tmp = Put(cf, Slice(key), DummyString(1), wo);
- InstrumentedMutexLock lock(&mutex);
- s = s && tmp.ok();
- };
- // Flow:
- // main_writer thread will write but will be blocked (as Flush will on hold,
- // buffer_size_ has exceeded, thus will create stall in effect).
- // |
- // |
- // multiple writer threads will be created to write across multiple columns
- // and they will be blocked.
- // |
- // |
- // Last writer thread will write and when its blocked it will signal Flush to
- // continue to clear the stall.
- threads.emplace_back(writer, 1);
- // Wait untill first thread (main_writer) writing to DB is blocked and then
- // create the multiple writers which will be blocked from getting added to the
- // queue because stall is in effect.
- {
- InstrumentedMutexLock lock(&mutex);
- while (wait_count_db != 1) {
- cv.Wait();
- }
- }
- for (int i = 0; i < num_writers; i++) {
- threads.emplace_back(writer, i % 4);
- }
- for (auto& t : threads) {
- t.join();
- }
- ASSERT_TRUE(s);
- // Number of DBs blocked.
- ASSERT_EQ(wait_count_db, 1);
- // Number of Writer threads blocked.
- ASSERT_EQ(w_set.size(), num_writers);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // Test multiple DBs get blocked when WriteBufferManager limit exceeds and flush
- // is waiting to be finished but DBs tries to write meanwhile.
- TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB) {
- std::vector<std::string> dbnames;
- std::vector<DB*> dbs;
- int num_dbs = 3;
- for (int i = 0; i < num_dbs; i++) {
- dbs.push_back(nullptr);
- dbnames.push_back(
- test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
- }
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- CreateAndReopenWithCF({"cf1", "cf2"}, options);
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(DestroyDB(dbnames[i], options));
- ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
- }
- // Insert to db_.
- ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
- // WriteBufferManager Limit exceeded.
- std::vector<port::Thread> threads;
- int wait_count_db = 0;
- InstrumentedMutex mutex;
- InstrumentedCondVar cv(&mutex);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
- "DBImpl::BackgroundCallFlush:start"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WBMStallInterface::BlockDB", [&](void*) {
- {
- InstrumentedMutexLock lock(&mutex);
- wait_count_db++;
- cv.Signal();
- // Since this is the last DB, signal Flush to continue.
- if (wait_count_db == num_dbs + 1) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- bool s = true;
- // Write to DB.
- std::function<void(DB*)> write_db = [&](DB* db) {
- Status tmp = db->Put(wo, Key(3), DummyString(1));
- InstrumentedMutexLock lock(&mutex);
- s = s && tmp.ok();
- };
- // Flow:
- // db_ will write and will be blocked (as Flush will on hold and will create
- // stall in effect).
- // |
- // multiple dbs writers will be created to write to that db and they will be
- // blocked.
- // |
- // |
- // Last writer will write and when its blocked it will signal Flush to
- // continue to clear the stall.
- threads.emplace_back(write_db, db_);
- // Wait untill first DB is blocked and then create the multiple writers for
- // different DBs which will be blocked from getting added to the queue because
- // stall is in effect.
- {
- InstrumentedMutexLock lock(&mutex);
- while (wait_count_db != 1) {
- cv.Wait();
- }
- }
- for (int i = 0; i < num_dbs; i++) {
- threads.emplace_back(write_db, dbs[i]);
- }
- for (auto& t : threads) {
- t.join();
- }
- ASSERT_TRUE(s);
- ASSERT_EQ(num_dbs + 1, wait_count_db);
- // Clean up DBs.
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Close());
- ASSERT_OK(DestroyDB(dbnames[i], options));
- delete dbs[i];
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // Test multiple threads writing across multiple DBs and multiple columns get
- // blocked when stall by WriteBufferManager is in effect.
- TEST_P(DBWriteBufferManagerTest, SharedWriteBufferLimitAcrossDB1) {
- std::vector<std::string> dbnames;
- std::vector<DB*> dbs;
- int num_dbs = 3;
- for (int i = 0; i < num_dbs; i++) {
- dbs.push_back(nullptr);
- dbnames.push_back(
- test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
- }
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- CreateAndReopenWithCF({"cf1", "cf2"}, options);
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(DestroyDB(dbnames[i], options));
- ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
- }
- // Insert to db_.
- ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
- // WriteBufferManager::buffer_size_ has exceeded after the previous write to
- // dbs[0] is completed.
- std::vector<port::Thread> threads;
- int wait_count_db = 0;
- InstrumentedMutex mutex;
- InstrumentedCondVar cv(&mutex);
- std::unordered_set<WriteThread::Writer*> w_set;
- std::vector<port::Thread> writer_threads;
- std::atomic<int> thread_num(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
- "DBImpl::BackgroundCallFlush:start"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WBMStallInterface::BlockDB", [&](void*) {
- {
- InstrumentedMutexLock lock(&mutex);
- wait_count_db++;
- thread_num.fetch_add(1);
- cv.Signal();
- // Allow the flush to continue if all writer threads are blocked.
- if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WriteThread::WriteStall::Wait", [&](void* arg) {
- WriteThread::Writer* w = static_cast<WriteThread::Writer*>(arg);
- {
- InstrumentedMutexLock lock(&mutex);
- w_set.insert(w);
- thread_num.fetch_add(1);
- // Allow the flush continue if all writer threads are blocked.
- if (thread_num.load(std::memory_order_relaxed) == 2 * num_dbs + 1) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- bool s1 = true, s2 = true;
- // Write to multiple columns of db_.
- std::function<void(int)> write_cf = [&](int cf) {
- Status tmp = Put(cf, Key(3), DummyString(1), wo);
- InstrumentedMutexLock lock(&mutex);
- s1 = s1 && tmp.ok();
- };
- // Write to multiple DBs.
- std::function<void(DB*)> write_db = [&](DB* db) {
- Status tmp = db->Put(wo, Key(3), DummyString(1));
- InstrumentedMutexLock lock(&mutex);
- s2 = s2 && tmp.ok();
- };
- // Flow:
- // thread will write to db_ will be blocked (as Flush will on hold,
- // buffer_size_ has exceeded and will create stall in effect).
- // |
- // |
- // multiple writers threads writing to different DBs and to db_ across
- // multiple columns will be created and they will be blocked due to stall.
- // |
- // |
- // Last writer thread will write and when its blocked it will signal Flush to
- // continue to clear the stall.
- threads.emplace_back(write_db, db_);
- // Wait untill first thread is blocked and then create the multiple writer
- // threads.
- {
- InstrumentedMutexLock lock(&mutex);
- while (wait_count_db != 1) {
- cv.Wait();
- }
- }
- for (int i = 0; i < num_dbs; i++) {
- // Write to multiple columns of db_.
- writer_threads.emplace_back(write_cf, i % 3);
- // Write to different dbs.
- threads.emplace_back(write_db, dbs[i]);
- }
- for (auto& t : threads) {
- t.join();
- }
- for (auto& t : writer_threads) {
- t.join();
- }
- ASSERT_TRUE(s1);
- ASSERT_TRUE(s2);
- // Number of DBs blocked.
- ASSERT_EQ(num_dbs + 1, wait_count_db);
- // Number of Writer threads blocked.
- ASSERT_EQ(w_set.size(), num_dbs);
- // Clean up DBs.
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Close());
- ASSERT_OK(DestroyDB(dbnames[i], options));
- delete dbs[i];
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // Test multiple threads writing across multiple columns of db_ by passing
- // different values to WriteOption.no_slown_down.
- TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsSingleDB) {
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- CreateAndReopenWithCF({"cf1", "cf2", "cf3"}, options);
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(3));
- ASSERT_OK(Put(3, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(1), wo));
- ASSERT_OK(Flush(0));
- // Write to "Default", "cf2" and "cf3". No flush will be triggered.
- ASSERT_OK(Put(3, Key(1), DummyString(30000), wo));
- ASSERT_OK(Put(0, Key(1), DummyString(40000), wo));
- ASSERT_OK(Put(2, Key(1), DummyString(1), wo));
- ASSERT_OK(Put(3, Key(2), DummyString(40000), wo));
- // WriteBufferManager::buffer_size_ has exceeded after the previous write to
- // db_ is completed.
- std::unordered_set<WriteThread::Writer*> w_slowdown_set;
- std::vector<port::Thread> threads;
- int wait_count_db = 0;
- int num_writers = 4;
- InstrumentedMutex mutex;
- InstrumentedCondVar cv(&mutex);
- std::atomic<int> thread_num(0);
- std::atomic<int> w_no_slowdown(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
- "DBImpl::BackgroundCallFlush:start"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WBMStallInterface::BlockDB", [&](void*) {
- {
- InstrumentedMutexLock lock(&mutex);
- wait_count_db++;
- cv.SignalAll();
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WriteThread::WriteStall::Wait", [&](void* arg) {
- {
- InstrumentedMutexLock lock(&mutex);
- WriteThread::Writer* w = static_cast<WriteThread::Writer*>(arg);
- w_slowdown_set.insert(w);
- // Allow the flush continue if all writer threads are blocked.
- if (w_slowdown_set.size() + (unsigned long)w_no_slowdown.load(
- std::memory_order_relaxed) ==
- (unsigned long)num_writers) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- bool s1 = true, s2 = true;
- std::function<void(int)> write_slow_down = [&](int cf) {
- int a = thread_num.fetch_add(1);
- std::string key = "foo" + std::to_string(a);
- WriteOptions write_op;
- write_op.no_slowdown = false;
- Status tmp = Put(cf, Slice(key), DummyString(1), write_op);
- InstrumentedMutexLock lock(&mutex);
- s1 = s1 && tmp.ok();
- };
- std::function<void(int)> write_no_slow_down = [&](int cf) {
- int a = thread_num.fetch_add(1);
- std::string key = "foo" + std::to_string(a);
- WriteOptions write_op;
- write_op.no_slowdown = true;
- Status tmp = Put(cf, Slice(key), DummyString(1), write_op);
- {
- InstrumentedMutexLock lock(&mutex);
- s2 = s2 && !tmp.ok();
- w_no_slowdown.fetch_add(1);
- // Allow the flush continue if all writer threads are blocked.
- if (w_slowdown_set.size() +
- (unsigned long)w_no_slowdown.load(std::memory_order_relaxed) ==
- (unsigned long)num_writers) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- };
- // Flow:
- // main_writer thread will write but will be blocked (as Flush will on hold,
- // buffer_size_ has exceeded, thus will create stall in effect).
- // |
- // |
- // multiple writer threads will be created to write across multiple columns
- // with different values of WriteOptions.no_slowdown. Some of them will
- // be blocked and some of them will return with Incomplete status.
- // |
- // |
- // Last writer thread will write and when its blocked/return it will signal
- // Flush to continue to clear the stall.
- threads.emplace_back(write_slow_down, 1);
- // Wait untill first thread (main_writer) writing to DB is blocked and then
- // create the multiple writers which will be blocked from getting added to the
- // queue because stall is in effect.
- {
- InstrumentedMutexLock lock(&mutex);
- while (wait_count_db != 1) {
- cv.Wait();
- }
- }
- for (int i = 0; i < num_writers; i += 2) {
- threads.emplace_back(write_no_slow_down, (i) % 4);
- threads.emplace_back(write_slow_down, (i + 1) % 4);
- }
- for (auto& t : threads) {
- t.join();
- }
- ASSERT_TRUE(s1);
- ASSERT_TRUE(s2);
- // Number of DBs blocked.
- ASSERT_EQ(wait_count_db, 1);
- // Number of Writer threads blocked.
- ASSERT_EQ(w_slowdown_set.size(), num_writers / 2);
- // Number of Writer threads with WriteOptions.no_slowdown = true.
- ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_writers / 2);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // Test multiple threads writing across multiple columns of db_ and different
- // dbs by passing different values to WriteOption.no_slown_down.
- TEST_P(DBWriteBufferManagerTest, MixedSlowDownOptionsMultipleDB) {
- std::vector<std::string> dbnames;
- std::vector<DB*> dbs;
- int num_dbs = 4;
- for (int i = 0; i < num_dbs; i++) {
- dbs.push_back(nullptr);
- dbnames.push_back(
- test::PerThreadDBPath("db_shared_wb_db" + std::to_string(i)));
- }
- Options options = CurrentOptions();
- options.arena_block_size = 4096;
- options.write_buffer_size = 500000; // this is never hit
- std::shared_ptr<Cache> cache = NewLRUCache(4 * 1024 * 1024, 2);
- ASSERT_LT(cache->GetUsage(), 256 * 1024);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, cache, true));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(100000, nullptr, true));
- }
- CreateAndReopenWithCF({"cf1", "cf2"}, options);
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(DestroyDB(dbnames[i], options));
- ASSERT_OK(DB::Open(options, dbnames[i], &(dbs[i])));
- }
- WriteOptions wo;
- wo.disableWAL = true;
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Put(wo, Key(1), DummyString(20000)));
- }
- // Insert to db_.
- ASSERT_OK(Put(0, Key(1), DummyString(30000), wo));
- // WriteBufferManager::buffer_size_ has exceeded after the previous write to
- // dbs[0] is completed.
- std::vector<port::Thread> threads;
- int wait_count_db = 0;
- InstrumentedMutex mutex;
- InstrumentedCondVar cv(&mutex);
- std::unordered_set<WriteThread::Writer*> w_slowdown_set;
- std::vector<port::Thread> writer_threads;
- std::atomic<int> thread_num(0);
- std::atomic<int> w_no_slowdown(0);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0",
- "DBImpl::BackgroundCallFlush:start"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WBMStallInterface::BlockDB", [&](void*) {
- InstrumentedMutexLock lock(&mutex);
- wait_count_db++;
- cv.Signal();
- // Allow the flush continue if all writer threads are blocked.
- if (w_slowdown_set.size() +
- (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
- wait_count_db) ==
- (unsigned long)(2 * num_dbs + 1)) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
- "WriteThread::WriteStall::Wait", [&](void* arg) {
- WriteThread::Writer* w = static_cast<WriteThread::Writer*>(arg);
- InstrumentedMutexLock lock(&mutex);
- w_slowdown_set.insert(w);
- // Allow the flush continue if all writer threads are blocked.
- if (w_slowdown_set.size() +
- (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
- wait_count_db) ==
- (unsigned long)(2 * num_dbs + 1)) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- });
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- bool s1 = true, s2 = true;
- std::function<void(DB*)> write_slow_down = [&](DB* db) {
- int a = thread_num.fetch_add(1);
- std::string key = "foo" + std::to_string(a);
- WriteOptions write_op;
- write_op.no_slowdown = false;
- Status tmp = db->Put(write_op, Slice(key), DummyString(1));
- InstrumentedMutexLock lock(&mutex);
- s1 = s1 && tmp.ok();
- };
- std::function<void(DB*)> write_no_slow_down = [&](DB* db) {
- int a = thread_num.fetch_add(1);
- std::string key = "foo" + std::to_string(a);
- WriteOptions write_op;
- write_op.no_slowdown = true;
- Status tmp = db->Put(write_op, Slice(key), DummyString(1));
- {
- InstrumentedMutexLock lock(&mutex);
- s2 = s2 && !tmp.ok();
- w_no_slowdown.fetch_add(1);
- if (w_slowdown_set.size() +
- (unsigned long)(w_no_slowdown.load(std::memory_order_relaxed) +
- wait_count_db) ==
- (unsigned long)(2 * num_dbs + 1)) {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::SharedWriteBufferAcrossCFs:0");
- }
- }
- };
- // Flow:
- // first thread will write but will be blocked (as Flush will on hold,
- // buffer_size_ has exceeded, thus will create stall in effect).
- // |
- // |
- // multiple writer threads will be created to write across multiple columns
- // of db_ and different DBs with different values of
- // WriteOptions.no_slowdown. Some of them will be blocked and some of them
- // will return with Incomplete status.
- // |
- // |
- // Last writer thread will write and when its blocked/return it will signal
- // Flush to continue to clear the stall.
- threads.emplace_back(write_slow_down, db_);
- // Wait untill first thread writing to DB is blocked and then
- // create the multiple writers.
- {
- InstrumentedMutexLock lock(&mutex);
- while (wait_count_db != 1) {
- cv.Wait();
- }
- }
- for (int i = 0; i < num_dbs; i += 2) {
- // Write to multiple columns of db_.
- writer_threads.emplace_back(write_slow_down, db_);
- writer_threads.emplace_back(write_no_slow_down, db_);
- // Write to different DBs.
- threads.emplace_back(write_slow_down, dbs[i]);
- threads.emplace_back(write_no_slow_down, dbs[i + 1]);
- }
- for (auto& t : threads) {
- t.join();
- }
- for (auto& t : writer_threads) {
- t.join();
- }
- ASSERT_TRUE(s1);
- ASSERT_TRUE(s2);
- // Number of DBs blocked.
- ASSERT_EQ((num_dbs / 2) + 1, wait_count_db);
- // Number of writer threads writing to db_ blocked from getting added to the
- // queue.
- ASSERT_EQ(w_slowdown_set.size(), num_dbs / 2);
- // Number of threads with WriteOptions.no_slowdown = true.
- ASSERT_EQ(w_no_slowdown.load(std::memory_order_relaxed), num_dbs);
- // Clean up DBs.
- for (int i = 0; i < num_dbs; i++) {
- ASSERT_OK(dbs[i]->Close());
- ASSERT_OK(DestroyDB(dbnames[i], options));
- delete dbs[i];
- }
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- }
- // Tests a `WriteBufferManager` constructed with `allow_stall == false` does not
- // thrash memtable switching when full and a CF receives multiple writes.
- // Instead, we expect to switch a CF's memtable for flush only when that CF does
- // not have any pending or running flush.
- //
- // This test uses multiple DBs each with a single CF instead of a single DB
- // with multiple CFs. That way we can control which CF is considered for switch
- // by writing to that CF's DB.
- //
- // Not supported in LITE mode due to `GetProperty()` unavailable.
- TEST_P(DBWriteBufferManagerTest, StopSwitchingMemTablesOnceFlushing) {
- Options options = CurrentOptions();
- options.arena_block_size = 4 << 10; // 4KB
- options.write_buffer_size = 1 << 20; // 1MB
- std::shared_ptr<Cache> cache =
- NewLRUCache(4 << 20 /* capacity (4MB) */, 2 /* num_shard_bits */);
- ASSERT_LT(cache->GetUsage(), 256 << 10 /* 256KB */);
- cost_cache_ = GetParam();
- if (cost_cache_) {
- options.write_buffer_manager.reset(new WriteBufferManager(
- 512 << 10 /* buffer_size (512KB) */, cache, false /* allow_stall */));
- } else {
- options.write_buffer_manager.reset(
- new WriteBufferManager(512 << 10 /* buffer_size (512KB) */,
- nullptr /* cache */, false /* allow_stall */));
- }
- Reopen(options);
- std::string dbname = test::PerThreadDBPath("db_shared_wbm_db");
- DB* shared_wbm_db = nullptr;
- ASSERT_OK(DestroyDB(dbname, options));
- ASSERT_OK(DB::Open(options, dbname, &shared_wbm_db));
- // The last write will make WBM need flush, but it won't flush yet.
- ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions()));
- ASSERT_FALSE(options.write_buffer_manager->ShouldFlush());
- ASSERT_OK(Put(Key(1), DummyString(256 << 10 /* 256KB */), WriteOptions()));
- ASSERT_TRUE(options.write_buffer_manager->ShouldFlush());
- // Flushes will be pending, not running because flush threads are blocked.
- test::SleepingBackgroundTask sleeping_task_high;
- env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
- &sleeping_task_high, Env::Priority::HIGH);
- for (int i = 0; i < 3; ++i) {
- ASSERT_OK(
- shared_wbm_db->Put(WriteOptions(), Key(1), DummyString(1 /* len */)));
- std::string prop;
- ASSERT_TRUE(
- shared_wbm_db->GetProperty("rocksdb.num-immutable-mem-table", &prop));
- ASSERT_EQ(std::to_string(i > 0 ? 1 : 0), prop);
- ASSERT_TRUE(
- shared_wbm_db->GetProperty("rocksdb.mem-table-flush-pending", &prop));
- ASSERT_EQ(std::to_string(i > 0 ? 1 : 0), prop);
- }
- // Clean up DBs.
- sleeping_task_high.WakeUp();
- sleeping_task_high.WaitUntilDone();
- ASSERT_OK(shared_wbm_db->Close());
- ASSERT_OK(DestroyDB(dbname, options));
- delete shared_wbm_db;
- }
- TEST_F(DBWriteBufferManagerTest, RuntimeChangeableAllowStall) {
- constexpr int kBigValue = 10000;
- Options options = CurrentOptions();
- options.write_buffer_manager.reset(
- new WriteBufferManager(1, nullptr /* cache */, true /* allow_stall */));
- DestroyAndReopen(options);
- // Pause flush thread so that
- // (a) the only way to exist write stall below is to change the `allow_stall`
- // (b) the write stall is "stable" without being interfered by flushes so that
- // we can check it without flakiness
- std::unique_ptr<test::SleepingBackgroundTask> sleeping_task(
- new test::SleepingBackgroundTask());
- env_->SetBackgroundThreads(1, Env::HIGH);
- env_->Schedule(&test::SleepingBackgroundTask::DoSleepTask,
- sleeping_task.get(), Env::Priority::HIGH);
- sleeping_task->WaitUntilSleeping();
- // Test 1: test setting `allow_stall` from true to false
- //
- // Assert existence of a write stall
- WriteOptions wo_no_slowdown;
- wo_no_slowdown.no_slowdown = true;
- Status s = Put(Key(0), DummyString(kBigValue), wo_no_slowdown);
- ASSERT_TRUE(s.IsIncomplete());
- ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos);
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
- {{"WBMStallInterface::BlockDB",
- "DBWriteBufferManagerTest::RuntimeChangeableThreadSafeParameters::"
- "ChangeParameter"}});
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
- // Test `SetAllowStall()`
- port::Thread thread1([&] { ASSERT_OK(Put(Key(0), DummyString(kBigValue))); });
- port::Thread thread2([&] {
- TEST_SYNC_POINT(
- "DBWriteBufferManagerTest::RuntimeChangeableThreadSafeParameters::"
- "ChangeParameter");
- options.write_buffer_manager->SetAllowStall(false);
- });
- // Verify `allow_stall` is successfully set to false in thread2.
- // Othwerwise, thread1's write will be stalled and this test will hang
- // forever.
- thread1.join();
- thread2.join();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
- ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
- // Test 2: test setting `allow_stall` from false to true
- //
- // Assert no write stall
- ASSERT_OK(Put(Key(0), DummyString(kBigValue), wo_no_slowdown));
- // Test `SetAllowStall()`
- options.write_buffer_manager->SetAllowStall(true);
- // Verify `allow_stall` is successfully set to true.
- // Otherwise the following write will not be stalled and therefore succeed.
- s = Put(Key(0), DummyString(kBigValue), wo_no_slowdown);
- ASSERT_TRUE(s.IsIncomplete());
- ASSERT_TRUE(s.ToString().find("Write stall") != std::string::npos);
- sleeping_task->WakeUp();
- }
- INSTANTIATE_TEST_CASE_P(DBWriteBufferManagerTest, DBWriteBufferManagerTest,
- testing::Bool());
- } // namespace ROCKSDB_NAMESPACE
- int main(int argc, char** argv) {
- ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
- ::testing::InitGoogleTest(&argc, argv);
- RegisterCustomObjects(argc, argv);
- return RUN_ALL_TESTS();
- }
|