| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531 |
- // Copyright (c) 2024-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- #include "db/db_test_util.h"
- #include "port/stack_trace.h"
- #include "test_util/sync_point.h"
- namespace ROCKSDB_NAMESPACE {
- #ifdef OS_LINUX
- class DBFollowerTest : public DBTestBase {
- public:
- // Create directories for leader and follower
- // Create the leader DB object
- DBFollowerTest() : DBTestBase("/db_follower_test", /*env_do_fsync*/ false) {
- follower_name_ = dbname_ + "/follower";
- db_parent_ = dbname_;
- Close();
- Destroy(CurrentOptions());
- EXPECT_EQ(env_->CreateDirIfMissing(dbname_), Status::OK());
- dbname_ = dbname_ + "/leader";
- Reopen(CurrentOptions());
- }
- ~DBFollowerTest() {
- follower_.reset();
- EXPECT_EQ(DestroyDB(follower_name_, CurrentOptions()), Status::OK());
- Destroy(CurrentOptions());
- dbname_ = db_parent_;
- }
- protected:
- class DBFollowerTestFS : public FileSystemWrapper {
- public:
- explicit DBFollowerTestFS(const std::shared_ptr<FileSystem>& target)
- : FileSystemWrapper(target),
- cv_(&mutex_),
- barrier_(false),
- count_(0),
- reinit_count_(0) {}
- const char* Name() const override { return "DBFollowerTestFS"; }
- IOStatus NewSequentialFile(const std::string& fname,
- const FileOptions& file_opts,
- std::unique_ptr<FSSequentialFile>* result,
- IODebugContext* dbg = nullptr) override {
- class DBFollowerTestSeqFile : public FSSequentialFileWrapper {
- public:
- DBFollowerTestSeqFile(DBFollowerTestFS* fs,
- std::unique_ptr<FSSequentialFile>&& file,
- uint64_t /*size*/)
- : FSSequentialFileWrapper(file.get()),
- fs_(fs),
- file_(std::move(file)) {}
- IOStatus Read(size_t n, const IOOptions& options, Slice* result,
- char* scratch, IODebugContext* dbg) override {
- fs_->BarrierWait();
- return target()->Read(n, options, result, scratch, dbg);
- }
- private:
- DBFollowerTestFS* fs_;
- std::unique_ptr<FSSequentialFile> file_;
- };
- std::unique_ptr<FSSequentialFile> file;
- IOStatus s = target()->NewSequentialFile(fname, file_opts, &file, dbg);
- if (s.ok() && test::GetFileType(fname) == kDescriptorFile) {
- uint64_t size = 0;
- EXPECT_EQ(target()->GetFileSize(fname, IOOptions(), &size, nullptr),
- IOStatus::OK());
- result->reset(new DBFollowerTestSeqFile(this, std::move(file), size));
- } else {
- *result = std::move(file);
- }
- return s;
- }
- void BarrierInit(int count) {
- MutexLock l(&mutex_);
- barrier_ = true;
- count_ = count;
- }
- void BarrierWait() {
- MutexLock l(&mutex_);
- if (!barrier_) {
- return;
- }
- if (--count_ == 0) {
- if (reinit_count_ > 0) {
- count_ = reinit_count_;
- reinit_count_ = 0;
- } else {
- barrier_ = false;
- }
- cv_.SignalAll();
- } else {
- cv_.Wait();
- }
- }
- void BarrierWaitAndReinit(int count) {
- MutexLock l(&mutex_);
- if (!barrier_) {
- return;
- }
- reinit_count_ = count;
- if (--count_ == 0) {
- if (reinit_count_ > 0) {
- count_ = reinit_count_;
- reinit_count_ = 0;
- } else {
- barrier_ = false;
- }
- cv_.SignalAll();
- } else {
- cv_.Wait();
- }
- }
- private:
- port::Mutex mutex_;
- port::CondVar cv_;
- bool barrier_;
- int count_;
- int reinit_count_;
- };
- class DBFollowerTestSstPartitioner : public SstPartitioner {
- public:
- explicit DBFollowerTestSstPartitioner(uint64_t max_keys)
- : max_keys_(max_keys), num_keys_(0) {}
- const char* Name() const override { return "DBFollowerTestSstPartitioner"; }
- PartitionerResult ShouldPartition(
- const PartitionerRequest& /*request*/) override {
- if (++num_keys_ > max_keys_) {
- num_keys_ = 0;
- return PartitionerResult::kRequired;
- } else {
- return PartitionerResult::kNotRequired;
- }
- }
- bool CanDoTrivialMove(const Slice& /*smallest_user_key*/,
- const Slice& /*largest_user_key*/) override {
- return true;
- }
- private:
- uint64_t max_keys_;
- uint64_t num_keys_;
- };
- class DBFollowerTestSstPartitionerFactory : public SstPartitionerFactory {
- public:
- explicit DBFollowerTestSstPartitionerFactory(uint64_t max_keys)
- : max_keys_(max_keys) {}
- std::unique_ptr<SstPartitioner> CreatePartitioner(
- const SstPartitioner::Context& /*context*/) const override {
- std::unique_ptr<SstPartitioner> partitioner;
- partitioner.reset(new DBFollowerTestSstPartitioner(max_keys_));
- return partitioner;
- }
- const char* Name() const override {
- return "DBFollowerTestSstPartitionerFactory";
- }
- private:
- uint64_t max_keys_;
- };
- Status OpenAsFollower() {
- Options opts = CurrentOptions();
- if (!follower_env_) {
- follower_env_ = NewCompositeEnv(
- std::make_shared<DBFollowerTestFS>(env_->GetFileSystem()));
- }
- opts.env = follower_env_.get();
- opts.follower_refresh_catchup_period_ms = 100;
- return DB::OpenAsFollower(opts, follower_name_, dbname_, &follower_);
- }
- std::string FollowerGet(const std::string& k) {
- ReadOptions options;
- options.verify_checksums = true;
- std::string result;
- Status s = follower()->Get(options, k, &result);
- if (s.IsNotFound()) {
- result = "NOT_FOUND";
- } else if (!s.ok()) {
- result = s.ToString();
- }
- return result;
- }
- DB* follower() { return follower_.get(); }
- DBFollowerTestFS* follower_fs() {
- return static_cast<DBFollowerTestFS*>(follower_env_->GetFileSystem().get());
- }
- void CheckDirs() {
- std::vector<std::string> db_children;
- std::vector<std::string> follower_children;
- EXPECT_OK(env_->GetChildren(dbname_, &db_children));
- EXPECT_OK(env_->GetChildren(follower_name_, &follower_children));
- std::set<uint64_t> db_filenums;
- std::set<uint64_t> follower_filenums;
- for (auto& name : db_children) {
- if (test::GetFileType(name) != kTableFile) {
- continue;
- }
- db_filenums.insert(test::GetFileNumber(name));
- }
- for (auto& name : follower_children) {
- if (test::GetFileType(name) != kTableFile) {
- continue;
- }
- follower_filenums.insert(test::GetFileNumber(name));
- }
- db_filenums.merge(follower_filenums);
- EXPECT_EQ(follower_filenums.size(), db_filenums.size());
- }
- private:
- std::string follower_name_;
- std::string db_parent_;
- std::unique_ptr<Env> follower_env_;
- std::unique_ptr<DB> follower_;
- };
- TEST_F(DBFollowerTest, Basic) {
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k2", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(OpenAsFollower());
- std::string val;
- ASSERT_OK(follower()->Get(ReadOptions(), "k1", &val));
- ASSERT_EQ(val, "v1");
- CheckDirs();
- }
- TEST_F(DBFollowerTest, Flush) {
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:Begin1", "Leader::Start"},
- {"Leader::Done", "DBImplFollower::TryCatchupWithLeader:Begin2"},
- {"DBImplFollower::TryCatchupWithLeader:End", "Follower::WaitForCatchup"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- ASSERT_OK(OpenAsFollower());
- TEST_SYNC_POINT("Leader::Start");
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("Leader::Done");
- TEST_SYNC_POINT("Follower::WaitForCatchup");
- std::string val;
- ASSERT_OK(follower()->Get(ReadOptions(), "k1", &val));
- ASSERT_EQ(val, "v1");
- CheckDirs();
- SyncPoint::GetInstance()->DisableProcessing();
- }
- // This test creates 4 L0 files, immediately followed by a compaction to L1.
- // The follower replays the 4 flush records from the MANIFEST unsuccessfully,
- // and then successfully recovers a Version from the compaction record
- TEST_F(DBFollowerTest, RetryCatchup) {
- Options opts = CurrentOptions();
- opts.disable_auto_compactions = true;
- Reopen(opts);
- ASSERT_OK(OpenAsFollower());
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:Begin1", "Leader::Start"},
- {"DBImpl::BackgroundCompaction:Start",
- "DBImplFollower::TryCatchupWithLeader:Begin2"},
- {"VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
- "Begin1",
- "DBImpl::BackgroundCompaction:BeforeCompaction"},
- {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
- "VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
- "Begin2"},
- {"DBImplFollower::TryCatchupWithLeader:End", "Follower::WaitForCatchup"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- TEST_SYNC_POINT("Leader::Start");
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v3"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v4"));
- ASSERT_OK(Flush());
- ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
- TEST_SYNC_POINT("Follower::WaitForCatchup");
- ASSERT_EQ(FollowerGet("k1"), "v4");
- CheckDirs();
- SyncPoint::GetInstance()->ClearAllCallBacks();
- SyncPoint::GetInstance()->DisableProcessing();
- }
- // This test validates the same as the previous test, except there is a
- // MANIFEST rollover between the flushes and compaction. The follower
- // does not switch to a new MANIFEST in ReadAndApply. So it would require
- // another round of refresh before catching up.
- TEST_F(DBFollowerTest, RetryCatchupManifestRollover) {
- Options opts = CurrentOptions();
- opts.disable_auto_compactions = true;
- Reopen(opts);
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v3"));
- ASSERT_OK(Flush());
- ASSERT_OK(OpenAsFollower());
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:Begin1", "Leader::Start"},
- {"Leader::Flushed", "DBImplFollower::TryCatchupWithLeader:Begin2"},
- {"VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
- "Begin1",
- "Leader::Done"},
- {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
- "VersionEditHandlerPointInTime::MaybeCreateVersionBeforeApplyEdit:"
- "Begin2"},
- {"DBImplFollower::TryCatchupWithLeader:End",
- "Follower::WaitForCatchup:1"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- TEST_SYNC_POINT("Leader::Start");
- ASSERT_OK(Put("k1", "v4"));
- ASSERT_OK(Flush());
- TEST_SYNC_POINT("Leader::Flushed");
- TEST_SYNC_POINT("Leader::Done");
- Reopen(opts);
- ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
- TEST_SYNC_POINT("Follower::WaitForCatchup:1");
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:End",
- "Follower::WaitForCatchup:2"},
- });
- TEST_SYNC_POINT("Follower::WaitForCatchup:2");
- ASSERT_EQ(FollowerGet("k1"), "v4");
- SyncPoint::GetInstance()->ClearAllCallBacks();
- SyncPoint::GetInstance()->DisableProcessing();
- }
- // This test creates 4 L0 files and compacts them. The follower, during catchup,
- // successfully instantiates 4 Versions corresponding to the 4 files (but
- // doesn't install them yet), followed by deleting those 4 and adding a new
- // file from compaction. The test verifies that the 4 L0 files are deleted
- // correctly by the follower.
- // We use the Barrier* functions to ensure that the follower first sees the 4
- // L0 files and is able to link them, and then sees the compaction that
- // obsoletes those L0 files (so those L0 files are intermediates that it has
- // to explicitly delete). Suppose we don't have any barriers, its possible
- // the follower reads the L0 records and compaction records from the MANIFEST
- // in one read, which means those L0 files would have already been deleted
- // by the leader and the follower cannot link to them.
- TEST_F(DBFollowerTest, IntermediateObsoleteFiles) {
- Options opts = CurrentOptions();
- opts.disable_auto_compactions = true;
- Reopen(opts);
- ASSERT_OK(OpenAsFollower());
- follower_fs()->BarrierInit(2);
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v3"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k1", "v4"));
- ASSERT_OK(Flush());
- follower_fs()->BarrierWaitAndReinit(2);
- ASSERT_OK(dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr, true));
- follower_fs()->BarrierWait();
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:End",
- "Follower::WaitForCatchup:1"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- TEST_SYNC_POINT("Follower::WaitForCatchup:1");
- CheckDirs();
- ASSERT_EQ(FollowerGet("k1"), "v4");
- }
- // This test verifies a scenario where the follower can recover a Version
- // partially (i.e some of the additions cannot be found), and the files
- // that are found are obsoleted by a subsequent VersionEdit.
- TEST_F(DBFollowerTest, PartialVersionRecovery) {
- Options opts = CurrentOptions();
- opts.disable_auto_compactions = true;
- opts.sst_partitioner_factory =
- std::make_shared<DBFollowerTestSstPartitionerFactory>(1);
- Reopen(opts);
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Put("k2", "v1"));
- ASSERT_OK(Put("k3", "v1"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(2);
- ASSERT_OK(Put("k1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k3", "v2"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(1);
- ASSERT_OK(OpenAsFollower());
- ASSERT_OK(dbfull()->SetOptions(dbfull()->DefaultColumnFamily(),
- {{"max_compaction_bytes", "1"}}));
- follower_fs()->BarrierInit(2);
- Slice key("k1");
- ASSERT_OK(dbfull()->TEST_CompactRange(1, &key, &key, nullptr, true));
- follower_fs()->BarrierWaitAndReinit(2);
- // The second compaction input overlaps the previous compaction outputs
- // by one file. This file is never added to VersionStorageInfo since it
- // was added and deleted before the catch up completes. We later verify that
- // the follower correctly deleted this file.
- key = Slice("k3");
- ASSERT_OK(dbfull()->TEST_CompactRange(1, &key, &key, nullptr, true));
- follower_fs()->BarrierWait();
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:End",
- "Follower::WaitForCatchup:1"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- TEST_SYNC_POINT("Follower::WaitForCatchup:1");
- CheckDirs();
- ASSERT_EQ(FollowerGet("k1"), "v2");
- ASSERT_EQ(FollowerGet("k2"), "v1");
- ASSERT_EQ(FollowerGet("k3"), "v2");
- SyncPoint::GetInstance()->DisableProcessing();
- }
- // This test verifies a scenario similar to the PartialVersionRecovery, except
- // with a MANIFEST rollover in between. When there is a rollover, the
- // follower's attempt ends without installing a new Version. The next catch up
- // attempt will recover a full Version.
- TEST_F(DBFollowerTest, PartialVersionRecoveryWithRollover) {
- Options opts = CurrentOptions();
- opts.disable_auto_compactions = true;
- opts.sst_partitioner_factory =
- std::make_shared<DBFollowerTestSstPartitionerFactory>(1);
- Reopen(opts);
- ASSERT_OK(Put("k1", "v1"));
- ASSERT_OK(Put("k2", "v1"));
- ASSERT_OK(Put("k3", "v1"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(2);
- ASSERT_OK(Put("k1", "v2"));
- ASSERT_OK(Flush());
- ASSERT_OK(Put("k3", "v2"));
- ASSERT_OK(Flush());
- MoveFilesToLevel(1);
- opts.max_compaction_bytes = 1;
- Reopen(opts);
- ASSERT_OK(OpenAsFollower());
- follower_fs()->BarrierInit(2);
- Slice key("k1");
- ASSERT_OK(dbfull()->TEST_CompactRange(1, &key, &key, nullptr, true));
- follower_fs()->BarrierWaitAndReinit(2);
- Reopen(opts);
- key = Slice("k3");
- ASSERT_OK(dbfull()->TEST_CompactRange(1, &key, &key, nullptr, true));
- follower_fs()->BarrierWait();
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:Begin1",
- "Follower::WaitForCatchup:1"},
- {"Follower::WaitForCatchup:2",
- "DBImplFollower::TryCatchupWithLeader:Begin2"},
- });
- SyncPoint::GetInstance()->EnableProcessing();
- TEST_SYNC_POINT("Follower::WaitForCatchup:1");
- TEST_SYNC_POINT("Follower::WaitForCatchup:2");
- SyncPoint::GetInstance()->LoadDependency({
- {"DBImplFollower::TryCatchupWithLeader:End",
- "Follower::WaitForCatchup:3"},
- });
- TEST_SYNC_POINT("Follower::WaitForCatchup:3");
- CheckDirs();
- ASSERT_EQ(FollowerGet("k1"), "v2");
- ASSERT_EQ(FollowerGet("k2"), "v1");
- ASSERT_EQ(FollowerGet("k3"), "v2");
- SyncPoint::GetInstance()->DisableProcessing();
- }
- #endif
- } // namespace ROCKSDB_NAMESPACE
- int main(int argc, char** argv) {
- ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
- }
|