| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #pragma once
#include <memory>
#include <utility>

#include "db/version_builder.h"
#include "db/version_edit.h"
#include "db/version_set.h"
- namespace ROCKSDB_NAMESPACE {
- struct FileMetaData;
- class VersionEditHandlerBase {
- public:
- explicit VersionEditHandlerBase(const ReadOptions& read_options)
- : read_options_(read_options),
- max_manifest_read_size_(std::numeric_limits<uint64_t>::max()) {}
- virtual ~VersionEditHandlerBase() {}
- void Iterate(log::Reader& reader, Status* log_read_status);
- const Status& status() const { return status_; }
- AtomicGroupReadBuffer& GetReadBuffer() { return read_buffer_; }
- protected:
- explicit VersionEditHandlerBase(const ReadOptions& read_options,
- uint64_t max_read_size)
- : read_options_(read_options), max_manifest_read_size_(max_read_size) {}
- virtual Status Initialize() { return Status::OK(); }
- virtual Status ApplyVersionEdit(VersionEdit& edit,
- ColumnFamilyData** cfd) = 0;
- virtual Status OnAtomicGroupReplayBegin() { return Status::OK(); }
- virtual Status OnAtomicGroupReplayEnd() { return Status::OK(); }
- virtual void CheckIterationResult(const log::Reader& /*reader*/,
- Status* /*s*/) {}
- void ClearReadBuffer() { read_buffer_.Clear(); }
- Status status_;
- const ReadOptions& read_options_;
- private:
- AtomicGroupReadBuffer read_buffer_;
- const uint64_t max_manifest_read_size_;
- };
- class ListColumnFamiliesHandler : public VersionEditHandlerBase {
- public:
- explicit ListColumnFamiliesHandler(const ReadOptions& read_options)
- : VersionEditHandlerBase(read_options) {}
- ~ListColumnFamiliesHandler() override {}
- const std::map<uint32_t, std::string> GetColumnFamilyNames() const {
- return column_family_names_;
- }
- protected:
- Status ApplyVersionEdit(VersionEdit& edit,
- ColumnFamilyData** /*unused*/) override;
- private:
- // default column family is always implicitly there
- std::map<uint32_t, std::string> column_family_names_{
- {0, kDefaultColumnFamilyName}};
- };
- class FileChecksumRetriever : public VersionEditHandlerBase {
- public:
- FileChecksumRetriever(const ReadOptions& read_options, uint64_t max_read_size)
- : VersionEditHandlerBase(read_options, max_read_size) {}
- ~FileChecksumRetriever() override {}
- Status FetchFileChecksumList(FileChecksumList& file_checksum_list);
- protected:
- Status ApplyVersionEdit(VersionEdit& edit,
- ColumnFamilyData** /*unused*/) override;
- private:
- // Map from CF to file # to string pair, where first portion of the value
- // is checksum, and second portion of the value is checksum function name.
- //
- // [column family id A]
- // |
- // |-- [file #1] -> [checksum #1, checksum function name #1]
- // |-- [file #2] -> [checksum #2, checksum function name #2]
- // |
- // ...
- // |
- // |-- [file #N] -> [checksum #N, checksum function name #N]
- // [column family id B]
- // |
- // |-- [file #1] -> [checksum #1, checksum function name #1]
- // |
- // ...
- // |
- // |-- [file #M] -> [checksum #M, checksum function name #M]
- // |
- // ...
- std::unordered_map<
- uint32_t,
- std::unordered_map<uint64_t, std::pair<std::string, std::string>>>
- cf_file_checksums_;
- };
- using VersionBuilderUPtr = std::unique_ptr<BaseReferencedVersionBuilder>;
- // A class used for scanning MANIFEST file.
- // VersionEditHandler reads a MANIFEST file, parses the version edits, and
- // builds the version set's in-memory state, e.g. the version storage info for
- // the versions of column families. It replays all the version edits in one
- // MANIFEST file to build the end version.
- //
- // To use this class and its subclasses,
- // 1. Create an object of VersionEditHandler or its subclasses.
- // VersionEditHandler handler(read_only, column_families, version_set,
- // track_found_and_missing_files,
- // no_error_if_files_missing);
- // 2. Status s = handler.Iterate(reader, &db_id);
- // 3. Check s and handle possible errors.
- //
- // Not thread-safe, external synchronization is necessary if an object of
- // VersionEditHandler is shared by multiple threads.
- class VersionEditHandler : public VersionEditHandlerBase {
- public:
- explicit VersionEditHandler(
- bool read_only,
- const std::vector<ColumnFamilyDescriptor>& column_families,
- VersionSet* version_set, bool track_found_and_missing_files,
- bool no_error_if_files_missing,
- const std::shared_ptr<IOTracer>& io_tracer,
- const ReadOptions& read_options, bool allow_incomplete_valid_version,
- EpochNumberRequirement epoch_number_requirement =
- EpochNumberRequirement::kMustPresent)
- : VersionEditHandler(read_only, column_families, version_set,
- track_found_and_missing_files,
- no_error_if_files_missing, io_tracer, read_options,
- /*skip_load_table_files=*/false,
- allow_incomplete_valid_version,
- epoch_number_requirement) {}
- ~VersionEditHandler() override {}
- const VersionEditParams& GetVersionEditParams() const {
- return version_edit_params_;
- }
- void GetDbId(std::string* db_id) const {
- if (db_id && version_edit_params_.HasDbId()) {
- *db_id = version_edit_params_.GetDbId();
- }
- }
- virtual Status VerifyFile(ColumnFamilyData* /*cfd*/,
- const std::string& /*fpath*/, int /*level*/,
- const FileMetaData& /*fmeta*/) {
- return Status::OK();
- }
- virtual Status VerifyBlobFile(ColumnFamilyData* /*cfd*/,
- uint64_t /*blob_file_num*/,
- const BlobFileAddition& /*blob_addition*/) {
- return Status::OK();
- }
- protected:
- explicit VersionEditHandler(
- bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
- VersionSet* version_set, bool track_found_and_missing_files,
- bool no_error_if_files_missing,
- const std::shared_ptr<IOTracer>& io_tracer,
- const ReadOptions& read_options, bool skip_load_table_files,
- bool allow_incomplete_valid_version,
- EpochNumberRequirement epoch_number_requirement =
- EpochNumberRequirement::kMustPresent);
- Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
- virtual Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd);
- Status OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd);
- Status OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd);
- Status OnWalAddition(VersionEdit& edit);
- Status OnWalDeletion(VersionEdit& edit);
- Status Initialize() override;
- void CheckColumnFamilyId(const VersionEdit& edit, bool* do_not_open_cf,
- bool* cf_in_builders) const;
- void CheckIterationResult(const log::Reader& reader, Status* s) override;
- ColumnFamilyData* CreateCfAndInit(const ColumnFamilyOptions& cf_options,
- const VersionEdit& edit);
- virtual ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit);
- virtual Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit,
- ColumnFamilyData* cfd,
- bool force_create_version);
- virtual Status LoadTables(ColumnFamilyData* cfd,
- bool prefetch_index_and_filter_in_cache,
- bool is_initial_load);
- virtual bool MustOpenAllColumnFamilies() const {
- return !version_set_->unchanging();
- }
- const bool read_only_;
- std::vector<ColumnFamilyDescriptor> column_families_;
- VersionSet* version_set_;
- std::unordered_map<uint32_t, VersionBuilderUPtr> builders_;
- std::unordered_map<std::string, ColumnFamilyOptions> name_to_options_;
- const bool track_found_and_missing_files_;
- // Keeps track of column families in manifest that were not found in
- // column families parameters. Namely, the user asks to not open these column
- // families. In non read only mode, if those column families are not dropped
- // by subsequent manifest records, Recover() will return failure status.
- std::unordered_map<uint32_t, std::string> do_not_open_column_families_;
- VersionEditParams version_edit_params_;
- bool no_error_if_files_missing_;
- std::shared_ptr<IOTracer> io_tracer_;
- bool skip_load_table_files_;
- bool initialized_;
- std::unique_ptr<std::unordered_map<uint32_t, std::string>> cf_to_cmp_names_;
- // If false, only a complete Version for which all files consisting it can be
- // found is considered a valid Version. If true, besides complete Version, an
- // incomplete Version with only a suffix of L0 files missing is also
- // considered valid if the Version is never edited in an atomic group.
- const bool allow_incomplete_valid_version_;
- EpochNumberRequirement epoch_number_requirement_;
- std::unordered_set<uint32_t> cfds_to_mark_no_udt_;
- private:
- Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd,
- const VersionEdit& edit);
- // When `FileMetaData.user_defined_timestamps_persisted` is false and
- // user-defined timestamp size is non-zero. User-defined timestamps are
- // stripped from file boundaries: `smallest`, `largest` in
- // `VersionEdit.DecodeFrom` before they were written to Manifest.
- // This is the mirroring change to handle file boundaries on the Manifest read
- // path for this scenario: to pad a minimum timestamp to the user key in
- // `smallest` and `largest` so their format are consistent with the running
- // user comparator.
- Status MaybeHandleFileBoundariesForNewFiles(VersionEdit& edit,
- const ColumnFamilyData* cfd);
- };
- // A class similar to its base class, i.e. VersionEditHandler.
- // Unlike VersionEditHandler that only aims to build the end version, this class
- // supports building the most recent point in time version. A point in time
- // version is a version for which no files are missing, or if
- // `allow_incomplete_valid_version` is true, only a suffix of L0 files (and
- // their associated blob files) are missing.
- //
- // Building a point in time version when end version is not available can
- // be useful for best efforts recovery (options.best_efforts_recovery), which
- // uses this class and sets `allow_incomplete_valid_version` to true.
- // It's also useful for secondary instances/follower instances for which end
- // version could be transiently unavailable. These two cases use subclass
- // `ManifestTailer` and sets `allow_incomplete_valid_version` to false.
- //
- // Not thread-safe, external synchronization is necessary if an object of
- // VersionEditHandlerPointInTime is shared by multiple threads.
- class VersionEditHandlerPointInTime : public VersionEditHandler {
- public:
- VersionEditHandlerPointInTime(
- bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
- VersionSet* version_set, const std::shared_ptr<IOTracer>& io_tracer,
- const ReadOptions& read_options, bool allow_incomplete_valid_version,
- EpochNumberRequirement epoch_number_requirement =
- EpochNumberRequirement::kMustPresent);
- ~VersionEditHandlerPointInTime() override;
- bool HasMissingFiles() const;
- virtual Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath,
- int level, const FileMetaData& fmeta) override;
- virtual Status VerifyBlobFile(ColumnFamilyData* cfd, uint64_t blob_file_num,
- const BlobFileAddition& blob_addition) override;
- protected:
- Status OnAtomicGroupReplayBegin() override;
- Status OnAtomicGroupReplayEnd() override;
- void CheckIterationResult(const log::Reader& reader, Status* s) override;
- ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit) override;
- // `MaybeCreateVersionBeforeApplyEdit(..., false)` creates a version upon a
- // negative edge trigger (transition from valid to invalid).
- //
- // `MaybeCreateVersionBeforeApplyEdit(..., true)` creates a version on a
- // positive level trigger (state is valid).
- Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit,
- ColumnFamilyData* cfd,
- bool force_create_version) override;
- Status LoadTables(ColumnFamilyData* cfd,
- bool prefetch_index_and_filter_in_cache,
- bool is_initial_load) override;
- std::unordered_map<uint32_t, Version*> versions_;
- // `atomic_update_versions_` is for ensuring all-or-nothing AtomicGroup
- // recoveries. When `atomic_update_versions_` is nonempty, it serves as a
- // barrier to updating `versions_` until all its values are populated.
- std::unordered_map<uint32_t, Version*> atomic_update_versions_;
- // `atomic_update_versions_missing_` counts the nullptr values in
- // `atomic_update_versions_`.
- size_t atomic_update_versions_missing_;
- bool in_atomic_group_ = false;
- private:
- bool AtomicUpdateVersionsCompleted();
- bool AtomicUpdateVersionsContains(uint32_t cfid);
- void AtomicUpdateVersionsDropCf(uint32_t cfid);
- // This function is called for `Version*` updates for column families in an
- // incomplete atomic update. It buffers `Version*` updates in
- // `atomic_update_versions_`.
- void AtomicUpdateVersionsPut(Version* version);
- // This function is called upon completion of an atomic update. It applies
- // `Version*` updates in `atomic_update_versions_` to `versions_`.
- void AtomicUpdateVersionsApply();
- };
- // A class similar to `VersionEditHandlerPointInTime` that parse MANIFEST and
- // builds point in time version.
- // `ManifestTailer` supports reading one MANIFEST file in multiple tailing
- // attempts and supports switching to a different MANIFEST after
- // `PrepareToReadNewManifest` is called. This class is used by secondary and
- // follower instance.
- class ManifestTailer : public VersionEditHandlerPointInTime {
- public:
- explicit ManifestTailer(std::vector<ColumnFamilyDescriptor> column_families,
- VersionSet* version_set,
- const std::shared_ptr<IOTracer>& io_tracer,
- const ReadOptions& read_options,
- EpochNumberRequirement epoch_number_requirement =
- EpochNumberRequirement::kMustPresent)
- : VersionEditHandlerPointInTime(
- /*read_only=*/true, column_families, version_set, io_tracer,
- read_options,
- /*allow_incomplete_valid_version=*/false, epoch_number_requirement),
- mode_(Mode::kRecovery) {}
- Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath, int level,
- const FileMetaData& fmeta) override;
- void PrepareToReadNewManifest() {
- initialized_ = false;
- ClearReadBuffer();
- }
- std::unordered_set<ColumnFamilyData*>& GetUpdatedColumnFamilies() {
- return cfds_changed_;
- }
- std::vector<std::string> GetAndClearIntermediateFiles();
- protected:
- Status Initialize() override;
- bool MustOpenAllColumnFamilies() const override { return false; }
- Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
- Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd) override;
- void CheckIterationResult(const log::Reader& reader, Status* s) override;
- enum Mode : uint8_t {
- kRecovery = 0,
- kCatchUp = 1,
- };
- Mode mode_;
- std::unordered_set<ColumnFamilyData*> cfds_changed_;
- };
- class DumpManifestHandler : public VersionEditHandler {
- public:
- DumpManifestHandler(std::vector<ColumnFamilyDescriptor> column_families,
- VersionSet* version_set,
- const std::shared_ptr<IOTracer>& io_tracer,
- const ReadOptions& read_options, bool verbose, bool hex,
- bool json)
- : VersionEditHandler(
- /*read_only=*/true, column_families, version_set,
- /*track_found_and_missing_files=*/false,
- /*no_error_if_files_missing=*/false, io_tracer, read_options,
- /*skip_load_table_files=*/true,
- /*allow_incomplete_valid_version=*/false,
- /*epoch_number_requirement=*/EpochNumberRequirement::kMustPresent),
- verbose_(verbose),
- hex_(hex),
- json_(json),
- count_(0) {
- cf_to_cmp_names_.reset(new std::unordered_map<uint32_t, std::string>());
- }
- ~DumpManifestHandler() override {}
- Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override {
- // Write out each individual edit
- if (json_) {
- // Print out DebugStrings. Can include non-terminating null characters.
- std::string edit_dump_str = edit.DebugJSON(count_, hex_);
- fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
- fwrite("\n", sizeof(char), 1, stdout);
- } else if (verbose_) {
- // Print out DebugStrings. Can include non-terminating null characters.
- std::string edit_dump_str = edit.DebugString(hex_);
- fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
- }
- ++count_;
- return VersionEditHandler::ApplyVersionEdit(edit, cfd);
- }
- void CheckIterationResult(const log::Reader& reader, Status* s) override;
- private:
- const bool verbose_;
- const bool hex_;
- const bool json_;
- int count_;
- };
- } // namespace ROCKSDB_NAMESPACE
|