| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #pragma once
- #include <algorithm>
- #include <set>
- #include <string>
- #include <utility>
- #include <vector>
- #include "db/dbformat.h"
- #include "memory/arena.h"
- #include "rocksdb/cache.h"
- #include "table/table_reader.h"
- #include "util/autovector.h"
- namespace ROCKSDB_NAMESPACE {
class VersionSet;

// Mask selecting the low 62 bits of a packed file identifier; FileDescriptor
// uses it to split the file number from the path id stored above it.
constexpr uint64_t kFileNumberMask = (uint64_t{1} << 62) - 1;

// Sentinel values meaning "not set / not known" for the matching
// FileMetaData fields.
constexpr uint64_t kInvalidBlobFileNumber = 0;
constexpr uint64_t kUnknownOldestAncesterTime = 0;
constexpr uint64_t kUnknownFileCreationTime = 0;

// Defaults used for FileMetaData::file_checksum and
// FileMetaData::file_checksum_func_name; defined in another translation unit.
extern const std::string kUnknownFileChecksum;
extern const std::string kUnknownFileChecksumFuncName;

// Combines a file number and a path id into the single 64-bit value stored in
// FileDescriptor::packed_number_and_path_id.
uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
- // A copyable structure contains information needed to read data from an SST
- // file. It can contain a pointer to a table reader opened for the file, or
- // file number and size, which can be used to create a new table reader for it.
- // The behavior is undefined when a copied of the structure is used when the
- // file is not in any live version any more.
- struct FileDescriptor {
- // Table reader in table_reader_handle
- TableReader* table_reader;
- uint64_t packed_number_and_path_id;
- uint64_t file_size; // File size in bytes
- SequenceNumber smallest_seqno; // The smallest seqno in this file
- SequenceNumber largest_seqno; // The largest seqno in this file
- FileDescriptor() : FileDescriptor(0, 0, 0) {}
- FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size)
- : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {}
- FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size,
- SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno)
- : table_reader(nullptr),
- packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)),
- file_size(_file_size),
- smallest_seqno(_smallest_seqno),
- largest_seqno(_largest_seqno) {}
- FileDescriptor(const FileDescriptor& fd) { *this = fd; }
- FileDescriptor& operator=(const FileDescriptor& fd) {
- table_reader = fd.table_reader;
- packed_number_and_path_id = fd.packed_number_and_path_id;
- file_size = fd.file_size;
- smallest_seqno = fd.smallest_seqno;
- largest_seqno = fd.largest_seqno;
- return *this;
- }
- uint64_t GetNumber() const {
- return packed_number_and_path_id & kFileNumberMask;
- }
- uint32_t GetPathId() const {
- return static_cast<uint32_t>(
- packed_number_and_path_id / (kFileNumberMask + 1));
- }
- uint64_t GetFileSize() const { return file_size; }
- };
// Sampled read statistics for one file. std::atomic is not copyable, so the
// copy operations are written by hand and transfer the current counter value.
struct FileSampledStats {
  FileSampledStats() : num_reads_sampled(0) {}

  FileSampledStats(const FileSampledStats& other)
      : num_reads_sampled(other.num_reads_sampled.load()) {}

  FileSampledStats& operator=(const FileSampledStats& other) {
    num_reads_sampled.store(other.num_reads_sampled.load());
    return *this;
  }

  // number of user reads to this file.
  // Declared mutable so it can be bumped through a const reference.
  mutable std::atomic<uint64_t> num_reads_sampled;
};
- struct FileMetaData {
- FileDescriptor fd;
- InternalKey smallest; // Smallest internal key served by table
- InternalKey largest; // Largest internal key served by table
- // Needs to be disposed when refs becomes 0.
- Cache::Handle* table_reader_handle = nullptr;
- FileSampledStats stats;
- // Stats for compensating deletion entries during compaction
- // File size compensated by deletion entry.
- // This is updated in Version::UpdateAccumulatedStats() first time when the
- // file is created or loaded. After it is updated (!= 0), it is immutable.
- uint64_t compensated_file_size = 0;
- // These values can mutate, but they can only be read or written from
- // single-threaded LogAndApply thread
- uint64_t num_entries = 0; // the number of entries.
- uint64_t num_deletions = 0; // the number of deletion entries.
- uint64_t raw_key_size = 0; // total uncompressed key size.
- uint64_t raw_value_size = 0; // total uncompressed value size.
- int refs = 0; // Reference count
- bool being_compacted = false; // Is this file undergoing compaction?
- bool init_stats_from_file = false; // true if the data-entry stats of this
- // file has initialized from file.
- bool marked_for_compaction = false; // True if client asked us nicely to
- // compact this file.
- // Used only in BlobDB. The file number of the oldest blob file this SST file
- // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.
- uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;
- // The file could be the compaction output from other SST files, which could
- // in turn be outputs for compact older SST files. We track the memtable
- // flush timestamp for the oldest SST file that eventaully contribute data
- // to this file. 0 means the information is not available.
- uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;
- // Unix time when the SST file is created.
- uint64_t file_creation_time = kUnknownFileCreationTime;
- // File checksum
- std::string file_checksum = kUnknownFileChecksum;
- // File checksum function name
- std::string file_checksum_func_name = kUnknownFileChecksumFuncName;
- FileMetaData() = default;
- FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,
- const InternalKey& smallest_key, const InternalKey& largest_key,
- const SequenceNumber& smallest_seq,
- const SequenceNumber& largest_seq, bool marked_for_compact,
- uint64_t oldest_blob_file, uint64_t _oldest_ancester_time,
- uint64_t _file_creation_time, const std::string& _file_checksum,
- const std::string& _file_checksum_func_name)
- : fd(file, file_path_id, file_size, smallest_seq, largest_seq),
- smallest(smallest_key),
- largest(largest_key),
- marked_for_compaction(marked_for_compact),
- oldest_blob_file_number(oldest_blob_file),
- oldest_ancester_time(_oldest_ancester_time),
- file_creation_time(_file_creation_time),
- file_checksum(_file_checksum),
- file_checksum_func_name(_file_checksum_func_name) {
- TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this);
- }
- // REQUIRED: Keys must be given to the function in sorted order (it expects
- // the last key to be the largest).
- void UpdateBoundaries(const Slice& key, const Slice& value,
- SequenceNumber seqno, ValueType value_type);
- // Unlike UpdateBoundaries, ranges do not need to be presented in any
- // particular order.
- void UpdateBoundariesForRange(const InternalKey& start,
- const InternalKey& end, SequenceNumber seqno,
- const InternalKeyComparator& icmp) {
- if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) {
- smallest = start;
- }
- if (largest.size() == 0 || icmp.Compare(largest, end) < 0) {
- largest = end;
- }
- fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);
- fd.largest_seqno = std::max(fd.largest_seqno, seqno);
- }
- // Try to get oldest ancester time from the class itself or table properties
- // if table reader is already pinned.
- // 0 means the information is not available.
- uint64_t TryGetOldestAncesterTime() {
- if (oldest_ancester_time != kUnknownOldestAncesterTime) {
- return oldest_ancester_time;
- } else if (fd.table_reader != nullptr &&
- fd.table_reader->GetTableProperties() != nullptr) {
- return fd.table_reader->GetTableProperties()->creation_time;
- }
- return kUnknownOldestAncesterTime;
- }
- uint64_t TryGetFileCreationTime() {
- if (file_creation_time != kUnknownFileCreationTime) {
- return file_creation_time;
- } else if (fd.table_reader != nullptr &&
- fd.table_reader->GetTableProperties() != nullptr) {
- return fd.table_reader->GetTableProperties()->file_creation_time;
- }
- return kUnknownFileCreationTime;
- }
- };
- // A compressed copy of file meta data that just contain minimum data needed
- // to server read operations, while still keeping the pointer to full metadata
- // of the file in case it is needed.
- struct FdWithKeyRange {
- FileDescriptor fd;
- FileMetaData* file_metadata; // Point to all metadata
- Slice smallest_key; // slice that contain smallest key
- Slice largest_key; // slice that contain largest key
- FdWithKeyRange()
- : fd(),
- file_metadata(nullptr),
- smallest_key(),
- largest_key() {
- }
- FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,
- FileMetaData* _file_metadata)
- : fd(_fd),
- file_metadata(_file_metadata),
- smallest_key(_smallest_key),
- largest_key(_largest_key) {}
- };
- // Data structure to store an array of FdWithKeyRange in one level
- // Actual data is guaranteed to be stored closely
- struct LevelFilesBrief {
- size_t num_files;
- FdWithKeyRange* files;
- LevelFilesBrief() {
- num_files = 0;
- files = nullptr;
- }
- };
- // The state of a DB at any given time is referred to as a Version.
- // Any modification to the Version is considered a Version Edit. A Version is
- // constructed by joining a sequence of Version Edits. Version Edits are written
- // to the MANIFEST file.
- class VersionEdit {
- public:
- void Clear();
- void SetDBId(const std::string& db_id) {
- has_db_id_ = true;
- db_id_ = db_id;
- }
- bool HasDbId() const { return has_db_id_; }
- const std::string& GetDbId() const { return db_id_; }
- void SetComparatorName(const Slice& name) {
- has_comparator_ = true;
- comparator_ = name.ToString();
- }
- bool HasComparatorName() const { return has_comparator_; }
- const std::string& GetComparatorName() const { return comparator_; }
- void SetLogNumber(uint64_t num) {
- has_log_number_ = true;
- log_number_ = num;
- }
- bool HasLogNumber() const { return has_log_number_; }
- uint64_t GetLogNumber() const { return log_number_; }
- void SetPrevLogNumber(uint64_t num) {
- has_prev_log_number_ = true;
- prev_log_number_ = num;
- }
- bool HasPrevLogNumber() const { return has_prev_log_number_; }
- uint64_t GetPrevLogNumber() const { return prev_log_number_; }
- void SetNextFile(uint64_t num) {
- has_next_file_number_ = true;
- next_file_number_ = num;
- }
- bool HasNextFile() const { return has_next_file_number_; }
- uint64_t GetNextFile() const { return next_file_number_; }
- void SetMaxColumnFamily(uint32_t max_column_family) {
- has_max_column_family_ = true;
- max_column_family_ = max_column_family;
- }
- bool HasMaxColumnFamily() const { return has_max_column_family_; }
- uint32_t GetMaxColumnFamily() const { return max_column_family_; }
- void SetMinLogNumberToKeep(uint64_t num) {
- has_min_log_number_to_keep_ = true;
- min_log_number_to_keep_ = num;
- }
- bool HasMinLogNumberToKeep() const { return has_min_log_number_to_keep_; }
- uint64_t GetMinLogNumberToKeep() const { return min_log_number_to_keep_; }
- void SetLastSequence(SequenceNumber seq) {
- has_last_sequence_ = true;
- last_sequence_ = seq;
- }
- bool HasLastSequence() const { return has_last_sequence_; }
- SequenceNumber GetLastSequence() const { return last_sequence_; }
- // Delete the specified "file" from the specified "level".
- void DeleteFile(int level, uint64_t file) {
- deleted_files_.emplace(level, file);
- }
- // Retrieve the files deleted as well as their associated levels.
- using DeletedFiles = std::set<std::pair<int, uint64_t>>;
- const DeletedFiles& GetDeletedFiles() const { return deleted_files_; }
- // Add the specified file at the specified level.
- // REQUIRES: This version has not been saved (see VersionSet::SaveTo)
- // REQUIRES: "smallest" and "largest" are smallest and largest keys in file
- // REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file
- // referred to by this file if any, kInvalidBlobFileNumber otherwise.
- void AddFile(int level, uint64_t file, uint32_t file_path_id,
- uint64_t file_size, const InternalKey& smallest,
- const InternalKey& largest, const SequenceNumber& smallest_seqno,
- const SequenceNumber& largest_seqno, bool marked_for_compaction,
- uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time,
- uint64_t file_creation_time, const std::string& file_checksum,
- const std::string& file_checksum_func_name) {
- assert(smallest_seqno <= largest_seqno);
- new_files_.emplace_back(
- level, FileMetaData(file, file_path_id, file_size, smallest, largest,
- smallest_seqno, largest_seqno,
- marked_for_compaction, oldest_blob_file_number,
- oldest_ancester_time, file_creation_time,
- file_checksum, file_checksum_func_name));
- }
- void AddFile(int level, const FileMetaData& f) {
- assert(f.fd.smallest_seqno <= f.fd.largest_seqno);
- new_files_.emplace_back(level, f);
- }
- // Retrieve the files added as well as their associated levels.
- using NewFiles = std::vector<std::pair<int, FileMetaData>>;
- const NewFiles& GetNewFiles() const { return new_files_; }
- // Number of edits
- size_t NumEntries() const { return new_files_.size() + deleted_files_.size(); }
- void SetColumnFamily(uint32_t column_family_id) {
- column_family_ = column_family_id;
- }
- uint32_t GetColumnFamily() const { return column_family_; }
- // set column family ID by calling SetColumnFamily()
- void AddColumnFamily(const std::string& name) {
- assert(!is_column_family_drop_);
- assert(!is_column_family_add_);
- assert(NumEntries() == 0);
- is_column_family_add_ = true;
- column_family_name_ = name;
- }
- // set column family ID by calling SetColumnFamily()
- void DropColumnFamily() {
- assert(!is_column_family_drop_);
- assert(!is_column_family_add_);
- assert(NumEntries() == 0);
- is_column_family_drop_ = true;
- }
- bool IsColumnFamilyManipulation() const {
- return is_column_family_add_ || is_column_family_drop_;
- }
- void MarkAtomicGroup(uint32_t remaining_entries) {
- is_in_atomic_group_ = true;
- remaining_entries_ = remaining_entries;
- }
- bool IsInAtomicGroup() const { return is_in_atomic_group_; }
- uint32_t GetRemainingEntries() const { return remaining_entries_; }
- // return true on success.
- bool EncodeTo(std::string* dst) const;
- Status DecodeFrom(const Slice& src);
- std::string DebugString(bool hex_key = false) const;
- std::string DebugJSON(int edit_num, bool hex_key = false) const;
- private:
- friend class ReactiveVersionSet;
- friend class VersionSet;
- friend class Version;
- friend class AtomicGroupReadBuffer;
- bool GetLevel(Slice* input, int* level, const char** msg);
- const char* DecodeNewFile4From(Slice* input);
- int max_level_ = 0;
- std::string db_id_;
- std::string comparator_;
- uint64_t log_number_ = 0;
- uint64_t prev_log_number_ = 0;
- uint64_t next_file_number_ = 0;
- uint32_t max_column_family_ = 0;
- // The most recent WAL log number that is deleted
- uint64_t min_log_number_to_keep_ = 0;
- SequenceNumber last_sequence_ = 0;
- bool has_db_id_ = false;
- bool has_comparator_ = false;
- bool has_log_number_ = false;
- bool has_prev_log_number_ = false;
- bool has_next_file_number_ = false;
- bool has_max_column_family_ = false;
- bool has_min_log_number_to_keep_ = false;
- bool has_last_sequence_ = false;
- DeletedFiles deleted_files_;
- NewFiles new_files_;
- // Each version edit record should have column_family_ set
- // If it's not set, it is default (0)
- uint32_t column_family_ = 0;
- // a version edit can be either column_family add or
- // column_family drop. If it's column family add,
- // it also includes column family name.
- bool is_column_family_drop_ = false;
- bool is_column_family_add_ = false;
- std::string column_family_name_;
- bool is_in_atomic_group_ = false;
- uint32_t remaining_entries_ = 0;
- };
- } // namespace ROCKSDB_NAMESPACE
|