//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
See the AUTHORS file for names of contributors.#pragma once#include <algorithm>#include <set>#include <string>#include <utility>#include <vector>#include "db/dbformat.h"#include "memory/arena.h"#include "rocksdb/cache.h"#include "table/table_reader.h"#include "util/autovector.h"namespace ROCKSDB_NAMESPACE {class VersionSet;constexpr uint64_t kFileNumberMask = 0x3FFFFFFFFFFFFFFF;constexpr uint64_t kInvalidBlobFileNumber = 0;constexpr uint64_t kUnknownOldestAncesterTime = 0;constexpr uint64_t kUnknownFileCreationTime = 0;extern const std::string kUnknownFileChecksum;extern const std::string kUnknownFileChecksumFuncName;extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);// A copyable structure contains information needed to read data from an SST// file. It can contain a pointer to a table reader opened for the file, or// file number and size, which can be used to create a new table reader for it.// The behavior is undefined when a copied of the structure is used when the// file is not in any live version any more.struct FileDescriptor {  // Table reader in table_reader_handle  TableReader* table_reader;  uint64_t packed_number_and_path_id;  uint64_t file_size;  // File size in bytes  SequenceNumber smallest_seqno;  // The smallest seqno in this file  SequenceNumber largest_seqno;   // The largest seqno in this file  FileDescriptor() : FileDescriptor(0, 0, 0) {}  FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size)      : FileDescriptor(number, path_id, _file_size, kMaxSequenceNumber, 0) {}  FileDescriptor(uint64_t number, uint32_t path_id, uint64_t _file_size,                 SequenceNumber _smallest_seqno, SequenceNumber _largest_seqno)      : table_reader(nullptr),        packed_number_and_path_id(PackFileNumberAndPathId(number, path_id)),        file_size(_file_size),        smallest_seqno(_smallest_seqno),        largest_seqno(_largest_seqno) {}  FileDescriptor(const FileDescriptor& fd) { *this = fd; }  FileDescriptor& 
operator=(const FileDescriptor& fd) {    table_reader = fd.table_reader;    packed_number_and_path_id = fd.packed_number_and_path_id;    file_size = fd.file_size;    smallest_seqno = fd.smallest_seqno;    largest_seqno = fd.largest_seqno;    return *this;  }  uint64_t GetNumber() const {    return packed_number_and_path_id & kFileNumberMask;  }  uint32_t GetPathId() const {    return static_cast<uint32_t>(        packed_number_and_path_id / (kFileNumberMask + 1));  }  uint64_t GetFileSize() const { return file_size; }};struct FileSampledStats {  FileSampledStats() : num_reads_sampled(0) {}  FileSampledStats(const FileSampledStats& other) { *this = other; }  FileSampledStats& operator=(const FileSampledStats& other) {    num_reads_sampled = other.num_reads_sampled.load();    return *this;  }  // number of user reads to this file.  mutable std::atomic<uint64_t> num_reads_sampled;};struct FileMetaData {  FileDescriptor fd;  InternalKey smallest;            // Smallest internal key served by table  InternalKey largest;             // Largest internal key served by table  // Needs to be disposed when refs becomes 0.  Cache::Handle* table_reader_handle = nullptr;  FileSampledStats stats;  // Stats for compensating deletion entries during compaction  // File size compensated by deletion entry.  // This is updated in Version::UpdateAccumulatedStats() first time when the  // file is created or loaded.  After it is updated (!= 0), it is immutable.  uint64_t compensated_file_size = 0;  // These values can mutate, but they can only be read or written from  // single-threaded LogAndApply thread  uint64_t num_entries = 0;     // the number of entries.  uint64_t num_deletions = 0;   // the number of deletion entries.  uint64_t raw_key_size = 0;    // total uncompressed key size.  uint64_t raw_value_size = 0;  // total uncompressed value size.  int refs = 0;  // Reference count  bool being_compacted = false;       // Is this file undergoing compaction?  
bool init_stats_from_file = false;  // true if the data-entry stats of this                                      // file has initialized from file.  bool marked_for_compaction = false;  // True if client asked us nicely to                                       // compact this file.  // Used only in BlobDB. The file number of the oldest blob file this SST file  // refers to. 0 is an invalid value; BlobDB numbers the files starting from 1.  uint64_t oldest_blob_file_number = kInvalidBlobFileNumber;  // The file could be the compaction output from other SST files, which could  // in turn be outputs for compact older SST files. We track the memtable  // flush timestamp for the oldest SST file that eventaully contribute data  // to this file. 0 means the information is not available.  uint64_t oldest_ancester_time = kUnknownOldestAncesterTime;  // Unix time when the SST file is created.  uint64_t file_creation_time = kUnknownFileCreationTime;  // File checksum  std::string file_checksum = kUnknownFileChecksum;  // File checksum function name  std::string file_checksum_func_name = kUnknownFileChecksumFuncName;  FileMetaData() = default;  FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,               const InternalKey& smallest_key, const InternalKey& largest_key,               const SequenceNumber& smallest_seq,               const SequenceNumber& largest_seq, bool marked_for_compact,               uint64_t oldest_blob_file, uint64_t _oldest_ancester_time,               uint64_t _file_creation_time, const std::string& _file_checksum,               const std::string& _file_checksum_func_name)      : fd(file, file_path_id, file_size, smallest_seq, largest_seq),        smallest(smallest_key),        largest(largest_key),        marked_for_compaction(marked_for_compact),        oldest_blob_file_number(oldest_blob_file),        oldest_ancester_time(_oldest_ancester_time),        file_creation_time(_file_creation_time),        
file_checksum(_file_checksum),        file_checksum_func_name(_file_checksum_func_name) {    TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this);  }  // REQUIRED: Keys must be given to the function in sorted order (it expects  // the last key to be the largest).  void UpdateBoundaries(const Slice& key, const Slice& value,                        SequenceNumber seqno, ValueType value_type);  // Unlike UpdateBoundaries, ranges do not need to be presented in any  // particular order.  void UpdateBoundariesForRange(const InternalKey& start,                                const InternalKey& end, SequenceNumber seqno,                                const InternalKeyComparator& icmp) {    if (smallest.size() == 0 || icmp.Compare(start, smallest) < 0) {      smallest = start;    }    if (largest.size() == 0 || icmp.Compare(largest, end) < 0) {      largest = end;    }    fd.smallest_seqno = std::min(fd.smallest_seqno, seqno);    fd.largest_seqno = std::max(fd.largest_seqno, seqno);  }  // Try to get oldest ancester time from the class itself or table properties  // if table reader is already pinned.  // 0 means the information is not available.  
uint64_t TryGetOldestAncesterTime() {    if (oldest_ancester_time != kUnknownOldestAncesterTime) {      return oldest_ancester_time;    } else if (fd.table_reader != nullptr &&               fd.table_reader->GetTableProperties() != nullptr) {      return fd.table_reader->GetTableProperties()->creation_time;    }    return kUnknownOldestAncesterTime;  }  uint64_t TryGetFileCreationTime() {    if (file_creation_time != kUnknownFileCreationTime) {      return file_creation_time;    } else if (fd.table_reader != nullptr &&               fd.table_reader->GetTableProperties() != nullptr) {      return fd.table_reader->GetTableProperties()->file_creation_time;    }    return kUnknownFileCreationTime;  }};// A compressed copy of file meta data that just contain minimum data needed// to server read operations, while still keeping the pointer to full metadata// of the file in case it is needed.struct FdWithKeyRange {  FileDescriptor fd;  FileMetaData* file_metadata;  // Point to all metadata  Slice smallest_key;    // slice that contain smallest key  Slice largest_key;     // slice that contain largest key  FdWithKeyRange()      : fd(),        file_metadata(nullptr),        smallest_key(),        largest_key() {  }  FdWithKeyRange(FileDescriptor _fd, Slice _smallest_key, Slice _largest_key,                 FileMetaData* _file_metadata)      : fd(_fd),        file_metadata(_file_metadata),        smallest_key(_smallest_key),        largest_key(_largest_key) {}};// Data structure to store an array of FdWithKeyRange in one level// Actual data is guaranteed to be stored closelystruct LevelFilesBrief {  size_t num_files;  FdWithKeyRange* files;  LevelFilesBrief() {    num_files = 0;    files = nullptr;  }};// The state of a DB at any given time is referred to as a Version.// Any modification to the Version is considered a Version Edit. A Version is// constructed by joining a sequence of Version Edits. 
Version Edits are written// to the MANIFEST file.class VersionEdit { public:  void Clear();  void SetDBId(const std::string& db_id) {    has_db_id_ = true;    db_id_ = db_id;  }  bool HasDbId() const { return has_db_id_; }  const std::string& GetDbId() const { return db_id_; }  void SetComparatorName(const Slice& name) {    has_comparator_ = true;    comparator_ = name.ToString();  }  bool HasComparatorName() const { return has_comparator_; }  const std::string& GetComparatorName() const { return comparator_; }  void SetLogNumber(uint64_t num) {    has_log_number_ = true;    log_number_ = num;  }  bool HasLogNumber() const { return has_log_number_; }  uint64_t GetLogNumber() const { return log_number_; }  void SetPrevLogNumber(uint64_t num) {    has_prev_log_number_ = true;    prev_log_number_ = num;  }  bool HasPrevLogNumber() const { return has_prev_log_number_; }  uint64_t GetPrevLogNumber() const { return prev_log_number_; }  void SetNextFile(uint64_t num) {    has_next_file_number_ = true;    next_file_number_ = num;  }  bool HasNextFile() const { return has_next_file_number_; }  uint64_t GetNextFile() const { return next_file_number_; }  void SetMaxColumnFamily(uint32_t max_column_family) {    has_max_column_family_ = true;    max_column_family_ = max_column_family;  }  bool HasMaxColumnFamily() const { return has_max_column_family_; }  uint32_t GetMaxColumnFamily() const { return max_column_family_; }  void SetMinLogNumberToKeep(uint64_t num) {    has_min_log_number_to_keep_ = true;    min_log_number_to_keep_ = num;  }  bool HasMinLogNumberToKeep() const { return has_min_log_number_to_keep_; }  uint64_t GetMinLogNumberToKeep() const { return min_log_number_to_keep_; }  void SetLastSequence(SequenceNumber seq) {    has_last_sequence_ = true;    last_sequence_ = seq;  }  bool HasLastSequence() const { return has_last_sequence_; }  SequenceNumber GetLastSequence() const { return last_sequence_; }  // Delete the specified "file" from the specified "level".  
void DeleteFile(int level, uint64_t file) {    deleted_files_.emplace(level, file);  }  // Retrieve the files deleted as well as their associated levels.  using DeletedFiles = std::set<std::pair<int, uint64_t>>;  const DeletedFiles& GetDeletedFiles() const { return deleted_files_; }  // Add the specified file at the specified level.  // REQUIRES: This version has not been saved (see VersionSet::SaveTo)  // REQUIRES: "smallest" and "largest" are smallest and largest keys in file  // REQUIRES: "oldest_blob_file_number" is the number of the oldest blob file  // referred to by this file if any, kInvalidBlobFileNumber otherwise.  void AddFile(int level, uint64_t file, uint32_t file_path_id,               uint64_t file_size, const InternalKey& smallest,               const InternalKey& largest, const SequenceNumber& smallest_seqno,               const SequenceNumber& largest_seqno, bool marked_for_compaction,               uint64_t oldest_blob_file_number, uint64_t oldest_ancester_time,               uint64_t file_creation_time, const std::string& file_checksum,               const std::string& file_checksum_func_name) {    assert(smallest_seqno <= largest_seqno);    new_files_.emplace_back(        level, FileMetaData(file, file_path_id, file_size, smallest, largest,                            smallest_seqno, largest_seqno,                            marked_for_compaction, oldest_blob_file_number,                            oldest_ancester_time, file_creation_time,                            file_checksum, file_checksum_func_name));  }  void AddFile(int level, const FileMetaData& f) {    assert(f.fd.smallest_seqno <= f.fd.largest_seqno);    new_files_.emplace_back(level, f);  }  // Retrieve the files added as well as their associated levels.  
using NewFiles = std::vector<std::pair<int, FileMetaData>>;  const NewFiles& GetNewFiles() const { return new_files_; }  // Number of edits  size_t NumEntries() const { return new_files_.size() + deleted_files_.size(); }  void SetColumnFamily(uint32_t column_family_id) {    column_family_ = column_family_id;  }  uint32_t GetColumnFamily() const { return column_family_; }  // set column family ID by calling SetColumnFamily()  void AddColumnFamily(const std::string& name) {    assert(!is_column_family_drop_);    assert(!is_column_family_add_);    assert(NumEntries() == 0);    is_column_family_add_ = true;    column_family_name_ = name;  }  // set column family ID by calling SetColumnFamily()  void DropColumnFamily() {    assert(!is_column_family_drop_);    assert(!is_column_family_add_);    assert(NumEntries() == 0);    is_column_family_drop_ = true;  }  bool IsColumnFamilyManipulation() const {    return is_column_family_add_ || is_column_family_drop_;  }  void MarkAtomicGroup(uint32_t remaining_entries) {    is_in_atomic_group_ = true;    remaining_entries_ = remaining_entries;  }  bool IsInAtomicGroup() const { return is_in_atomic_group_; }  uint32_t GetRemainingEntries() const { return remaining_entries_; }  // return true on success.  
bool EncodeTo(std::string* dst) const;  Status DecodeFrom(const Slice& src);  std::string DebugString(bool hex_key = false) const;  std::string DebugJSON(int edit_num, bool hex_key = false) const; private:  friend class ReactiveVersionSet;  friend class VersionSet;  friend class Version;  friend class AtomicGroupReadBuffer;  bool GetLevel(Slice* input, int* level, const char** msg);  const char* DecodeNewFile4From(Slice* input);  int max_level_ = 0;  std::string db_id_;  std::string comparator_;  uint64_t log_number_ = 0;  uint64_t prev_log_number_ = 0;  uint64_t next_file_number_ = 0;  uint32_t max_column_family_ = 0;  // The most recent WAL log number that is deleted  uint64_t min_log_number_to_keep_ = 0;  SequenceNumber last_sequence_ = 0;  bool has_db_id_ = false;  bool has_comparator_ = false;  bool has_log_number_ = false;  bool has_prev_log_number_ = false;  bool has_next_file_number_ = false;  bool has_max_column_family_ = false;  bool has_min_log_number_to_keep_ = false;  bool has_last_sequence_ = false;  DeletedFiles deleted_files_;  NewFiles new_files_;  // Each version edit record should have column_family_ set  // If it's not set, it is default (0)  uint32_t column_family_ = 0;  // a version edit can be either column_family add or  // column_family drop. If it's column family add,  // it also includes column family name.  bool is_column_family_drop_ = false;  bool is_column_family_add_ = false;  std::string column_family_name_;  bool is_in_atomic_group_ = false;  uint32_t remaining_entries_ = 0;};}  // namespace ROCKSDB_NAMESPACE
 |