| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include "db/compaction/compaction_picker_level.h"
- #include <string>
- #include <utility>
- #include <vector>
- #include "db/version_edit.h"
- #include "logging/log_buffer.h"
- #include "test_util/sync_point.h"
- namespace ROCKSDB_NAMESPACE {
- bool LevelCompactionPicker::NeedsCompaction(
- const VersionStorageInfo* vstorage) const {
- if (!vstorage->ExpiredTtlFiles().empty()) {
- return true;
- }
- if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) {
- return true;
- }
- if (!vstorage->BottommostFilesMarkedForCompaction().empty()) {
- return true;
- }
- if (!vstorage->FilesMarkedForCompaction().empty()) {
- return true;
- }
- if (!vstorage->FilesMarkedForForcedBlobGC().empty()) {
- return true;
- }
- for (int i = 0; i <= vstorage->MaxInputLevel(); i++) {
- if (vstorage->CompactionScore(i) >= 1) {
- return true;
- }
- }
- return false;
- }
- namespace {
- enum class CompactToNextLevel {
- kNo, // compact to the same level as the input file
- kYes, // compact to the next level except the last level to the same level
- kSkipLastLevel, // compact to the next level but skip the last level
- };
- // A class to build a leveled compaction step-by-step.
- class LevelCompactionBuilder {
- public:
- LevelCompactionBuilder(const std::string& cf_name,
- VersionStorageInfo* vstorage,
- CompactionPicker* compaction_picker,
- LogBuffer* log_buffer,
- const MutableCFOptions& mutable_cf_options,
- const ImmutableOptions& ioptions,
- const MutableDBOptions& mutable_db_options)
- : cf_name_(cf_name),
- vstorage_(vstorage),
- compaction_picker_(compaction_picker),
- log_buffer_(log_buffer),
- mutable_cf_options_(mutable_cf_options),
- ioptions_(ioptions),
- mutable_db_options_(mutable_db_options) {}
- // Pick and return a compaction.
- Compaction* PickCompaction();
- // Pick the initial files to compact to the next level. (or together
- // in Intra-L0 compactions)
- void SetupInitialFiles();
- // If the initial files are from L0 level, pick other L0
- // files if needed.
- bool SetupOtherL0FilesIfNeeded();
- // Compaction with round-robin compaction priority allows more files to be
- // picked to form a large compaction
- void SetupOtherFilesWithRoundRobinExpansion();
- // Based on initial files, setup other files need to be compacted
- // in this compaction, accordingly.
- bool SetupOtherInputsIfNeeded();
- Compaction* GetCompaction();
- // From `start_level_`, pick files to compact to `output_level_`.
- // Returns false if there is no file to compact.
- // If it returns true, inputs->files.size() will be exactly one for
- // all compaction priorities except round-robin. For round-robin,
- // multiple consecutive files may be put into inputs->files.
- // If level is 0 and there is already a compaction on that level, this
- // function will return false.
- bool PickFileToCompact();
- // Return true if a L0 trivial move is picked up.
- bool TryPickL0TrivialMove();
- // For L0->L0, picks the longest span of files that aren't currently
- // undergoing compaction for which work-per-deleted-file decreases. The span
- // always starts from the newest L0 file.
- //
- // Intra-L0 compaction is independent of all other files, so it can be
- // performed even when L0->base_level compactions are blocked.
- //
- // Returns true if `inputs` is populated with a span of files to be compacted;
- // otherwise, returns false.
- bool PickIntraL0Compaction();
- // When total L0 size is small compared to Lbase, try to pick intra-L0
- // compaction starting from the newest L0 file. This helps to prevent
- // L0->Lbase compaction with large write-amp.
- //
- // Returns true iff an intra-L0 compaction is picked.
- // `start_level_inputs_` and `output_level_` will be updated accordingly if
- // a compaction is picked.
- bool PickSizeBasedIntraL0Compaction();
- // Return true if TrivialMove is extended. `start_index` is the index of
- // the initial file picked, which should already be in `start_level_inputs_`.
- bool TryExtendNonL0TrivialMove(int start_index,
- bool only_expand_right = false);
- // Picks a file from level_files to compact.
- // level_files is a vector of (level, file metadata) in ascending order of
- // level. If compact_to_next_level is true, compact the file to the next
- // level, otherwise, compact to the same level as the input file.
- // If skip_last_level is true, skip the last level.
- void PickFileToCompact(
- const autovector<std::pair<int, FileMetaData*>>& level_files,
- CompactToNextLevel compact_to_next_level);
- const std::string& cf_name_;
- VersionStorageInfo* vstorage_;
- CompactionPicker* compaction_picker_;
- LogBuffer* log_buffer_;
- int start_level_ = -1;
- int output_level_ = -1;
- int parent_index_ = -1;
- int base_index_ = -1;
- double start_level_score_ = 0;
- bool is_l0_trivial_move_ = false;
- CompactionInputFiles start_level_inputs_;
- std::vector<CompactionInputFiles> compaction_inputs_;
- CompactionInputFiles output_level_inputs_;
- std::vector<FileMetaData*> grandparents_;
- CompactionReason compaction_reason_ = CompactionReason::kUnknown;
- const MutableCFOptions& mutable_cf_options_;
- const ImmutableOptions& ioptions_;
- const MutableDBOptions& mutable_db_options_;
- // Pick a path ID to place a newly generated file, with its level
- static uint32_t GetPathId(const ImmutableCFOptions& ioptions,
- const MutableCFOptions& mutable_cf_options,
- int level);
- static const int kMinFilesForIntraL0Compaction = 4;
- };
- void LevelCompactionBuilder::PickFileToCompact(
- const autovector<std::pair<int, FileMetaData*>>& level_files,
- CompactToNextLevel compact_to_next_level) {
- for (auto& level_file : level_files) {
- // If it's being compacted it has nothing to do here.
- // If this assert() fails that means that some function marked some
- // files as being_compacted, but didn't call ComputeCompactionScore()
- assert(!level_file.second->being_compacted);
- start_level_ = level_file.first;
- if ((compact_to_next_level == CompactToNextLevel::kSkipLastLevel &&
- start_level_ == vstorage_->num_non_empty_levels() - 1) ||
- (start_level_ == 0 &&
- !compaction_picker_->level0_compactions_in_progress()->empty())) {
- continue;
- }
- // Compact to the next level only if the file is not in the last level and
- // compact_to_next_level is kYes or kSkipLastLevel.
- if (compact_to_next_level != CompactToNextLevel::kNo &&
- (start_level_ < vstorage_->num_non_empty_levels() - 1)) {
- output_level_ =
- (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
- } else {
- output_level_ = start_level_;
- }
- start_level_inputs_.files = {level_file.second};
- start_level_inputs_.level = start_level_;
- if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
- &start_level_inputs_)) {
- return;
- }
- }
- start_level_inputs_.files.clear();
- }
- void LevelCompactionBuilder::SetupInitialFiles() {
- // Find the compactions by size on all levels.
- bool skipped_l0_to_base = false;
- for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) {
- start_level_score_ = vstorage_->CompactionScore(i);
- start_level_ = vstorage_->CompactionScoreLevel(i);
- assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1));
- if (start_level_score_ >= 1) {
- if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) {
- // If L0->base_level compaction is pending, don't schedule further
- // compaction from base level. Otherwise L0->base_level compaction
- // may starve.
- continue;
- }
- output_level_ =
- (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
- bool picked_file_to_compact = PickFileToCompact();
- TEST_SYNC_POINT_CALLBACK("PostPickFileToCompact",
- &picked_file_to_compact);
- if (picked_file_to_compact) {
- // found the compaction!
- if (start_level_ == 0) {
- // L0 score = `num L0 files` / `level0_file_num_compaction_trigger`
- compaction_reason_ = CompactionReason::kLevelL0FilesNum;
- } else {
- // L1+ score = `Level files size` / `MaxBytesForLevel`
- compaction_reason_ = CompactionReason::kLevelMaxLevelSize;
- }
- break;
- } else {
- // didn't find the compaction, clear the inputs
- start_level_inputs_.clear();
- if (start_level_ == 0) {
- skipped_l0_to_base = true;
- // L0->base_level may be blocked due to ongoing L0->base_level
- // compactions. It may also be blocked by an ongoing compaction from
- // base_level downwards.
- //
- // In these cases, to reduce L0 file count and thus reduce likelihood
- // of write stalls, we can attempt compacting a span of files within
- // L0.
- if (PickIntraL0Compaction()) {
- output_level_ = 0;
- compaction_reason_ = CompactionReason::kLevelL0FilesNum;
- break;
- }
- }
- }
- } else {
- // Compaction scores are sorted in descending order, no further scores
- // will be >= 1.
- break;
- }
- }
- if (!start_level_inputs_.empty()) {
- return;
- }
- // if we didn't find a compaction, check if there are any files marked for
- // compaction
- parent_index_ = base_index_ = -1;
- compaction_picker_->PickFilesMarkedForCompaction(
- cf_name_, vstorage_, &start_level_, &output_level_, &start_level_inputs_,
- /*skip_marked_file*/ [](const FileMetaData* /* file */) {
- return false;
- });
- if (!start_level_inputs_.empty()) {
- compaction_reason_ = CompactionReason::kFilesMarkedForCompaction;
- return;
- }
- // Bottommost Files Compaction on deleting tombstones
- PickFileToCompact(vstorage_->BottommostFilesMarkedForCompaction(),
- CompactToNextLevel::kNo);
- if (!start_level_inputs_.empty()) {
- compaction_reason_ = CompactionReason::kBottommostFiles;
- return;
- }
- // TTL Compaction
- if (ioptions_.compaction_pri == kRoundRobin &&
- !vstorage_->ExpiredTtlFiles().empty()) {
- auto expired_files = vstorage_->ExpiredTtlFiles();
- // the expired files list should already be sorted by level
- start_level_ = expired_files.front().first;
- #ifndef NDEBUG
- for (const auto& file : expired_files) {
- assert(start_level_ <= file.first);
- }
- #endif
- if (start_level_ > 0) {
- output_level_ = start_level_ + 1;
- if (PickFileToCompact()) {
- compaction_reason_ = CompactionReason::kRoundRobinTtl;
- return;
- }
- }
- }
- PickFileToCompact(vstorage_->ExpiredTtlFiles(),
- CompactToNextLevel::kSkipLastLevel);
- if (!start_level_inputs_.empty()) {
- compaction_reason_ = CompactionReason::kTtl;
- return;
- }
- // Periodic Compaction
- PickFileToCompact(vstorage_->FilesMarkedForPeriodicCompaction(),
- ioptions_.level_compaction_dynamic_level_bytes
- ? CompactToNextLevel::kYes
- : CompactToNextLevel::kNo);
- if (!start_level_inputs_.empty()) {
- compaction_reason_ = CompactionReason::kPeriodicCompaction;
- return;
- }
- // Forced blob garbage collection
- PickFileToCompact(vstorage_->FilesMarkedForForcedBlobGC(),
- CompactToNextLevel::kNo);
- if (!start_level_inputs_.empty()) {
- compaction_reason_ = CompactionReason::kForcedBlobGC;
- return;
- }
- }
- bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() {
- if (start_level_ == 0 && output_level_ != 0 && !is_l0_trivial_move_) {
- return compaction_picker_->GetOverlappingL0Files(
- vstorage_, &start_level_inputs_, output_level_, &parent_index_);
- }
- return true;
- }
- void LevelCompactionBuilder::SetupOtherFilesWithRoundRobinExpansion() {
- // We only expand when the start level is not L0 under round robin
- assert(start_level_ >= 1);
- // For round-robin compaction priority, we have 3 constraints when picking
- // multiple files.
- // Constraint 1: We can only pick consecutive files
- // -> Constraint 1a: When a file is being compacted (or some input files
- // are being compacted after expanding, we cannot
- // choose it and have to stop choosing more files
- // -> Constraint 1b: When we reach the last file (with largest keys), we
- // cannot choose more files (the next file will be the
- // first one)
- // Constraint 2: We should ensure the total compaction bytes (including the
- // overlapped files from the next level) is no more than
- // mutable_cf_options_.max_compaction_bytes
- // Constraint 3: We try our best to pick as many files as possible so that
- // the post-compaction level size is less than
- // MaxBytesForLevel(start_level_)
- // Constraint 4: We do not expand if it is possible to apply a trivial move
- // Constraint 5 (TODO): Try to pick minimal files to split into the target
- // number of subcompactions
- TEST_SYNC_POINT("LevelCompactionPicker::RoundRobin");
- // Only expand the inputs when we have selected a file in start_level_inputs_
- if (start_level_inputs_.size() == 0) {
- return;
- }
- uint64_t start_lvl_bytes_no_compacting = 0;
- uint64_t curr_bytes_to_compact = 0;
- uint64_t start_lvl_max_bytes_to_compact = 0;
- const std::vector<FileMetaData*>& level_files =
- vstorage_->LevelFiles(start_level_);
- // Constraint 3 (pre-calculate the ideal max bytes to compact)
- for (auto f : level_files) {
- if (!f->being_compacted) {
- start_lvl_bytes_no_compacting += f->fd.GetFileSize();
- }
- }
- if (start_lvl_bytes_no_compacting >
- vstorage_->MaxBytesForLevel(start_level_)) {
- start_lvl_max_bytes_to_compact = start_lvl_bytes_no_compacting -
- vstorage_->MaxBytesForLevel(start_level_);
- }
- size_t start_index = vstorage_->FilesByCompactionPri(start_level_)[0];
- InternalKey smallest, largest;
- // Constraint 4 (No need to check again later)
- compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest);
- CompactionInputFiles output_level_inputs;
- output_level_inputs.level = output_level_;
- vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
- &output_level_inputs.files);
- if (output_level_inputs.empty()) {
- if (TryExtendNonL0TrivialMove((int)start_index,
- true /* only_expand_right */)) {
- return;
- }
- }
- // Constraint 3
- if (start_level_inputs_[0]->fd.GetFileSize() >=
- start_lvl_max_bytes_to_compact) {
- return;
- }
- CompactionInputFiles tmp_start_level_inputs;
- tmp_start_level_inputs = start_level_inputs_;
- // TODO (zichen): Future parallel round-robin may also need to update this
- // Constraint 1b (only expand till the end)
- for (size_t i = start_index + 1; i < level_files.size(); i++) {
- auto* f = level_files[i];
- if (f->being_compacted) {
- // Constraint 1a
- return;
- }
- tmp_start_level_inputs.files.push_back(f);
- if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
- &tmp_start_level_inputs) ||
- compaction_picker_->FilesRangeOverlapWithCompaction(
- {tmp_start_level_inputs}, output_level_,
- Compaction::EvaluateProximalLevel(vstorage_, mutable_cf_options_,
- ioptions_, start_level_,
- output_level_))) {
- // Constraint 1a
- tmp_start_level_inputs.clear();
- return;
- }
- curr_bytes_to_compact = 0;
- for (auto start_lvl_f : tmp_start_level_inputs.files) {
- curr_bytes_to_compact += start_lvl_f->fd.GetFileSize();
- }
- // Check whether any output level files are locked
- compaction_picker_->GetRange(tmp_start_level_inputs, &smallest, &largest);
- vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
- &output_level_inputs.files);
- if (!output_level_inputs.empty() &&
- !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
- &output_level_inputs)) {
- // Constraint 1a
- tmp_start_level_inputs.clear();
- return;
- }
- uint64_t start_lvl_curr_bytes_to_compact = curr_bytes_to_compact;
- for (auto output_lvl_f : output_level_inputs.files) {
- curr_bytes_to_compact += output_lvl_f->fd.GetFileSize();
- }
- if (curr_bytes_to_compact > mutable_cf_options_.max_compaction_bytes) {
- // Constraint 2
- tmp_start_level_inputs.clear();
- return;
- }
- start_level_inputs_.files = tmp_start_level_inputs.files;
- // Constraint 3
- if (start_lvl_curr_bytes_to_compact > start_lvl_max_bytes_to_compact) {
- return;
- }
- }
- }
- bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() {
- // Setup input files from output level. For output to L0, we only compact
- // spans of files that do not interact with any pending compactions, so don't
- // need to consider other levels.
- if (output_level_ != 0) {
- output_level_inputs_.level = output_level_;
- bool round_robin_expanding =
- ioptions_.compaction_pri == kRoundRobin &&
- compaction_reason_ == CompactionReason::kLevelMaxLevelSize;
- if (round_robin_expanding) {
- SetupOtherFilesWithRoundRobinExpansion();
- }
- if (!is_l0_trivial_move_ &&
- !compaction_picker_->SetupOtherInputs(
- cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_,
- &output_level_inputs_, &parent_index_, base_index_,
- round_robin_expanding)) {
- return false;
- }
- compaction_inputs_.push_back(start_level_inputs_);
- if (!output_level_inputs_.empty()) {
- compaction_inputs_.push_back(output_level_inputs_);
- }
- // In some edge cases we could pick a compaction that will be compacting
- // a key range that overlap with another running compaction, and both
- // of them have the same output level. This could happen if
- // (1) we are running a non-exclusive manual compaction
- // (2) AddFile ingest a new file into the LSM tree
- // We need to disallow this from happening.
- if (compaction_picker_->FilesRangeOverlapWithCompaction(
- compaction_inputs_, output_level_,
- Compaction::EvaluateProximalLevel(vstorage_, mutable_cf_options_,
- ioptions_, start_level_,
- output_level_))) {
- // This compaction output could potentially conflict with the output
- // of a currently running compaction, we cannot run it.
- return false;
- }
- if (!is_l0_trivial_move_) {
- compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_,
- output_level_inputs_, &grandparents_);
- }
- } else {
- compaction_inputs_.push_back(start_level_inputs_);
- }
- return true;
- }
- Compaction* LevelCompactionBuilder::PickCompaction() {
- // Pick up the first file to start compaction. It may have been extended
- // to a clean cut.
- SetupInitialFiles();
- if (start_level_inputs_.empty()) {
- return nullptr;
- }
- assert(start_level_ >= 0 && output_level_ >= 0);
- // If it is a L0 -> base level compaction, we need to set up other L0
- // files if needed.
- if (!SetupOtherL0FilesIfNeeded()) {
- return nullptr;
- }
- // Pick files in the output level and expand more files in the start level
- // if needed.
- if (!SetupOtherInputsIfNeeded()) {
- return nullptr;
- }
- // Form a compaction object containing the files we picked.
- Compaction* c = GetCompaction();
- TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c);
- return c;
- }
- Compaction* LevelCompactionBuilder::GetCompaction() {
- // TryPickL0TrivialMove() does not apply to the case when compacting L0 to an
- // empty output level. So L0 files is picked in PickFileToCompact() by
- // compaction score. We may still be able to do trivial move when this file
- // does not overlap with other L0s. This happens when
- // compaction_inputs_[0].size() == 1 since SetupOtherL0FilesIfNeeded() did not
- // pull in more L0s.
- assert(!compaction_inputs_.empty());
- bool l0_files_might_overlap =
- start_level_ == 0 && !is_l0_trivial_move_ &&
- (compaction_inputs_.size() > 1 || compaction_inputs_[0].size() > 1);
- auto c = new Compaction(
- vstorage_, ioptions_, mutable_cf_options_, mutable_db_options_,
- std::move(compaction_inputs_), output_level_,
- MaxFileSizeForLevel(mutable_cf_options_, output_level_,
- ioptions_.compaction_style, vstorage_->base_level(),
- ioptions_.level_compaction_dynamic_level_bytes),
- mutable_cf_options_.max_compaction_bytes,
- GetPathId(ioptions_, mutable_cf_options_, output_level_),
- GetCompressionType(vstorage_, mutable_cf_options_, output_level_,
- vstorage_->base_level()),
- GetCompressionOptions(mutable_cf_options_, vstorage_, output_level_),
- Temperature::kUnknown,
- /* max_subcompactions */ 0, std::move(grandparents_),
- /* earliest_snapshot */ std::nullopt, /* snapshot_checker */ nullptr,
- compaction_reason_,
- /* trim_ts */ "", start_level_score_, l0_files_might_overlap);
- // If it's level 0 compaction, make sure we don't execute any other level 0
- // compactions in parallel
- compaction_picker_->RegisterCompaction(c);
- // Creating a compaction influences the compaction score because the score
- // takes running compactions into account (by skipping files that are already
- // being compacted). Since we just changed compaction score, we recalculate it
- // here
- vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_);
- return c;
- }
- /*
- * Find the optimal path to place a file
- * Given a level, finds the path where levels up to it will fit in levels
- * up to and including this path
- */
- uint32_t LevelCompactionBuilder::GetPathId(
- const ImmutableCFOptions& ioptions,
- const MutableCFOptions& mutable_cf_options, int level) {
- uint32_t p = 0;
- assert(!ioptions.cf_paths.empty());
- // size remaining in the most recent path
- uint64_t current_path_size = ioptions.cf_paths[0].target_size;
- uint64_t level_size;
- int cur_level = 0;
- // max_bytes_for_level_base denotes L1 size.
- // We estimate L0 size to be the same as L1.
- level_size = mutable_cf_options.max_bytes_for_level_base;
- // Last path is the fallback
- while (p < ioptions.cf_paths.size() - 1) {
- if (level_size <= current_path_size) {
- if (cur_level == level) {
- // Does desired level fit in this path?
- return p;
- } else {
- current_path_size -= level_size;
- if (cur_level > 0) {
- if (ioptions.level_compaction_dynamic_level_bytes) {
- // Currently, level_compaction_dynamic_level_bytes is ignored when
- // multiple db paths are specified. https://github.com/facebook/
- // rocksdb/blob/main/db/column_family.cc.
- // Still, adding this check to avoid accidentally using
- // max_bytes_for_level_multiplier_additional
- level_size = static_cast<uint64_t>(
- level_size * mutable_cf_options.max_bytes_for_level_multiplier);
- } else {
- level_size = static_cast<uint64_t>(
- level_size * mutable_cf_options.max_bytes_for_level_multiplier *
- mutable_cf_options.MaxBytesMultiplerAdditional(cur_level));
- }
- }
- cur_level++;
- continue;
- }
- }
- p++;
- current_path_size = ioptions.cf_paths[p].target_size;
- }
- return p;
- }
- bool LevelCompactionBuilder::TryPickL0TrivialMove() {
- if (vstorage_->base_level() <= 0) {
- return false;
- }
- if (start_level_ == 0 && mutable_cf_options_.compression_per_level.empty() &&
- !vstorage_->LevelFiles(output_level_).empty() &&
- ioptions_.db_paths.size() <= 1) {
- // Try to pick trivial move from L0 to L1. We start from the oldest
- // file. We keep expanding to newer files if it would form a
- // trivial move.
- // For now we don't support it with
- // mutable_cf_options_.compression_per_level to prevent the logic
- // of determining whether L0 can be trivial moved to the next level.
- // We skip the case where output level is empty, since in this case, at
- // least the oldest file would qualify for trivial move, and this would
- // be a surprising behavior with few benefits.
- // We search from the oldest file from the newest. In theory, there are
- // files in the middle can form trivial move too, but it is probably
- // uncommon and we ignore these cases for simplicity.
- const std::vector<FileMetaData*>& level_files =
- vstorage_->LevelFiles(start_level_);
- InternalKey my_smallest, my_largest;
- for (auto it = level_files.rbegin(); it != level_files.rend(); ++it) {
- CompactionInputFiles output_level_inputs;
- output_level_inputs.level = output_level_;
- FileMetaData* file = *it;
- if (it == level_files.rbegin()) {
- my_smallest = file->smallest;
- my_largest = file->largest;
- } else {
- if (compaction_picker_->icmp()->Compare(file->largest, my_smallest) <
- 0) {
- my_smallest = file->smallest;
- } else if (compaction_picker_->icmp()->Compare(file->smallest,
- my_largest) > 0) {
- my_largest = file->largest;
- } else {
- break;
- }
- }
- vstorage_->GetOverlappingInputs(output_level_, &my_smallest, &my_largest,
- &output_level_inputs.files);
- if (output_level_inputs.empty()) {
- assert(!file->being_compacted);
- start_level_inputs_.files.push_back(file);
- } else {
- break;
- }
- }
- }
- if (!start_level_inputs_.empty()) {
- // Sort files by key range. Not sure it's 100% necessary but it's cleaner
- // to always keep files sorted by key the key ranges don't overlap.
- std::sort(start_level_inputs_.files.begin(),
- start_level_inputs_.files.end(),
- [icmp = compaction_picker_->icmp()](FileMetaData* f1,
- FileMetaData* f2) -> bool {
- return (icmp->Compare(f1->smallest, f2->smallest) < 0);
- });
- is_l0_trivial_move_ = true;
- return true;
- }
- return false;
- }
- bool LevelCompactionBuilder::TryExtendNonL0TrivialMove(int start_index,
- bool only_expand_right) {
- if (start_level_inputs_.size() == 1 &&
- (ioptions_.db_paths.empty() || ioptions_.db_paths.size() == 1) &&
- (mutable_cf_options_.compression_per_level.empty())) {
- // Only file of `index`, and it is likely a trivial move. Try to
- // expand if it is still a trivial move, but not beyond
- // max_compaction_bytes or 4 files, so that we don't create too
- // much compaction pressure for the next level.
- // Ignore if there are more than one DB path, as it would be hard
- // to predict whether it is a trivial move.
- const std::vector<FileMetaData*>& level_files =
- vstorage_->LevelFiles(start_level_);
- const size_t kMaxMultiTrivialMove = 4;
- FileMetaData* initial_file = start_level_inputs_.files[0];
- size_t total_size = initial_file->fd.GetFileSize();
- CompactionInputFiles output_level_inputs;
- output_level_inputs.level = output_level_;
- // Expand towards right
- for (int i = start_index + 1;
- i < static_cast<int>(level_files.size()) &&
- start_level_inputs_.size() < kMaxMultiTrivialMove;
- i++) {
- FileMetaData* next_file = level_files[i];
- if (next_file->being_compacted) {
- break;
- }
- vstorage_->GetOverlappingInputs(output_level_, &(initial_file->smallest),
- &(next_file->largest),
- &output_level_inputs.files);
- if (!output_level_inputs.empty()) {
- break;
- }
- if (i < static_cast<int>(level_files.size()) - 1 &&
- compaction_picker_->icmp()
- ->user_comparator()
- ->CompareWithoutTimestamp(
- next_file->largest.user_key(),
- level_files[i + 1]->smallest.user_key()) == 0) {
- TEST_SYNC_POINT_CALLBACK(
- "LevelCompactionBuilder::TryExtendNonL0TrivialMove:NoCleanCut",
- nullptr);
- // Not a clean up after adding the next file. Skip.
- break;
- }
- total_size += next_file->fd.GetFileSize();
- if (total_size > mutable_cf_options_.max_compaction_bytes) {
- break;
- }
- start_level_inputs_.files.push_back(next_file);
- }
- // Expand towards left
- if (!only_expand_right) {
- for (int i = start_index - 1;
- i >= 0 && start_level_inputs_.size() < kMaxMultiTrivialMove; i--) {
- FileMetaData* next_file = level_files[i];
- if (next_file->being_compacted) {
- break;
- }
- vstorage_->GetOverlappingInputs(output_level_, &(next_file->smallest),
- &(initial_file->largest),
- &output_level_inputs.files);
- if (!output_level_inputs.empty()) {
- break;
- }
- if (i > 0 && compaction_picker_->icmp()
- ->user_comparator()
- ->CompareWithoutTimestamp(
- next_file->smallest.user_key(),
- level_files[i - 1]->largest.user_key()) == 0) {
- // Not a clean up after adding the next file. Skip.
- break;
- }
- total_size += next_file->fd.GetFileSize();
- if (total_size > mutable_cf_options_.max_compaction_bytes) {
- break;
- }
- // keep `files` sorted in increasing order by key range
- start_level_inputs_.files.insert(start_level_inputs_.files.begin(),
- next_file);
- }
- }
- return start_level_inputs_.size() > 1;
- }
- return false;
- }
- bool LevelCompactionBuilder::PickFileToCompact() {
- // level 0 files are overlapping. So we cannot pick more
- // than one concurrent compactions at this level. This
- // could be made better by looking at key-ranges that are
- // being compacted at level 0.
- if (start_level_ == 0 &&
- !compaction_picker_->level0_compactions_in_progress()->empty()) {
- if (PickSizeBasedIntraL0Compaction()) {
- return true;
- }
- TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0");
- return false;
- }
- start_level_inputs_.clear();
- start_level_inputs_.level = start_level_;
- assert(start_level_ >= 0);
- if (TryPickL0TrivialMove()) {
- return true;
- }
- if (start_level_ == 0 && PickSizeBasedIntraL0Compaction()) {
- return true;
- }
- const std::vector<FileMetaData*>& level_files =
- vstorage_->LevelFiles(start_level_);
- // Pick the file with the highest score in this level that is not already
- // being compacted.
- const std::vector<int>& file_scores =
- vstorage_->FilesByCompactionPri(start_level_);
- unsigned int cmp_idx;
- for (cmp_idx = vstorage_->NextCompactionIndex(start_level_);
- cmp_idx < file_scores.size(); cmp_idx++) {
- int index = file_scores[cmp_idx];
- auto* f = level_files[index];
- // do not pick a file to compact if it is being compacted
- // from n-1 level.
- if (f->being_compacted) {
- if (ioptions_.compaction_pri == kRoundRobin) {
- // TODO(zichen): this file may be involved in one compaction from
- // an upper level, cannot advance the cursor for round-robin policy.
- // Currently, we do not pick any file to compact in this case. We
- // should fix this later to ensure a compaction is picked but the
- // cursor shall not be advanced.
- return false;
- }
- continue;
- }
- start_level_inputs_.files.push_back(f);
- if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
- &start_level_inputs_) ||
- compaction_picker_->FilesRangeOverlapWithCompaction(
- {start_level_inputs_}, output_level_,
- Compaction::EvaluateProximalLevel(vstorage_, mutable_cf_options_,
- ioptions_, start_level_,
- output_level_))) {
- // A locked (pending compaction) input-level file was pulled in due to
- // user-key overlap.
- start_level_inputs_.clear();
- if (ioptions_.compaction_pri == kRoundRobin) {
- return false;
- }
- continue;
- }
- // Now that input level is fully expanded, we check whether any output
- // files are locked due to pending compaction.
- //
- // Note we rely on ExpandInputsToCleanCut() to tell us whether any output-
- // level files are locked, not just the extra ones pulled in for user-key
- // overlap.
- InternalKey smallest, largest;
- compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest);
- CompactionInputFiles output_level_inputs;
- output_level_inputs.level = output_level_;
- vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
- &output_level_inputs.files);
- if (output_level_inputs.empty()) {
- if (start_level_ > 0 &&
- TryExtendNonL0TrivialMove(index,
- ioptions_.compaction_pri ==
- kRoundRobin /* only_expand_right */)) {
- break;
- }
- } else {
- if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
- &output_level_inputs)) {
- start_level_inputs_.clear();
- if (ioptions_.compaction_pri == kRoundRobin) {
- return false;
- }
- continue;
- }
- }
- base_index_ = index;
- break;
- }
- // store where to start the iteration in the next call to PickCompaction
- if (ioptions_.compaction_pri != kRoundRobin) {
- vstorage_->SetNextCompactionIndex(start_level_, cmp_idx);
- }
- return start_level_inputs_.size() > 0;
- }
- bool LevelCompactionBuilder::PickIntraL0Compaction() {
- start_level_inputs_.clear();
- const std::vector<FileMetaData*>& level_files =
- vstorage_->LevelFiles(0 /* level */);
- if (level_files.size() <
- static_cast<size_t>(
- mutable_cf_options_.level0_file_num_compaction_trigger + 2) ||
- level_files[0]->being_compacted) {
- // If L0 isn't accumulating much files beyond the regular trigger, don't
- // resort to L0->L0 compaction yet.
- return false;
- }
- return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction,
- std::numeric_limits<uint64_t>::max(),
- mutable_cf_options_.max_compaction_bytes,
- &start_level_inputs_);
- }
- bool LevelCompactionBuilder::PickSizeBasedIntraL0Compaction() {
- assert(start_level_ == 0);
- int base_level = vstorage_->base_level();
- if (base_level <= 0) {
- return false;
- }
- const std::vector<FileMetaData*>& l0_files =
- vstorage_->LevelFiles(/*level=*/0);
- size_t min_num_file =
- std::max(2, mutable_cf_options_.level0_file_num_compaction_trigger);
- if (l0_files.size() < min_num_file) {
- return false;
- }
- uint64_t l0_size = 0;
- for (const auto& file : l0_files) {
- assert(file->compensated_file_size >= file->fd.GetFileSize());
- // Compact down L0s with more deletions.
- l0_size += file->compensated_file_size;
- }
- // Avoid L0->Lbase compactions that are inefficient for write-amp.
- const double kMultiplier =
- std::max(10.0, mutable_cf_options_.max_bytes_for_level_multiplier) * 2;
- const uint64_t min_lbase_size = MultiplyCheckOverflow(l0_size, kMultiplier);
- assert(min_lbase_size >= l0_size);
- const std::vector<FileMetaData*>& lbase_files =
- vstorage_->LevelFiles(/*level=*/base_level);
- uint64_t lbase_size = 0;
- for (const auto& file : lbase_files) {
- lbase_size += file->fd.GetFileSize();
- if (lbase_size > min_lbase_size) {
- break;
- }
- }
- if (lbase_size <= min_lbase_size) {
- return false;
- }
- start_level_inputs_.clear();
- start_level_inputs_.level = 0;
- for (const auto& file : l0_files) {
- if (file->being_compacted) {
- break;
- }
- start_level_inputs_.files.push_back(file);
- }
- if (start_level_inputs_.files.size() < min_num_file) {
- start_level_inputs_.clear();
- return false;
- }
- output_level_ = 0;
- return true /* picked an intra-L0 compaction */;
- }
- } // namespace
- Compaction* LevelCompactionPicker::PickCompaction(
- const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
- const MutableDBOptions& mutable_db_options,
- const std::vector<SequenceNumber>& /*existing_snapshots */,
- const SnapshotChecker* /*snapshot_checker*/, VersionStorageInfo* vstorage,
- LogBuffer* log_buffer, bool /* require_max_output_level*/) {
- LevelCompactionBuilder builder(cf_name, vstorage, this, log_buffer,
- mutable_cf_options, ioptions_,
- mutable_db_options);
- return builder.PickCompaction();
- }
- } // namespace ROCKSDB_NAMESPACE
|