compaction_picker_level.cc 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include <string>
  10. #include <utility>
  11. #include <vector>
  12. #include "db/compaction/compaction_picker_level.h"
  13. #include "logging/log_buffer.h"
  14. #include "test_util/sync_point.h"
  15. namespace ROCKSDB_NAMESPACE {
  16. bool LevelCompactionPicker::NeedsCompaction(
  17. const VersionStorageInfo* vstorage) const {
  18. if (!vstorage->ExpiredTtlFiles().empty()) {
  19. return true;
  20. }
  21. if (!vstorage->FilesMarkedForPeriodicCompaction().empty()) {
  22. return true;
  23. }
  24. if (!vstorage->BottommostFilesMarkedForCompaction().empty()) {
  25. return true;
  26. }
  27. if (!vstorage->FilesMarkedForCompaction().empty()) {
  28. return true;
  29. }
  30. for (int i = 0; i <= vstorage->MaxInputLevel(); i++) {
  31. if (vstorage->CompactionScore(i) >= 1) {
  32. return true;
  33. }
  34. }
  35. return false;
  36. }
  37. namespace {
  38. // A class to build a leveled compaction step-by-step.
  39. class LevelCompactionBuilder {
  40. public:
  41. LevelCompactionBuilder(const std::string& cf_name,
  42. VersionStorageInfo* vstorage,
  43. SequenceNumber earliest_mem_seqno,
  44. CompactionPicker* compaction_picker,
  45. LogBuffer* log_buffer,
  46. const MutableCFOptions& mutable_cf_options,
  47. const ImmutableCFOptions& ioptions)
  48. : cf_name_(cf_name),
  49. vstorage_(vstorage),
  50. earliest_mem_seqno_(earliest_mem_seqno),
  51. compaction_picker_(compaction_picker),
  52. log_buffer_(log_buffer),
  53. mutable_cf_options_(mutable_cf_options),
  54. ioptions_(ioptions) {}
  55. // Pick and return a compaction.
  56. Compaction* PickCompaction();
  57. // Pick the initial files to compact to the next level. (or together
  58. // in Intra-L0 compactions)
  59. void SetupInitialFiles();
  60. // If the initial files are from L0 level, pick other L0
  61. // files if needed.
  62. bool SetupOtherL0FilesIfNeeded();
  63. // Based on initial files, setup other files need to be compacted
  64. // in this compaction, accordingly.
  65. bool SetupOtherInputsIfNeeded();
  66. Compaction* GetCompaction();
  67. // For the specfied level, pick a file that we want to compact.
  68. // Returns false if there is no file to compact.
  69. // If it returns true, inputs->files.size() will be exactly one.
  70. // If level is 0 and there is already a compaction on that level, this
  71. // function will return false.
  72. bool PickFileToCompact();
  73. // For L0->L0, picks the longest span of files that aren't currently
  74. // undergoing compaction for which work-per-deleted-file decreases. The span
  75. // always starts from the newest L0 file.
  76. //
  77. // Intra-L0 compaction is independent of all other files, so it can be
  78. // performed even when L0->base_level compactions are blocked.
  79. //
  80. // Returns true if `inputs` is populated with a span of files to be compacted;
  81. // otherwise, returns false.
  82. bool PickIntraL0Compaction();
  83. void PickExpiredTtlFiles();
  84. void PickFilesMarkedForPeriodicCompaction();
  85. const std::string& cf_name_;
  86. VersionStorageInfo* vstorage_;
  87. SequenceNumber earliest_mem_seqno_;
  88. CompactionPicker* compaction_picker_;
  89. LogBuffer* log_buffer_;
  90. int start_level_ = -1;
  91. int output_level_ = -1;
  92. int parent_index_ = -1;
  93. int base_index_ = -1;
  94. double start_level_score_ = 0;
  95. bool is_manual_ = false;
  96. CompactionInputFiles start_level_inputs_;
  97. std::vector<CompactionInputFiles> compaction_inputs_;
  98. CompactionInputFiles output_level_inputs_;
  99. std::vector<FileMetaData*> grandparents_;
  100. CompactionReason compaction_reason_ = CompactionReason::kUnknown;
  101. const MutableCFOptions& mutable_cf_options_;
  102. const ImmutableCFOptions& ioptions_;
  103. // Pick a path ID to place a newly generated file, with its level
  104. static uint32_t GetPathId(const ImmutableCFOptions& ioptions,
  105. const MutableCFOptions& mutable_cf_options,
  106. int level);
  107. static const int kMinFilesForIntraL0Compaction = 4;
  108. };
  109. void LevelCompactionBuilder::PickExpiredTtlFiles() {
  110. if (vstorage_->ExpiredTtlFiles().empty()) {
  111. return;
  112. }
  113. auto continuation = [&](std::pair<int, FileMetaData*> level_file) {
  114. // If it's being compacted it has nothing to do here.
  115. // If this assert() fails that means that some function marked some
  116. // files as being_compacted, but didn't call ComputeCompactionScore()
  117. assert(!level_file.second->being_compacted);
  118. start_level_ = level_file.first;
  119. output_level_ =
  120. (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
  121. if ((start_level_ == vstorage_->num_non_empty_levels() - 1) ||
  122. (start_level_ == 0 &&
  123. !compaction_picker_->level0_compactions_in_progress()->empty())) {
  124. return false;
  125. }
  126. start_level_inputs_.files = {level_file.second};
  127. start_level_inputs_.level = start_level_;
  128. return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
  129. &start_level_inputs_);
  130. };
  131. for (auto& level_file : vstorage_->ExpiredTtlFiles()) {
  132. if (continuation(level_file)) {
  133. // found the compaction!
  134. return;
  135. }
  136. }
  137. start_level_inputs_.files.clear();
  138. }
  139. void LevelCompactionBuilder::PickFilesMarkedForPeriodicCompaction() {
  140. if (vstorage_->FilesMarkedForPeriodicCompaction().empty()) {
  141. return;
  142. }
  143. auto continuation = [&](std::pair<int, FileMetaData*> level_file) {
  144. // If it's being compacted it has nothing to do here.
  145. // If this assert() fails that means that some function marked some
  146. // files as being_compacted, but didn't call ComputeCompactionScore()
  147. assert(!level_file.second->being_compacted);
  148. output_level_ = start_level_ = level_file.first;
  149. if (start_level_ == 0 &&
  150. !compaction_picker_->level0_compactions_in_progress()->empty()) {
  151. return false;
  152. }
  153. start_level_inputs_.files = {level_file.second};
  154. start_level_inputs_.level = start_level_;
  155. return compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
  156. &start_level_inputs_);
  157. };
  158. for (auto& level_file : vstorage_->FilesMarkedForPeriodicCompaction()) {
  159. if (continuation(level_file)) {
  160. // found the compaction!
  161. return;
  162. }
  163. }
  164. start_level_inputs_.files.clear();
  165. }
  166. void LevelCompactionBuilder::SetupInitialFiles() {
  167. // Find the compactions by size on all levels.
  168. bool skipped_l0_to_base = false;
  169. for (int i = 0; i < compaction_picker_->NumberLevels() - 1; i++) {
  170. start_level_score_ = vstorage_->CompactionScore(i);
  171. start_level_ = vstorage_->CompactionScoreLevel(i);
  172. assert(i == 0 || start_level_score_ <= vstorage_->CompactionScore(i - 1));
  173. if (start_level_score_ >= 1) {
  174. if (skipped_l0_to_base && start_level_ == vstorage_->base_level()) {
  175. // If L0->base_level compaction is pending, don't schedule further
  176. // compaction from base level. Otherwise L0->base_level compaction
  177. // may starve.
  178. continue;
  179. }
  180. output_level_ =
  181. (start_level_ == 0) ? vstorage_->base_level() : start_level_ + 1;
  182. if (PickFileToCompact()) {
  183. // found the compaction!
  184. if (start_level_ == 0) {
  185. // L0 score = `num L0 files` / `level0_file_num_compaction_trigger`
  186. compaction_reason_ = CompactionReason::kLevelL0FilesNum;
  187. } else {
  188. // L1+ score = `Level files size` / `MaxBytesForLevel`
  189. compaction_reason_ = CompactionReason::kLevelMaxLevelSize;
  190. }
  191. break;
  192. } else {
  193. // didn't find the compaction, clear the inputs
  194. start_level_inputs_.clear();
  195. if (start_level_ == 0) {
  196. skipped_l0_to_base = true;
  197. // L0->base_level may be blocked due to ongoing L0->base_level
  198. // compactions. It may also be blocked by an ongoing compaction from
  199. // base_level downwards.
  200. //
  201. // In these cases, to reduce L0 file count and thus reduce likelihood
  202. // of write stalls, we can attempt compacting a span of files within
  203. // L0.
  204. if (PickIntraL0Compaction()) {
  205. output_level_ = 0;
  206. compaction_reason_ = CompactionReason::kLevelL0FilesNum;
  207. break;
  208. }
  209. }
  210. }
  211. }
  212. }
  213. // if we didn't find a compaction, check if there are any files marked for
  214. // compaction
  215. if (start_level_inputs_.empty()) {
  216. parent_index_ = base_index_ = -1;
  217. compaction_picker_->PickFilesMarkedForCompaction(
  218. cf_name_, vstorage_, &start_level_, &output_level_,
  219. &start_level_inputs_);
  220. if (!start_level_inputs_.empty()) {
  221. is_manual_ = true;
  222. compaction_reason_ = CompactionReason::kFilesMarkedForCompaction;
  223. return;
  224. }
  225. }
  226. // Bottommost Files Compaction on deleting tombstones
  227. if (start_level_inputs_.empty()) {
  228. size_t i;
  229. for (i = 0; i < vstorage_->BottommostFilesMarkedForCompaction().size();
  230. ++i) {
  231. auto& level_and_file = vstorage_->BottommostFilesMarkedForCompaction()[i];
  232. assert(!level_and_file.second->being_compacted);
  233. start_level_inputs_.level = output_level_ = start_level_ =
  234. level_and_file.first;
  235. start_level_inputs_.files = {level_and_file.second};
  236. if (compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
  237. &start_level_inputs_)) {
  238. break;
  239. }
  240. }
  241. if (i == vstorage_->BottommostFilesMarkedForCompaction().size()) {
  242. start_level_inputs_.clear();
  243. } else {
  244. assert(!start_level_inputs_.empty());
  245. compaction_reason_ = CompactionReason::kBottommostFiles;
  246. return;
  247. }
  248. }
  249. // TTL Compaction
  250. if (start_level_inputs_.empty()) {
  251. PickExpiredTtlFiles();
  252. if (!start_level_inputs_.empty()) {
  253. compaction_reason_ = CompactionReason::kTtl;
  254. return;
  255. }
  256. }
  257. // Periodic Compaction
  258. if (start_level_inputs_.empty()) {
  259. PickFilesMarkedForPeriodicCompaction();
  260. if (!start_level_inputs_.empty()) {
  261. compaction_reason_ = CompactionReason::kPeriodicCompaction;
  262. return;
  263. }
  264. }
  265. }
  266. bool LevelCompactionBuilder::SetupOtherL0FilesIfNeeded() {
  267. if (start_level_ == 0 && output_level_ != 0) {
  268. return compaction_picker_->GetOverlappingL0Files(
  269. vstorage_, &start_level_inputs_, output_level_, &parent_index_);
  270. }
  271. return true;
  272. }
  273. bool LevelCompactionBuilder::SetupOtherInputsIfNeeded() {
  274. // Setup input files from output level. For output to L0, we only compact
  275. // spans of files that do not interact with any pending compactions, so don't
  276. // need to consider other levels.
  277. if (output_level_ != 0) {
  278. output_level_inputs_.level = output_level_;
  279. if (!compaction_picker_->SetupOtherInputs(
  280. cf_name_, mutable_cf_options_, vstorage_, &start_level_inputs_,
  281. &output_level_inputs_, &parent_index_, base_index_)) {
  282. return false;
  283. }
  284. compaction_inputs_.push_back(start_level_inputs_);
  285. if (!output_level_inputs_.empty()) {
  286. compaction_inputs_.push_back(output_level_inputs_);
  287. }
  288. // In some edge cases we could pick a compaction that will be compacting
  289. // a key range that overlap with another running compaction, and both
  290. // of them have the same output level. This could happen if
  291. // (1) we are running a non-exclusive manual compaction
  292. // (2) AddFile ingest a new file into the LSM tree
  293. // We need to disallow this from happening.
  294. if (compaction_picker_->FilesRangeOverlapWithCompaction(compaction_inputs_,
  295. output_level_)) {
  296. // This compaction output could potentially conflict with the output
  297. // of a currently running compaction, we cannot run it.
  298. return false;
  299. }
  300. compaction_picker_->GetGrandparents(vstorage_, start_level_inputs_,
  301. output_level_inputs_, &grandparents_);
  302. } else {
  303. compaction_inputs_.push_back(start_level_inputs_);
  304. }
  305. return true;
  306. }
  307. Compaction* LevelCompactionBuilder::PickCompaction() {
  308. // Pick up the first file to start compaction. It may have been extended
  309. // to a clean cut.
  310. SetupInitialFiles();
  311. if (start_level_inputs_.empty()) {
  312. return nullptr;
  313. }
  314. assert(start_level_ >= 0 && output_level_ >= 0);
  315. // If it is a L0 -> base level compaction, we need to set up other L0
  316. // files if needed.
  317. if (!SetupOtherL0FilesIfNeeded()) {
  318. return nullptr;
  319. }
  320. // Pick files in the output level and expand more files in the start level
  321. // if needed.
  322. if (!SetupOtherInputsIfNeeded()) {
  323. return nullptr;
  324. }
  325. // Form a compaction object containing the files we picked.
  326. Compaction* c = GetCompaction();
  327. TEST_SYNC_POINT_CALLBACK("LevelCompactionPicker::PickCompaction:Return", c);
  328. return c;
  329. }
  330. Compaction* LevelCompactionBuilder::GetCompaction() {
  331. auto c = new Compaction(
  332. vstorage_, ioptions_, mutable_cf_options_, std::move(compaction_inputs_),
  333. output_level_,
  334. MaxFileSizeForLevel(mutable_cf_options_, output_level_,
  335. ioptions_.compaction_style, vstorage_->base_level(),
  336. ioptions_.level_compaction_dynamic_level_bytes),
  337. mutable_cf_options_.max_compaction_bytes,
  338. GetPathId(ioptions_, mutable_cf_options_, output_level_),
  339. GetCompressionType(ioptions_, vstorage_, mutable_cf_options_,
  340. output_level_, vstorage_->base_level()),
  341. GetCompressionOptions(ioptions_, vstorage_, output_level_),
  342. /* max_subcompactions */ 0, std::move(grandparents_), is_manual_,
  343. start_level_score_, false /* deletion_compaction */, compaction_reason_);
  344. // If it's level 0 compaction, make sure we don't execute any other level 0
  345. // compactions in parallel
  346. compaction_picker_->RegisterCompaction(c);
  347. // Creating a compaction influences the compaction score because the score
  348. // takes running compactions into account (by skipping files that are already
  349. // being compacted). Since we just changed compaction score, we recalculate it
  350. // here
  351. vstorage_->ComputeCompactionScore(ioptions_, mutable_cf_options_);
  352. return c;
  353. }
  354. /*
  355. * Find the optimal path to place a file
  356. * Given a level, finds the path where levels up to it will fit in levels
  357. * up to and including this path
  358. */
  359. uint32_t LevelCompactionBuilder::GetPathId(
  360. const ImmutableCFOptions& ioptions,
  361. const MutableCFOptions& mutable_cf_options, int level) {
  362. uint32_t p = 0;
  363. assert(!ioptions.cf_paths.empty());
  364. // size remaining in the most recent path
  365. uint64_t current_path_size = ioptions.cf_paths[0].target_size;
  366. uint64_t level_size;
  367. int cur_level = 0;
  368. // max_bytes_for_level_base denotes L1 size.
  369. // We estimate L0 size to be the same as L1.
  370. level_size = mutable_cf_options.max_bytes_for_level_base;
  371. // Last path is the fallback
  372. while (p < ioptions.cf_paths.size() - 1) {
  373. if (level_size <= current_path_size) {
  374. if (cur_level == level) {
  375. // Does desired level fit in this path?
  376. return p;
  377. } else {
  378. current_path_size -= level_size;
  379. if (cur_level > 0) {
  380. if (ioptions.level_compaction_dynamic_level_bytes) {
  381. // Currently, level_compaction_dynamic_level_bytes is ignored when
  382. // multiple db paths are specified. https://github.com/facebook/
  383. // rocksdb/blob/master/db/column_family.cc.
  384. // Still, adding this check to avoid accidentally using
  385. // max_bytes_for_level_multiplier_additional
  386. level_size = static_cast<uint64_t>(
  387. level_size * mutable_cf_options.max_bytes_for_level_multiplier);
  388. } else {
  389. level_size = static_cast<uint64_t>(
  390. level_size * mutable_cf_options.max_bytes_for_level_multiplier *
  391. mutable_cf_options.MaxBytesMultiplerAdditional(cur_level));
  392. }
  393. }
  394. cur_level++;
  395. continue;
  396. }
  397. }
  398. p++;
  399. current_path_size = ioptions.cf_paths[p].target_size;
  400. }
  401. return p;
  402. }
  403. bool LevelCompactionBuilder::PickFileToCompact() {
  404. // level 0 files are overlapping. So we cannot pick more
  405. // than one concurrent compactions at this level. This
  406. // could be made better by looking at key-ranges that are
  407. // being compacted at level 0.
  408. if (start_level_ == 0 &&
  409. !compaction_picker_->level0_compactions_in_progress()->empty()) {
  410. TEST_SYNC_POINT("LevelCompactionPicker::PickCompactionBySize:0");
  411. return false;
  412. }
  413. start_level_inputs_.clear();
  414. assert(start_level_ >= 0);
  415. // Pick the largest file in this level that is not already
  416. // being compacted
  417. const std::vector<int>& file_size =
  418. vstorage_->FilesByCompactionPri(start_level_);
  419. const std::vector<FileMetaData*>& level_files =
  420. vstorage_->LevelFiles(start_level_);
  421. unsigned int cmp_idx;
  422. for (cmp_idx = vstorage_->NextCompactionIndex(start_level_);
  423. cmp_idx < file_size.size(); cmp_idx++) {
  424. int index = file_size[cmp_idx];
  425. auto* f = level_files[index];
  426. // do not pick a file to compact if it is being compacted
  427. // from n-1 level.
  428. if (f->being_compacted) {
  429. continue;
  430. }
  431. start_level_inputs_.files.push_back(f);
  432. start_level_inputs_.level = start_level_;
  433. if (!compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
  434. &start_level_inputs_) ||
  435. compaction_picker_->FilesRangeOverlapWithCompaction(
  436. {start_level_inputs_}, output_level_)) {
  437. // A locked (pending compaction) input-level file was pulled in due to
  438. // user-key overlap.
  439. start_level_inputs_.clear();
  440. continue;
  441. }
  442. // Now that input level is fully expanded, we check whether any output files
  443. // are locked due to pending compaction.
  444. //
  445. // Note we rely on ExpandInputsToCleanCut() to tell us whether any output-
  446. // level files are locked, not just the extra ones pulled in for user-key
  447. // overlap.
  448. InternalKey smallest, largest;
  449. compaction_picker_->GetRange(start_level_inputs_, &smallest, &largest);
  450. CompactionInputFiles output_level_inputs;
  451. output_level_inputs.level = output_level_;
  452. vstorage_->GetOverlappingInputs(output_level_, &smallest, &largest,
  453. &output_level_inputs.files);
  454. if (!output_level_inputs.empty() &&
  455. !compaction_picker_->ExpandInputsToCleanCut(cf_name_, vstorage_,
  456. &output_level_inputs)) {
  457. start_level_inputs_.clear();
  458. continue;
  459. }
  460. base_index_ = index;
  461. break;
  462. }
  463. // store where to start the iteration in the next call to PickCompaction
  464. vstorage_->SetNextCompactionIndex(start_level_, cmp_idx);
  465. return start_level_inputs_.size() > 0;
  466. }
  467. bool LevelCompactionBuilder::PickIntraL0Compaction() {
  468. start_level_inputs_.clear();
  469. const std::vector<FileMetaData*>& level_files =
  470. vstorage_->LevelFiles(0 /* level */);
  471. if (level_files.size() <
  472. static_cast<size_t>(
  473. mutable_cf_options_.level0_file_num_compaction_trigger + 2) ||
  474. level_files[0]->being_compacted) {
  475. // If L0 isn't accumulating much files beyond the regular trigger, don't
  476. // resort to L0->L0 compaction yet.
  477. return false;
  478. }
  479. return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction,
  480. port::kMaxUint64,
  481. mutable_cf_options_.max_compaction_bytes,
  482. &start_level_inputs_, earliest_mem_seqno_);
  483. }
  484. } // namespace
  485. Compaction* LevelCompactionPicker::PickCompaction(
  486. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  487. VersionStorageInfo* vstorage, LogBuffer* log_buffer,
  488. SequenceNumber earliest_mem_seqno) {
  489. LevelCompactionBuilder builder(cf_name, vstorage, earliest_mem_seqno, this,
  490. log_buffer, mutable_cf_options, ioptions_);
  491. return builder.PickCompaction();
  492. }
  493. } // namespace ROCKSDB_NAMESPACE