compaction_picker.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <memory>
  11. #include <set>
  12. #include <string>
  13. #include <unordered_set>
  14. #include <vector>
  15. #include "db/compaction/compaction.h"
  16. #include "db/snapshot_checker.h"
  17. #include "db/version_set.h"
  18. #include "options/cf_options.h"
  19. #include "rocksdb/env.h"
  20. #include "rocksdb/options.h"
  21. #include "rocksdb/status.h"
  22. namespace ROCKSDB_NAMESPACE {
  23. // The file contains an abstract class CompactionPicker, and its two
  24. // sub-classes LevelCompactionPicker and NullCompactionPicker, as
  25. // well as some helper functions used by them.
  26. class LogBuffer;
  27. class Compaction;
  28. class VersionStorageInfo;
  29. struct CompactionInputFiles;
  30. // An abstract class to pick compactions from an existing LSM-tree.
  31. //
  32. // Each compaction style inherits the class and implement the
  33. // interface to form automatic compactions. If NeedCompaction() is true,
  34. // then call PickCompaction() to find what files need to be compacted
  35. // and where to put the output files.
  36. //
  37. // Non-virtual functions CompactRange() and CompactFiles() are used to
  38. // pick files to compact based on users' DB::CompactRange() and
  39. // DB::CompactFiles() requests, respectively. There is little
  40. // compaction style specific logic for them.
  41. class CompactionPicker {
  42. public:
  43. CompactionPicker(const ImmutableOptions& ioptions,
  44. const InternalKeyComparator* icmp);
  45. virtual ~CompactionPicker();
  46. // Pick level and inputs for a new compaction.
  47. //
  48. // Returns nullptr if there is no compaction to be done.
  49. // Otherwise returns a pointer to a heap-allocated object that
  50. // describes the compaction. Caller should delete the result.
  51. // Currently, only universal compaction will query existing snapshots and
  52. // pass it to aid compaction picking. And it's only passed when user-defined
  53. // timestamps is not enabled. The other compaction styles do not pass or use
  54. // `existing_snapshots` or `snapshot_checker`.
  55. virtual Compaction* PickCompaction(
  56. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  57. const MutableDBOptions& mutable_db_options,
  58. const std::vector<SequenceNumber>& existing_snapshots,
  59. const SnapshotChecker* snapshot_checker, VersionStorageInfo* vstorage,
  60. LogBuffer* log_buffer, bool require_max_output_level) = 0;
  61. // The returned Compaction might not include the whole requested range.
  62. // In that case, compaction_end will be set to the next key that needs
  63. // compacting. In case the compaction will compact the whole range,
  64. // compaction_end will be set to nullptr.
  65. // Client is responsible for compaction_end storage -- when called,
  66. // *compaction_end should point to valid InternalKey!
  67. // REQUIRES: If not compacting all levels (input_level == kCompactAllLevels),
  68. // then levels between input_level and output_level should be empty.
  69. virtual Compaction* PickCompactionForCompactRange(
  70. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  71. const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
  72. int input_level, int output_level,
  73. const CompactRangeOptions& compact_range_options,
  74. const InternalKey* begin, const InternalKey* end,
  75. InternalKey** compaction_end, bool* manual_conflict,
  76. uint64_t max_file_num_to_ignore, const std::string& trim_ts);
  77. // The maximum allowed output level. Default value is NumberLevels() - 1.
  78. virtual int MaxOutputLevel() const { return NumberLevels() - 1; }
  79. virtual bool NeedsCompaction(const VersionStorageInfo* vstorage) const = 0;
  80. // Sanitize the input set of compaction input files and convert it to
  81. // `std::vector<CompactionInputFiles>` in the output parameter
  82. // `converted_input_files`.
  83. // When the input parameters do not describe a valid
  84. // compaction, the function will try to fix the input_files by adding
  85. // necessary files. If it's not possible to convert an invalid input_files
  86. // into a valid one by adding more files, the function will return a
  87. // non-ok status with specific reason.
  88. //
  89. Status SanitizeAndConvertCompactionInputFiles(
  90. std::unordered_set<uint64_t>* input_files, const int output_level,
  91. Version* version,
  92. std::vector<CompactionInputFiles>* converted_input_files) const;
  93. // Free up the files that participated in a compaction
  94. //
  95. // Requirement: DB mutex held
  96. void ReleaseCompactionFiles(Compaction* c, const Status& status);
  97. // Returns true if any one of the specified files are being compacted
  98. bool AreFilesInCompaction(const std::vector<FileMetaData*>& files);
  99. // Takes a list of CompactionInputFiles and returns a (manual) Compaction
  100. // object.
  101. //
  102. // Caller must provide a set of input files that has been passed through
  103. // `SanitizeAndConvertCompactionInputFiles` earlier. The lock should not be
  104. // released between that call and this one.
  105. //
  106. // TODO - Remove default values for earliest_snapshot and snapshot_checker
  107. // and require all callers to pass them in so that DB::CompactFiles() can
  108. // also benefit from Standalone Range Tombstone Optimization
  109. Compaction* PickCompactionForCompactFiles(
  110. const CompactionOptions& compact_options,
  111. const std::vector<CompactionInputFiles>& input_files, int output_level,
  112. VersionStorageInfo* vstorage, const MutableCFOptions& mutable_cf_options,
  113. const MutableDBOptions& mutable_db_options, uint32_t output_path_id,
  114. std::optional<SequenceNumber> earliest_snapshot = std::nullopt,
  115. const SnapshotChecker* snapshot_checker = nullptr);
  116. // Converts a set of compaction input file numbers into
  117. // a list of CompactionInputFiles.
  118. // TODO(hx235): remove the unused paramter `compact_options`
  119. Status GetCompactionInputsFromFileNumbers(
  120. std::vector<CompactionInputFiles>* input_files,
  121. std::unordered_set<uint64_t>* input_set,
  122. const VersionStorageInfo* vstorage,
  123. const CompactionOptions& compact_options) const;
  124. // Is there currently a compaction involving level 0 taking place
  125. bool IsLevel0CompactionInProgress() const {
  126. return !level0_compactions_in_progress_.empty();
  127. }
  128. // Is any compaction in progress
  129. bool IsCompactionInProgress() const {
  130. return !(level0_compactions_in_progress_.empty() &&
  131. compactions_in_progress_.empty());
  132. }
  133. // Return true if the passed key range overlap with a compaction output
  134. // that is currently running.
  135. bool RangeOverlapWithCompaction(const Slice& smallest_user_key,
  136. const Slice& largest_user_key,
  137. int level) const;
  138. // Stores the minimal range that covers all entries in inputs in
  139. // *smallest, *largest.
  140. // REQUIRES: inputs is not empty
  141. void GetRange(const CompactionInputFiles& inputs, InternalKey* smallest,
  142. InternalKey* largest) const;
  143. // Stores the minimal range that covers all entries in inputs1 and inputs2
  144. // in *smallest, *largest.
  145. // REQUIRES: inputs is not empty
  146. void GetRange(const CompactionInputFiles& inputs1,
  147. const CompactionInputFiles& inputs2, InternalKey* smallest,
  148. InternalKey* largest) const;
  149. // Stores the minimal range that covers all entries in inputs
  150. // in *smallest, *largest.
  151. // REQUIRES: inputs is not empty (at least on entry have one file)
  152. void GetRange(const std::vector<CompactionInputFiles>& inputs,
  153. InternalKey* smallest, InternalKey* largest,
  154. int exclude_level) const;
  155. int NumberLevels() const { return ioptions_.num_levels; }
  156. // Add more files to the inputs on "level" to make sure that
  157. // no newer version of a key is compacted to "level+1" while leaving an older
  158. // version in a "level". Otherwise, any Get() will search "level" first,
  159. // and will likely return an old/stale value for the key, since it always
  160. // searches in increasing order of level to find the value. This could
  161. // also scramble the order of merge operands. This function should be
  162. // called any time a new Compaction is created, and its inputs_[0] are
  163. // populated.
  164. //
  165. // Will return false if it is impossible to apply this compaction.
  166. bool ExpandInputsToCleanCut(const std::string& cf_name,
  167. VersionStorageInfo* vstorage,
  168. CompactionInputFiles* inputs,
  169. InternalKey** next_smallest = nullptr);
  170. // Returns true if any one of the parent files are being compacted
  171. bool IsRangeInCompaction(VersionStorageInfo* vstorage,
  172. const InternalKey* smallest,
  173. const InternalKey* largest, int level, int* index);
  174. // Returns true if the key range that `inputs` files cover overlap with the
  175. // key range of a currently running compaction.
  176. bool FilesRangeOverlapWithCompaction(
  177. const std::vector<CompactionInputFiles>& inputs, int level,
  178. int proximal_level) const;
  179. // @param starting_l0_file If not null, restricts L0 file selection to only
  180. // include files at or older than starting_l0_file.
  181. bool SetupOtherInputs(const std::string& cf_name,
  182. const MutableCFOptions& mutable_cf_options,
  183. VersionStorageInfo* vstorage,
  184. CompactionInputFiles* inputs,
  185. CompactionInputFiles* output_level_inputs,
  186. int* parent_index, int base_index,
  187. bool only_expand_towards_right = false,
  188. const FileMetaData* starting_l0_file = nullptr);
  189. void GetGrandparents(VersionStorageInfo* vstorage,
  190. const CompactionInputFiles& inputs,
  191. const CompactionInputFiles& output_level_inputs,
  192. std::vector<FileMetaData*>* grandparents);
  193. void PickFilesMarkedForCompaction(
  194. const std::string& cf_name, VersionStorageInfo* vstorage,
  195. int* start_level, int* output_level,
  196. CompactionInputFiles* start_level_inputs,
  197. std::function<bool(const FileMetaData*)> skip_marked_file);
  198. // @param starting_l0_file If not null, restricts L0 file selection to only
  199. // include files at or older than starting_l0_file.
  200. bool GetOverlappingL0Files(VersionStorageInfo* vstorage,
  201. CompactionInputFiles* start_level_inputs,
  202. int output_level, int* parent_index,
  203. const FileMetaData* starting_l0_file = nullptr);
  204. // Register this compaction in the set of running compactions
  205. void RegisterCompaction(Compaction* c);
  206. // Remove this compaction from the set of running compactions
  207. void UnregisterCompaction(Compaction* c);
  208. std::set<Compaction*>* level0_compactions_in_progress() {
  209. return &level0_compactions_in_progress_;
  210. }
  211. std::unordered_set<Compaction*>* compactions_in_progress() {
  212. return &compactions_in_progress_;
  213. }
  214. const InternalKeyComparator* icmp() const { return icmp_; }
  215. protected:
  216. const ImmutableOptions& ioptions_;
  217. // A helper function to SanitizeAndConvertCompactionInputFiles() that
  218. // sanitizes "input_files" by adding necessary files.
  219. virtual Status SanitizeCompactionInputFilesForAllLevels(
  220. std::unordered_set<uint64_t>* input_files,
  221. const ColumnFamilyMetaData& cf_meta, const int output_level) const;
  222. // Keeps track of all compactions that are running on Level0.
  223. // Protected by DB mutex
  224. std::set<Compaction*> level0_compactions_in_progress_;
  225. // Keeps track of all compactions that are running.
  226. // Protected by DB mutex
  227. std::unordered_set<Compaction*> compactions_in_progress_;
  228. const InternalKeyComparator* const icmp_;
  229. };
  230. // A dummy compaction that never triggers any automatic
  231. // compaction.
  232. class NullCompactionPicker : public CompactionPicker {
  233. public:
  234. NullCompactionPicker(const ImmutableOptions& ioptions,
  235. const InternalKeyComparator* icmp)
  236. : CompactionPicker(ioptions, icmp) {}
  237. virtual ~NullCompactionPicker() {}
  238. // Always return "nullptr"
  239. Compaction* PickCompaction(
  240. const std::string& /*cf_name*/,
  241. const MutableCFOptions& /*mutable_cf_options*/,
  242. const MutableDBOptions& /*mutable_db_options*/,
  243. const std::vector<SequenceNumber>& /*existing_snapshots*/,
  244. const SnapshotChecker* /*snapshot_checker*/,
  245. VersionStorageInfo* /*vstorage*/, LogBuffer* /* log_buffer */,
  246. bool /*require_max_output_level*/ = false) override {
  247. return nullptr;
  248. }
  249. // Always return "nullptr"
  250. Compaction* PickCompactionForCompactRange(
  251. const std::string& /*cf_name*/,
  252. const MutableCFOptions& /*mutable_cf_options*/,
  253. const MutableDBOptions& /*mutable_db_options*/,
  254. VersionStorageInfo* /*vstorage*/, int /*input_level*/,
  255. int /*output_level*/,
  256. const CompactRangeOptions& /*compact_range_options*/,
  257. const InternalKey* /*begin*/, const InternalKey* /*end*/,
  258. InternalKey** /*compaction_end*/, bool* /*manual_conflict*/,
  259. uint64_t /*max_file_num_to_ignore*/,
  260. const std::string& /*trim_ts*/) override {
  261. return nullptr;
  262. }
  263. // Always returns false.
  264. bool NeedsCompaction(const VersionStorageInfo* /*vstorage*/) const override {
  265. return false;
  266. }
  267. };
  268. // Attempts to find an intra L0 compaction conforming to the given parameters.
  269. //
  270. // @param level_files Metadata for L0 files.
  271. // @param min_files_to_compact Minimum number of files required to
  272. // do the compaction.
  273. // @param max_compact_bytes_per_del_file Maximum average size in bytes per
  274. // file that is going to get deleted by
  275. // the compaction.
  276. // @param max_compaction_bytes Maximum total size in bytes (in terms
  277. // of compensated file size) for files
  278. // to be compacted.
  279. // @param [out] comp_inputs If a compaction was found, will be
  280. // initialized with corresponding input
  281. // files. Cannot be nullptr.
  282. //
  283. // @return true iff compaction was found.
  284. bool FindIntraL0Compaction(const std::vector<FileMetaData*>& level_files,
  285. size_t min_files_to_compact,
  286. uint64_t max_compact_bytes_per_del_file,
  287. uint64_t max_compaction_bytes,
  288. CompactionInputFiles* comp_inputs);
  289. CompressionType GetCompressionType(const VersionStorageInfo* vstorage,
  290. const MutableCFOptions& mutable_cf_options,
  291. int level, int base_level,
  292. const bool enable_compression = true);
  293. CompressionOptions GetCompressionOptions(
  294. const MutableCFOptions& mutable_cf_options,
  295. const VersionStorageInfo* vstorage, int level,
  296. const bool enable_compression = true);
  297. } // namespace ROCKSDB_NAMESPACE