compaction.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include "db/version_set.h"
  11. #include "memory/arena.h"
  12. #include "options/cf_options.h"
  13. #include "util/autovector.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. // The file contains class Compaction, as well as some helper functions
  16. // and data structures used by the class.
  17. // Utility for comparing sstable boundary keys. Returns -1 if either a or b is
  18. // null which provides the property that a==null indicates a key that is less
  19. // than any key and b==null indicates a key that is greater than any key. Note
  20. // that the comparison is performed primarily on the user-key portion of the
  21. // key. If the user-keys compare equal, an additional test is made to sort
  22. // range tombstone sentinel keys before other keys with the same user-key. The
  23. // result is that 2 user-keys will compare equal if they differ purely on
  24. // their sequence number and value, but the range tombstone sentinel for that
  25. // user-key will compare not equal. This is necessary because the range
  26. // tombstone sentinel key is set as the largest key for an sstable even though
  27. // that key never appears in the database. We don't want adjacent sstables to
  28. // be considered overlapping if they are separated by the range tombstone
  29. // sentinel.
  30. int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
  31. const InternalKey& b);
  32. int sstableKeyCompare(const Comparator* user_cmp, const InternalKey* a,
  33. const InternalKey& b);
  34. int sstableKeyCompare(const Comparator* user_cmp, const InternalKey& a,
  35. const InternalKey* b);
  36. // An AtomicCompactionUnitBoundary represents a range of keys [smallest,
  37. // largest] that exactly spans one ore more neighbouring SSTs on the same
  38. // level. Every pair of SSTs in this range "overlap" (i.e., the largest
  39. // user key of one file is the smallest user key of the next file). These
  40. // boundaries are propagated down to RangeDelAggregator during compaction
  41. // to provide safe truncation boundaries for range tombstones.
  42. struct AtomicCompactionUnitBoundary {
  43. const InternalKey* smallest = nullptr;
  44. const InternalKey* largest = nullptr;
  45. };
  46. // The structure that manages compaction input files associated
  47. // with the same physical level.
  48. struct CompactionInputFiles {
  49. int level;
  50. std::vector<FileMetaData*> files;
  51. std::vector<AtomicCompactionUnitBoundary> atomic_compaction_unit_boundaries;
  52. inline bool empty() const { return files.empty(); }
  53. inline size_t size() const { return files.size(); }
  54. inline void clear() { files.clear(); }
  55. inline FileMetaData* operator[](size_t i) const { return files[i]; }
  56. };
  57. class Version;
  58. class ColumnFamilyData;
  59. class VersionStorageInfo;
  60. class CompactionFilter;
  61. // A Compaction encapsulates metadata about a compaction.
  62. class Compaction {
  63. public:
  64. Compaction(VersionStorageInfo* input_version,
  65. const ImmutableCFOptions& immutable_cf_options,
  66. const MutableCFOptions& mutable_cf_options,
  67. std::vector<CompactionInputFiles> inputs, int output_level,
  68. uint64_t target_file_size, uint64_t max_compaction_bytes,
  69. uint32_t output_path_id, CompressionType compression,
  70. CompressionOptions compression_opts, uint32_t max_subcompactions,
  71. std::vector<FileMetaData*> grandparents,
  72. bool manual_compaction = false, double score = -1,
  73. bool deletion_compaction = false,
  74. CompactionReason compaction_reason = CompactionReason::kUnknown);
  75. // No copying allowed
  76. Compaction(const Compaction&) = delete;
  77. void operator=(const Compaction&) = delete;
  78. ~Compaction();
  79. // Returns the level associated to the specified compaction input level.
  80. // If compaction_input_level is not specified, then input_level is set to 0.
  81. int level(size_t compaction_input_level = 0) const {
  82. return inputs_[compaction_input_level].level;
  83. }
  84. int start_level() const { return start_level_; }
  85. // Outputs will go to this level
  86. int output_level() const { return output_level_; }
  87. // Returns the number of input levels in this compaction.
  88. size_t num_input_levels() const { return inputs_.size(); }
  89. // Return the object that holds the edits to the descriptor done
  90. // by this compaction.
  91. VersionEdit* edit() { return &edit_; }
  92. // Returns the number of input files associated to the specified
  93. // compaction input level.
  94. // The function will return 0 if when "compaction_input_level" < 0
  95. // or "compaction_input_level" >= "num_input_levels()".
  96. size_t num_input_files(size_t compaction_input_level) const {
  97. if (compaction_input_level < inputs_.size()) {
  98. return inputs_[compaction_input_level].size();
  99. }
  100. return 0;
  101. }
  102. // Returns input version of the compaction
  103. Version* input_version() const { return input_version_; }
  104. // Returns the ColumnFamilyData associated with the compaction.
  105. ColumnFamilyData* column_family_data() const { return cfd_; }
  106. // Returns the file meta data of the 'i'th input file at the
  107. // specified compaction input level.
  108. // REQUIREMENT: "compaction_input_level" must be >= 0 and
  109. // < "input_levels()"
  110. FileMetaData* input(size_t compaction_input_level, size_t i) const {
  111. assert(compaction_input_level < inputs_.size());
  112. return inputs_[compaction_input_level][i];
  113. }
  114. const std::vector<AtomicCompactionUnitBoundary>* boundaries(
  115. size_t compaction_input_level) const {
  116. assert(compaction_input_level < inputs_.size());
  117. return &inputs_[compaction_input_level].atomic_compaction_unit_boundaries;
  118. }
  119. // Returns the list of file meta data of the specified compaction
  120. // input level.
  121. // REQUIREMENT: "compaction_input_level" must be >= 0 and
  122. // < "input_levels()"
  123. const std::vector<FileMetaData*>* inputs(
  124. size_t compaction_input_level) const {
  125. assert(compaction_input_level < inputs_.size());
  126. return &inputs_[compaction_input_level].files;
  127. }
  128. const std::vector<CompactionInputFiles>* inputs() { return &inputs_; }
  129. // Returns the LevelFilesBrief of the specified compaction input level.
  130. const LevelFilesBrief* input_levels(size_t compaction_input_level) const {
  131. return &input_levels_[compaction_input_level];
  132. }
  133. // Maximum size of files to build during this compaction.
  134. uint64_t max_output_file_size() const { return max_output_file_size_; }
  135. // What compression for output
  136. CompressionType output_compression() const { return output_compression_; }
  137. // What compression options for output
  138. CompressionOptions output_compression_opts() const {
  139. return output_compression_opts_;
  140. }
  141. // Whether need to write output file to second DB path.
  142. uint32_t output_path_id() const { return output_path_id_; }
  143. // Is this a trivial compaction that can be implemented by just
  144. // moving a single input file to the next level (no merging or splitting)
  145. bool IsTrivialMove() const;
  146. // If true, then the compaction can be done by simply deleting input files.
  147. bool deletion_compaction() const { return deletion_compaction_; }
  148. // Add all inputs to this compaction as delete operations to *edit.
  149. void AddInputDeletions(VersionEdit* edit);
  150. // Returns true if the available information we have guarantees that
  151. // the input "user_key" does not exist in any level beyond "output_level()".
  152. bool KeyNotExistsBeyondOutputLevel(const Slice& user_key,
  153. std::vector<size_t>* level_ptrs) const;
  154. // Clear all files to indicate that they are not being compacted
  155. // Delete this compaction from the list of running compactions.
  156. //
  157. // Requirement: DB mutex held
  158. void ReleaseCompactionFiles(Status status);
  159. // Returns the summary of the compaction in "output" with maximum "len"
  160. // in bytes. The caller is responsible for the memory management of
  161. // "output".
  162. void Summary(char* output, int len);
  163. // Return the score that was used to pick this compaction run.
  164. double score() const { return score_; }
  165. // Is this compaction creating a file in the bottom most level?
  166. bool bottommost_level() const { return bottommost_level_; }
  167. // Does this compaction include all sst files?
  168. bool is_full_compaction() const { return is_full_compaction_; }
  169. // Was this compaction triggered manually by the client?
  170. bool is_manual_compaction() const { return is_manual_compaction_; }
  171. // Used when allow_trivial_move option is set in
  172. // Universal compaction. If all the input files are
  173. // non overlapping, then is_trivial_move_ variable
  174. // will be set true, else false
  175. void set_is_trivial_move(bool trivial_move) {
  176. is_trivial_move_ = trivial_move;
  177. }
  178. // Used when allow_trivial_move option is set in
  179. // Universal compaction. Returns true, if the input files
  180. // are non-overlapping and can be trivially moved.
  181. bool is_trivial_move() const { return is_trivial_move_; }
  182. // How many total levels are there?
  183. int number_levels() const { return number_levels_; }
  184. // Return the ImmutableCFOptions that should be used throughout the compaction
  185. // procedure
  186. const ImmutableCFOptions* immutable_cf_options() const {
  187. return &immutable_cf_options_;
  188. }
  189. // Return the MutableCFOptions that should be used throughout the compaction
  190. // procedure
  191. const MutableCFOptions* mutable_cf_options() const {
  192. return &mutable_cf_options_;
  193. }
  194. // Returns the size in bytes that the output file should be preallocated to.
  195. // In level compaction, that is max_file_size_. In universal compaction, that
  196. // is the sum of all input file sizes.
  197. uint64_t OutputFilePreallocationSize() const;
  198. void SetInputVersion(Version* input_version);
  199. struct InputLevelSummaryBuffer {
  200. char buffer[128];
  201. };
  202. const char* InputLevelSummary(InputLevelSummaryBuffer* scratch) const;
  203. uint64_t CalculateTotalInputSize() const;
  204. // In case of compaction error, reset the nextIndex that is used
  205. // to pick up the next file to be compacted from files_by_size_
  206. void ResetNextCompactionIndex();
  207. // Create a CompactionFilter from compaction_filter_factory
  208. std::unique_ptr<CompactionFilter> CreateCompactionFilter() const;
  209. // Is the input level corresponding to output_level_ empty?
  210. bool IsOutputLevelEmpty() const;
  211. // Should this compaction be broken up into smaller ones run in parallel?
  212. bool ShouldFormSubcompactions() const;
  213. // test function to validate the functionality of IsBottommostLevel()
  214. // function -- determines if compaction with inputs and storage is bottommost
  215. static bool TEST_IsBottommostLevel(
  216. int output_level, VersionStorageInfo* vstorage,
  217. const std::vector<CompactionInputFiles>& inputs);
  218. TablePropertiesCollection GetOutputTableProperties() const {
  219. return output_table_properties_;
  220. }
  221. void SetOutputTableProperties(TablePropertiesCollection tp) {
  222. output_table_properties_ = std::move(tp);
  223. }
  224. Slice GetSmallestUserKey() const { return smallest_user_key_; }
  225. Slice GetLargestUserKey() const { return largest_user_key_; }
  226. int GetInputBaseLevel() const;
  227. CompactionReason compaction_reason() { return compaction_reason_; }
  228. const std::vector<FileMetaData*>& grandparents() const {
  229. return grandparents_;
  230. }
  231. uint64_t max_compaction_bytes() const { return max_compaction_bytes_; }
  232. uint32_t max_subcompactions() const { return max_subcompactions_; }
  233. uint64_t MinInputFileOldestAncesterTime() const;
  234. private:
  235. // mark (or clear) all files that are being compacted
  236. void MarkFilesBeingCompacted(bool mark_as_compacted);
  237. // get the smallest and largest key present in files to be compacted
  238. static void GetBoundaryKeys(VersionStorageInfo* vstorage,
  239. const std::vector<CompactionInputFiles>& inputs,
  240. Slice* smallest_key, Slice* largest_key);
  241. // Get the atomic file boundaries for all files in the compaction. Necessary
  242. // in order to avoid the scenario described in
  243. // https://github.com/facebook/rocksdb/pull/4432#discussion_r221072219 and plumb
  244. // down appropriate key boundaries to RangeDelAggregator during compaction.
  245. static std::vector<CompactionInputFiles> PopulateWithAtomicBoundaries(
  246. VersionStorageInfo* vstorage, std::vector<CompactionInputFiles> inputs);
  247. // helper function to determine if compaction with inputs and storage is
  248. // bottommost
  249. static bool IsBottommostLevel(
  250. int output_level, VersionStorageInfo* vstorage,
  251. const std::vector<CompactionInputFiles>& inputs);
  252. static bool IsFullCompaction(VersionStorageInfo* vstorage,
  253. const std::vector<CompactionInputFiles>& inputs);
  254. VersionStorageInfo* input_vstorage_;
  255. const int start_level_; // the lowest level to be compacted
  256. const int output_level_; // levels to which output files are stored
  257. uint64_t max_output_file_size_;
  258. uint64_t max_compaction_bytes_;
  259. uint32_t max_subcompactions_;
  260. const ImmutableCFOptions immutable_cf_options_;
  261. const MutableCFOptions mutable_cf_options_;
  262. Version* input_version_;
  263. VersionEdit edit_;
  264. const int number_levels_;
  265. ColumnFamilyData* cfd_;
  266. Arena arena_; // Arena used to allocate space for file_levels_
  267. const uint32_t output_path_id_;
  268. CompressionType output_compression_;
  269. CompressionOptions output_compression_opts_;
  270. // If true, then the comaction can be done by simply deleting input files.
  271. const bool deletion_compaction_;
  272. // Compaction input files organized by level. Constant after construction
  273. const std::vector<CompactionInputFiles> inputs_;
  274. // A copy of inputs_, organized more closely in memory
  275. autovector<LevelFilesBrief, 2> input_levels_;
  276. // State used to check for number of overlapping grandparent files
  277. // (grandparent == "output_level_ + 1")
  278. std::vector<FileMetaData*> grandparents_;
  279. const double score_; // score that was used to pick this compaction.
  280. // Is this compaction creating a file in the bottom most level?
  281. const bool bottommost_level_;
  282. // Does this compaction include all sst files?
  283. const bool is_full_compaction_;
  284. // Is this compaction requested by the client?
  285. const bool is_manual_compaction_;
  286. // True if we can do trivial move in Universal multi level
  287. // compaction
  288. bool is_trivial_move_;
  289. // Does input compression match the output compression?
  290. bool InputCompressionMatchesOutput() const;
  291. // table properties of output files
  292. TablePropertiesCollection output_table_properties_;
  293. // smallest user keys in compaction
  294. Slice smallest_user_key_;
  295. // largest user keys in compaction
  296. Slice largest_user_key_;
  297. // Reason for compaction
  298. CompactionReason compaction_reason_;
  299. };
  300. // Return sum of sizes of all files in `files`.
  301. extern uint64_t TotalFileSize(const std::vector<FileMetaData*>& files);
  302. } // namespace ROCKSDB_NAMESPACE