compaction_picker_fifo.cc 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "db/compaction/compaction_picker_fifo.h"
  10. #include <cinttypes>
  11. #include <string>
  12. #include <vector>
  13. #include "db/column_family.h"
  14. #include "logging/log_buffer.h"
  15. #include "logging/logging.h"
  16. #include "options/options_helper.h"
  17. #include "rocksdb/listener.h"
  18. #include "rocksdb/statistics.h"
  19. #include "rocksdb/status.h"
  20. #include "util/string_util.h"
  21. namespace ROCKSDB_NAMESPACE {
  22. namespace {
  23. uint64_t GetTotalFilesSize(const std::vector<FileMetaData*>& files) {
  24. uint64_t total_size = 0;
  25. for (const auto& f : files) {
  26. total_size += f->fd.file_size;
  27. }
  28. return total_size;
  29. }
  30. } // anonymous namespace
  31. bool FIFOCompactionPicker::NeedsCompaction(
  32. const VersionStorageInfo* vstorage) const {
  33. const int kLevel0 = 0;
  34. return vstorage->CompactionScore(kLevel0) >= 1;
  35. }
  36. Compaction* FIFOCompactionPicker::PickTTLCompaction(
  37. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  38. const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
  39. LogBuffer* log_buffer) {
  40. assert(mutable_cf_options.ttl > 0);
  41. const int kLevel0 = 0;
  42. const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
  43. uint64_t total_size = GetTotalFilesSize(level_files);
  44. int64_t _current_time;
  45. auto status = ioptions_.clock->GetCurrentTime(&_current_time);
  46. if (!status.ok()) {
  47. ROCKS_LOG_BUFFER(log_buffer,
  48. "[%s] FIFO compaction: Couldn't get current time: %s. "
  49. "Not doing compactions based on TTL. ",
  50. cf_name.c_str(), status.ToString().c_str());
  51. return nullptr;
  52. }
  53. const uint64_t current_time = static_cast<uint64_t>(_current_time);
  54. if (!level0_compactions_in_progress_.empty()) {
  55. ROCKS_LOG_BUFFER(
  56. log_buffer,
  57. "[%s] FIFO compaction: Already executing compaction. No need "
  58. "to run parallel compactions since compactions are very fast",
  59. cf_name.c_str());
  60. return nullptr;
  61. }
  62. std::vector<CompactionInputFiles> inputs;
  63. inputs.emplace_back();
  64. inputs[0].level = 0;
  65. // avoid underflow
  66. if (current_time > mutable_cf_options.ttl) {
  67. for (auto ritr = level_files.rbegin(); ritr != level_files.rend(); ++ritr) {
  68. FileMetaData* f = *ritr;
  69. assert(f);
  70. if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
  71. uint64_t newest_key_time = f->TryGetNewestKeyTime();
  72. uint64_t creation_time =
  73. f->fd.table_reader->GetTableProperties()->creation_time;
  74. uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime
  75. ? creation_time
  76. : newest_key_time;
  77. if (est_newest_key_time == kUnknownNewestKeyTime ||
  78. est_newest_key_time >= (current_time - mutable_cf_options.ttl)) {
  79. break;
  80. }
  81. }
  82. total_size -= f->fd.file_size;
  83. inputs[0].files.push_back(f);
  84. }
  85. }
  86. // Return a nullptr and proceed to size-based FIFO compaction if:
  87. // 1. there are no files older than ttl OR
  88. // 2. there are a few files older than ttl, but deleting them will not bring
  89. // the total size to be less than max_table_files_size threshold.
  90. if (inputs[0].files.empty() ||
  91. total_size >
  92. mutable_cf_options.compaction_options_fifo.max_table_files_size) {
  93. return nullptr;
  94. }
  95. for (const auto& f : inputs[0].files) {
  96. assert(f);
  97. uint64_t newest_key_time = f->TryGetNewestKeyTime();
  98. uint64_t creation_time = 0;
  99. if (f->fd.table_reader && f->fd.table_reader->GetTableProperties()) {
  100. creation_time = f->fd.table_reader->GetTableProperties()->creation_time;
  101. }
  102. uint64_t est_newest_key_time = newest_key_time == kUnknownNewestKeyTime
  103. ? creation_time
  104. : newest_key_time;
  105. ROCKS_LOG_BUFFER(log_buffer,
  106. "[%s] FIFO compaction: picking file %" PRIu64
  107. " with estimated newest key time %" PRIu64 " for deletion",
  108. cf_name.c_str(), f->fd.GetNumber(), est_newest_key_time);
  109. }
  110. Compaction* c = new Compaction(
  111. vstorage, ioptions_, mutable_cf_options, mutable_db_options,
  112. std::move(inputs), 0, 0, 0, 0, kNoCompression,
  113. mutable_cf_options.compression_opts, Temperature::kUnknown,
  114. /* max_subcompactions */ 0, {}, /* earliest_snapshot */ std::nullopt,
  115. /* snapshot_checker */ nullptr, CompactionReason::kFIFOTtl,
  116. /* trim_ts */ "", vstorage->CompactionScore(0),
  117. /* l0_files_might_overlap */ true);
  118. return c;
  119. }
  120. // The size-based compaction picker for FIFO.
  121. //
  122. // When the entire column family size exceeds max_table_files_size, FIFO will
  123. // try to delete the oldest sst file(s) until the resulting column family size
  124. // is smaller than max_table_files_size.
  125. //
  126. // This function also takes care the case where a DB is migrating from level /
  127. // universal compaction to FIFO compaction. During the migration, the column
  128. // family will also have non-L0 files while FIFO can only create L0 files.
  129. // In this case, this function will first purge the sst files in the bottom-
  130. // most non-empty level first, and the DB will eventually converge to the
  131. // regular FIFO case where there're only L0 files. Note that during the
  132. // migration case, the purge order will only be an approximation of "FIFO"
  133. // as entries inside lower-level files might sometimes be newer than some
  134. // entries inside upper-level files.
  135. Compaction* FIFOCompactionPicker::PickSizeCompaction(
  136. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  137. const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
  138. LogBuffer* log_buffer) {
  139. // compute the total size and identify the last non-empty level
  140. int last_level = 0;
  141. uint64_t total_size = 0;
  142. for (int level = 0; level < vstorage->num_levels(); ++level) {
  143. auto level_size = GetTotalFilesSize(vstorage->LevelFiles(level));
  144. total_size += level_size;
  145. if (level_size > 0) {
  146. last_level = level;
  147. }
  148. }
  149. const std::vector<FileMetaData*>& last_level_files =
  150. vstorage->LevelFiles(last_level);
  151. if (last_level == 0 &&
  152. total_size <=
  153. mutable_cf_options.compaction_options_fifo.max_table_files_size) {
  154. // total size not exceeded, try to find intra level 0 compaction if enabled
  155. const std::vector<FileMetaData*>& level0_files = vstorage->LevelFiles(0);
  156. if (mutable_cf_options.compaction_options_fifo.allow_compaction &&
  157. level0_files.size() > 0) {
  158. CompactionInputFiles comp_inputs;
  159. // try to prevent same files from being compacted multiple times, which
  160. // could produce large files that may never TTL-expire. Achieve this by
  161. // disallowing compactions with files larger than memtable (inflate its
  162. // size by 10% to account for uncompressed L0 files that may have size
  163. // slightly greater than memtable size limit).
  164. size_t max_compact_bytes_per_del_file =
  165. static_cast<size_t>(MultiplyCheckOverflow(
  166. static_cast<uint64_t>(mutable_cf_options.write_buffer_size),
  167. 1.1));
  168. if (FindIntraL0Compaction(
  169. level0_files,
  170. mutable_cf_options
  171. .level0_file_num_compaction_trigger /* min_files_to_compact */
  172. ,
  173. max_compact_bytes_per_del_file,
  174. mutable_cf_options.max_compaction_bytes, &comp_inputs)) {
  175. Compaction* c = new Compaction(
  176. vstorage, ioptions_, mutable_cf_options, mutable_db_options,
  177. {comp_inputs}, 0, 16 * 1024 * 1024 /* output file size limit */,
  178. 0 /* max compaction bytes, not applicable */,
  179. 0 /* output path ID */, mutable_cf_options.compression,
  180. mutable_cf_options.compression_opts, Temperature::kUnknown,
  181. 0 /* max_subcompactions */, {},
  182. /* earliest_snapshot */ std::nullopt,
  183. /* snapshot_checker */ nullptr,
  184. CompactionReason::kFIFOReduceNumFiles,
  185. /* trim_ts */ "", vstorage->CompactionScore(0),
  186. /* l0_files_might_overlap */ true);
  187. return c;
  188. }
  189. }
  190. ROCKS_LOG_BUFFER(
  191. log_buffer,
  192. "[%s] FIFO compaction: nothing to do. Total size %" PRIu64
  193. ", max size %" PRIu64 "\n",
  194. cf_name.c_str(), total_size,
  195. mutable_cf_options.compaction_options_fifo.max_table_files_size);
  196. return nullptr;
  197. }
  198. if (!level0_compactions_in_progress_.empty()) {
  199. ROCKS_LOG_BUFFER(
  200. log_buffer,
  201. "[%s] FIFO compaction: Already executing compaction. No need "
  202. "to run parallel compactions since compactions are very fast",
  203. cf_name.c_str());
  204. return nullptr;
  205. }
  206. std::vector<CompactionInputFiles> inputs;
  207. inputs.emplace_back();
  208. inputs[0].level = last_level;
  209. if (last_level == 0) {
  210. // In L0, right-most files are the oldest files.
  211. for (auto ritr = last_level_files.rbegin(); ritr != last_level_files.rend();
  212. ++ritr) {
  213. auto f = *ritr;
  214. total_size -= f->fd.file_size;
  215. inputs[0].files.push_back(f);
  216. char tmp_fsize[16];
  217. AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
  218. ROCKS_LOG_BUFFER(log_buffer,
  219. "[%s] FIFO compaction: picking file %" PRIu64
  220. " with size %s for deletion",
  221. cf_name.c_str(), f->fd.GetNumber(), tmp_fsize);
  222. if (total_size <=
  223. mutable_cf_options.compaction_options_fifo.max_table_files_size) {
  224. break;
  225. }
  226. }
  227. } else if (total_size >
  228. mutable_cf_options.compaction_options_fifo.max_table_files_size) {
  229. // If the last level is non-L0, we actually don't know which file is
  230. // logically the oldest since the file creation time only represents
  231. // when this file was compacted to this level, which is independent
  232. // to when the entries in this file were first inserted.
  233. //
  234. // As a result, we delete files from the left instead. This means the sst
  235. // file with the smallest key will be deleted first. This design decision
  236. // better serves a major type of FIFO use cases where smaller keys are
  237. // associated with older data.
  238. for (const auto& f : last_level_files) {
  239. if (f->being_compacted) {
  240. continue;
  241. }
  242. total_size -= f->fd.file_size;
  243. inputs[0].files.push_back(f);
  244. char tmp_fsize[16];
  245. AppendHumanBytes(f->fd.GetFileSize(), tmp_fsize, sizeof(tmp_fsize));
  246. ROCKS_LOG_BUFFER(
  247. log_buffer,
  248. "[%s] FIFO compaction: picking file %" PRIu64
  249. " with size %s for deletion under total size %" PRIu64
  250. " vs max table files size %" PRIu64,
  251. cf_name.c_str(), f->fd.GetNumber(), tmp_fsize, total_size,
  252. mutable_cf_options.compaction_options_fifo.max_table_files_size);
  253. if (total_size <=
  254. mutable_cf_options.compaction_options_fifo.max_table_files_size) {
  255. break;
  256. }
  257. }
  258. } else {
  259. ROCKS_LOG_BUFFER(
  260. log_buffer,
  261. "[%s] FIFO compaction: nothing to do. Total size %" PRIu64
  262. ", max size %" PRIu64 "\n",
  263. cf_name.c_str(), total_size,
  264. mutable_cf_options.compaction_options_fifo.max_table_files_size);
  265. return nullptr;
  266. }
  267. Compaction* c = new Compaction(
  268. vstorage, ioptions_, mutable_cf_options, mutable_db_options,
  269. std::move(inputs), last_level,
  270. /* target_file_size */ 0,
  271. /* max_compaction_bytes */ 0,
  272. /* output_path_id */ 0, kNoCompression,
  273. mutable_cf_options.compression_opts, Temperature::kUnknown,
  274. /* max_subcompactions */ 0, {}, /* earliest_snapshot */ std::nullopt,
  275. /* snapshot_checker */ nullptr, CompactionReason::kFIFOMaxSize,
  276. /* trim_ts */ "", vstorage->CompactionScore(0),
  277. /* l0_files_might_overlap */ true);
  278. return c;
  279. }
  280. Compaction* FIFOCompactionPicker::PickTemperatureChangeCompaction(
  281. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  282. const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
  283. LogBuffer* log_buffer) const {
  284. const std::vector<FileTemperatureAge>& ages =
  285. mutable_cf_options.compaction_options_fifo
  286. .file_temperature_age_thresholds;
  287. if (ages.empty()) {
  288. return nullptr;
  289. }
  290. // Does not apply to multi-level FIFO.
  291. if (vstorage->num_levels() > 1) {
  292. return nullptr;
  293. }
  294. const int kLevel0 = 0;
  295. const std::vector<FileMetaData*>& level_files = vstorage->LevelFiles(kLevel0);
  296. if (level_files.empty()) {
  297. return nullptr;
  298. }
  299. int64_t _current_time;
  300. auto status = ioptions_.clock->GetCurrentTime(&_current_time);
  301. if (!status.ok()) {
  302. ROCKS_LOG_BUFFER(
  303. log_buffer,
  304. "[%s] FIFO compaction: Couldn't get current time: %s. "
  305. "Not doing compactions based on file temperature-age threshold. ",
  306. cf_name.c_str(), status.ToString().c_str());
  307. return nullptr;
  308. }
  309. const uint64_t current_time = static_cast<uint64_t>(_current_time);
  310. if (!level0_compactions_in_progress_.empty()) {
  311. ROCKS_LOG_BUFFER(
  312. log_buffer,
  313. "[%s] FIFO compaction: Already executing compaction. Parallel "
  314. "compactions are not supported",
  315. cf_name.c_str());
  316. return nullptr;
  317. }
  318. std::vector<CompactionInputFiles> inputs;
  319. inputs.emplace_back();
  320. inputs[0].level = 0;
  321. // avoid underflow
  322. uint64_t min_age = ages[0].age;
  323. // kLastTemperature means target temperature is to be determined.
  324. Temperature compaction_target_temp = Temperature::kLastTemperature;
  325. if (current_time > min_age) {
  326. uint64_t create_time_threshold = current_time - min_age;
  327. assert(level_files.size() >= 1);
  328. for (size_t index = level_files.size(); index >= 1; --index) {
  329. // Try to add cur_file to compaction inputs.
  330. FileMetaData* cur_file = level_files[index - 1];
  331. FileMetaData* prev_file = index < 2 ? nullptr : level_files[index - 2];
  332. if (cur_file->being_compacted) {
  333. // Should not happen since we check for
  334. // `level0_compactions_in_progress_` above. Here we simply just don't
  335. // schedule anything.
  336. return nullptr;
  337. }
  338. uint64_t est_newest_key_time = cur_file->TryGetNewestKeyTime(prev_file);
  339. // Newer file could have newest_key_time populated
  340. if (est_newest_key_time == kUnknownNewestKeyTime) {
  341. continue;
  342. }
  343. if (est_newest_key_time > create_time_threshold) {
  344. break;
  345. }
  346. Temperature cur_target_temp = ages[0].temperature;
  347. for (size_t i = 1; i < ages.size(); ++i) {
  348. if (current_time >= ages[i].age &&
  349. est_newest_key_time <= current_time - ages[i].age) {
  350. cur_target_temp = ages[i].temperature;
  351. }
  352. }
  353. if (cur_file->temperature == cur_target_temp) {
  354. continue;
  355. }
  356. // cur_file needs to change temperature
  357. assert(compaction_target_temp == Temperature::kLastTemperature);
  358. compaction_target_temp = cur_target_temp;
  359. inputs[0].files.push_back(cur_file);
  360. ROCKS_LOG_BUFFER(log_buffer,
  361. "[%s] FIFO compaction: picking file %" PRIu64
  362. " with estimated newest key time %" PRIu64
  363. " and temperature %s for temperature %s.",
  364. cf_name.c_str(), cur_file->fd.GetNumber(),
  365. est_newest_key_time,
  366. temperature_to_string[cur_file->temperature].c_str(),
  367. temperature_to_string[cur_target_temp].c_str());
  368. break;
  369. }
  370. }
  371. if (inputs[0].files.empty()) {
  372. return nullptr;
  373. }
  374. assert(compaction_target_temp != Temperature::kLastTemperature);
  375. // Only compact one file at a time.
  376. assert(inputs.size() == 1);
  377. assert(inputs[0].size() == 1);
  378. Compaction* c = new Compaction(
  379. vstorage, ioptions_, mutable_cf_options, mutable_db_options,
  380. std::move(inputs), 0, 0 /* output file size limit */,
  381. 0 /* max compaction bytes, not applicable */, 0 /* output path ID */,
  382. mutable_cf_options.compression, mutable_cf_options.compression_opts,
  383. compaction_target_temp,
  384. /* max_subcompactions */ 0, {}, /* earliest_snapshot */ std::nullopt,
  385. /* snapshot_checker */ nullptr, CompactionReason::kChangeTemperature,
  386. /* trim_ts */ "", vstorage->CompactionScore(0),
  387. /* l0_files_might_overlap */ true);
  388. return c;
  389. }
  390. Compaction* FIFOCompactionPicker::PickCompaction(
  391. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  392. const MutableDBOptions& mutable_db_options,
  393. const std::vector<SequenceNumber>& /* existing_snapshots */,
  394. const SnapshotChecker* /* snapshot_checker */, VersionStorageInfo* vstorage,
  395. LogBuffer* log_buffer, bool /* require_max_output_level*/) {
  396. Compaction* c = nullptr;
  397. if (mutable_cf_options.ttl > 0) {
  398. c = PickTTLCompaction(cf_name, mutable_cf_options, mutable_db_options,
  399. vstorage, log_buffer);
  400. }
  401. if (c == nullptr) {
  402. c = PickSizeCompaction(cf_name, mutable_cf_options, mutable_db_options,
  403. vstorage, log_buffer);
  404. }
  405. if (c == nullptr) {
  406. c = PickTemperatureChangeCompaction(
  407. cf_name, mutable_cf_options, mutable_db_options, vstorage, log_buffer);
  408. }
  409. RegisterCompaction(c);
  410. return c;
  411. }
  412. Compaction* FIFOCompactionPicker::PickCompactionForCompactRange(
  413. const std::string& cf_name, const MutableCFOptions& mutable_cf_options,
  414. const MutableDBOptions& mutable_db_options, VersionStorageInfo* vstorage,
  415. int input_level, int output_level,
  416. const CompactRangeOptions& /*compact_range_options*/,
  417. const InternalKey* /*begin*/, const InternalKey* /*end*/,
  418. InternalKey** compaction_end, bool* /*manual_conflict*/,
  419. uint64_t /*max_file_num_to_ignore*/, const std::string& /*trim_ts*/) {
  420. #ifdef NDEBUG
  421. (void)input_level;
  422. (void)output_level;
  423. #endif
  424. assert(input_level == 0);
  425. assert(output_level == 0);
  426. *compaction_end = nullptr;
  427. LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL, ioptions_.logger);
  428. Compaction* c =
  429. PickCompaction(cf_name, mutable_cf_options, mutable_db_options,
  430. /*existing_snapshots*/ {}, /*snapshot_checker*/ nullptr,
  431. vstorage, &log_buffer);
  432. log_buffer.FlushBufferToLog();
  433. return c;
  434. }
  435. } // namespace ROCKSDB_NAMESPACE