delete_scheduler.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <map>
  7. #include <optional>
  8. #include <queue>
  9. #include <string>
  10. #include <thread>
  11. #include "monitoring/instrumented_mutex.h"
  12. #include "port/port.h"
  13. #include "rocksdb/status.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. class Env;
  16. class FileSystem;
  17. class Logger;
  18. class SstFileManagerImpl;
  19. class SystemClock;
  20. // DeleteScheduler allows the DB to enforce a rate limit on file deletion,
  21. // Instead of deleteing files immediately, files are marked as trash
  22. // and deleted in a background thread that apply sleep penalty between deletes
  23. // if they are happening in a rate faster than rate_bytes_per_sec,
  24. //
  25. // Rate limiting can be turned off by setting rate_bytes_per_sec = 0, In this
  26. // case DeleteScheduler will delete files immediately.
  27. class DeleteScheduler {
  28. public:
  29. DeleteScheduler(SystemClock* clock, FileSystem* fs,
  30. int64_t rate_bytes_per_sec, Logger* info_log,
  31. SstFileManagerImpl* sst_file_manager,
  32. double max_trash_db_ratio, uint64_t bytes_max_delete_chunk);
  33. ~DeleteScheduler();
  34. // Return delete rate limit in bytes per second
  35. int64_t GetRateBytesPerSecond() { return rate_bytes_per_sec_.load(); }
  36. // Set delete rate limit in bytes per second
  37. void SetRateBytesPerSecond(int64_t bytes_per_sec) {
  38. rate_bytes_per_sec_.store(bytes_per_sec);
  39. MaybeCreateBackgroundThread();
  40. }
  41. // Delete an accounted file that is tracked by `SstFileManager` and should be
  42. // tracked by this `DeleteScheduler` when it's deleted.
  43. // The file is deleted immediately if slow deletion is disabled. If force_bg
  44. // is not set and trash to db size ratio exceeded the configured threshold,
  45. // it is immediately deleted too. In all other cases, the file will be moved
  46. // to a trash directory and scheduled for deletion by a background thread.
  47. Status DeleteFile(const std::string& fname, const std::string& dir_to_sync,
  48. const bool force_bg = false);
  49. // Delete an unaccounted file that is not tracked by `SstFileManager` and
  50. // should not be tracked by this `DeleteScheduler` when it's deleted.
  51. // The file is deleted immediately if slow deletion is disabled. If force_bg
  52. // is not set and the file have more than 1 hard link, it is immediately
  53. // deleted too. In all other cases, the file will be moved to a trash
  54. // directory and scheduled for deletion by a background thread.
  55. // This API also supports assign a file to a specified bucket created by
  56. // `NewTrashBucket` when delete files in the background. So the caller can
  57. // wait for a specific bucket to be empty by checking the
  58. // `WaitForEmptyTrashBucket` API.
  59. Status DeleteUnaccountedFile(const std::string& file_path,
  60. const std::string& dir_to_sync,
  61. const bool force_bg = false,
  62. std::optional<int32_t> bucket = std::nullopt);
  63. // Wait for all files being deleted in the background to finish or for
  64. // destructor to be called.
  65. void WaitForEmptyTrash();
  66. // Creates a new trash bucket. A bucket is only created and returned when slow
  67. // deletion is enabled.
  68. // For each bucket that is created, the user should also call
  69. // `WaitForEmptyTrashBucket` after scheduling file deletions to make sure the
  70. // trash files are all cleared.
  71. std::optional<int32_t> NewTrashBucket();
  72. // Wait for all the files in the specified bucket to be deleted in the
  73. // background or for the destructor to be called.
  74. void WaitForEmptyTrashBucket(int32_t bucket);
  75. // Return a map containing errors that happened in BackgroundEmptyTrash
  76. // file_path => error status
  77. std::map<std::string, Status> GetBackgroundErrors();
  78. uint64_t GetTotalTrashSize() { return total_trash_size_.load(); }
  79. // Return trash/DB size ratio where new files will be deleted immediately
  80. double GetMaxTrashDBRatio() { return max_trash_db_ratio_.load(); }
  81. // Update trash/DB size ratio where new files will be deleted immediately
  82. void SetMaxTrashDBRatio(double r) {
  83. assert(r >= 0);
  84. max_trash_db_ratio_.store(r);
  85. }
  86. static const std::string kTrashExtension;
  87. static bool IsTrashFile(const std::string& file_path);
  88. // Check if there are any .trash files in path, and schedule their deletion
  89. // Or delete immediately if sst_file_manager is nullptr
  90. static Status CleanupDirectory(Env* env, SstFileManagerImpl* sfm,
  91. const std::string& path);
  92. void SetStatisticsPtr(const std::shared_ptr<Statistics>& stats) {
  93. InstrumentedMutexLock l(&mu_);
  94. stats_ = stats;
  95. }
  96. private:
  97. Status DeleteFileImmediately(const std::string& file_path, bool accounted);
  98. Status AddFileToDeletionQueue(const std::string& file_path,
  99. const std::string& dir_to_sync,
  100. std::optional<int32_t> bucket, bool accounted);
  101. Status MarkAsTrash(const std::string& file_path, bool accounted,
  102. std::string* path_in_trash);
  103. Status DeleteTrashFile(const std::string& path_in_trash,
  104. const std::string& dir_to_sync, bool accounted,
  105. uint64_t* deleted_bytes, bool* is_complete);
  106. Status OnDeleteFile(const std::string& file_path, bool accounted);
  107. void BackgroundEmptyTrash();
  108. void MaybeCreateBackgroundThread();
  109. SystemClock* clock_;
  110. FileSystem* fs_;
  111. // total size of trash files
  112. std::atomic<uint64_t> total_trash_size_;
  113. // Maximum number of bytes that should be deleted per second
  114. std::atomic<int64_t> rate_bytes_per_sec_;
  115. // Mutex to protect queue_, pending_files_, next_trash_bucket_,
  116. // pending_files_in_buckets_, bg_errors_, closing_, stats_
  117. InstrumentedMutex mu_;
  118. struct FileAndDir {
  119. FileAndDir(const std::string& _fname, const std::string& _dir,
  120. bool _accounted, std::optional<int32_t> _bucket)
  121. : fname(_fname), dir(_dir), accounted(_accounted), bucket(_bucket) {}
  122. std::string fname;
  123. std::string dir; // empty will be skipped.
  124. bool accounted;
  125. std::optional<int32_t> bucket;
  126. };
  127. // Queue of trash files that need to be deleted
  128. std::queue<FileAndDir> queue_;
  129. // Number of trash files that are waiting to be deleted
  130. int32_t pending_files_;
  131. // Next trash bucket that can be created
  132. int32_t next_trash_bucket_;
  133. // A mapping from trash bucket to number of pending files in the bucket
  134. std::map<int32_t, int32_t> pending_files_in_buckets_;
  135. uint64_t bytes_max_delete_chunk_;
  136. // Errors that happened in BackgroundEmptyTrash (file_path => error)
  137. std::map<std::string, Status> bg_errors_;
  138. bool num_link_error_printed_ = false;
  139. // Set to true in ~DeleteScheduler() to force BackgroundEmptyTrash to stop
  140. bool closing_;
  141. // Condition variable signaled in these conditions
  142. // - pending_files_ value change from 0 => 1
  143. // - pending_files_ value change from 1 => 0
  144. // - a value in pending_files_in_buckets change from 1 => 0
  145. // - closing_ value is set to true
  146. InstrumentedCondVar cv_;
  147. // Background thread running BackgroundEmptyTrash
  148. std::unique_ptr<port::Thread> bg_thread_;
  149. // Mutex to protect threads from file name conflicts
  150. InstrumentedMutex file_move_mu_;
  151. Logger* info_log_;
  152. SstFileManagerImpl* sst_file_manager_;
  153. // If the trash size constitutes for more than this fraction of the total DB
  154. // size we will start deleting new files passed to DeleteScheduler
  155. // immediately
  156. // Unaccounted files passed for deletion will not cause change in
  157. // total_trash_size_ or affect the DeleteScheduler::total_trash_size_ over
  158. // SstFileManager::total_size_ ratio. Their slow deletion is not subject to
  159. // this configured threshold either.
  160. std::atomic<double> max_trash_db_ratio_;
  161. static const uint64_t kMicrosInSecond = 1000 * 1000LL;
  162. std::shared_ptr<Statistics> stats_;
  163. };
  164. } // namespace ROCKSDB_NAMESPACE