sst_file_manager_impl.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #ifndef ROCKSDB_LITE
  7. #include <string>
  8. #include "port/port.h"
  9. #include "db/compaction/compaction.h"
  10. #include "db/error_handler.h"
  11. #include "file/delete_scheduler.h"
  12. #include "rocksdb/file_system.h"
  13. #include "rocksdb/sst_file_manager.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. class Env;
  16. class Logger;
  17. // SstFileManager is used to track SST files in the DB and control there
  18. // deletion rate.
  19. // All SstFileManager public functions are thread-safe.
  20. class SstFileManagerImpl : public SstFileManager {
  21. public:
  22. explicit SstFileManagerImpl(Env* env, std::shared_ptr<FileSystem> fs,
  23. std::shared_ptr<Logger> logger,
  24. int64_t rate_bytes_per_sec,
  25. double max_trash_db_ratio,
  26. uint64_t bytes_max_delete_chunk);
  27. ~SstFileManagerImpl();
  28. // DB will call OnAddFile whenever a new sst file is added.
  29. Status OnAddFile(const std::string& file_path, bool compaction = false);
  30. // Overload where size of the file is provided by the caller rather than
  31. // queried from the filesystem. This is an optimization.
  32. Status OnAddFile(const std::string& file_path, uint64_t file_size,
  33. bool compaction);
  34. // DB will call OnDeleteFile whenever an sst file is deleted.
  35. Status OnDeleteFile(const std::string& file_path);
  36. // DB will call OnMoveFile whenever an sst file is move to a new path.
  37. Status OnMoveFile(const std::string& old_path, const std::string& new_path,
  38. uint64_t* file_size = nullptr);
  39. // Update the maximum allowed space that should be used by RocksDB, if
  40. // the total size of the SST files exceeds max_allowed_space, writes to
  41. // RocksDB will fail.
  42. //
  43. // Setting max_allowed_space to 0 will disable this feature, maximum allowed
  44. // space will be infinite (Default value).
  45. //
  46. // thread-safe.
  47. void SetMaxAllowedSpaceUsage(uint64_t max_allowed_space) override;
  48. void SetCompactionBufferSize(uint64_t compaction_buffer_size) override;
  49. // Return true if the total size of SST files exceeded the maximum allowed
  50. // space usage.
  51. //
  52. // thread-safe.
  53. bool IsMaxAllowedSpaceReached() override;
  54. bool IsMaxAllowedSpaceReachedIncludingCompactions() override;
  55. // Returns true is there is enough (approximate) space for the specified
  56. // compaction. Space is approximate because this function conservatively
  57. // estimates how much space is currently being used by compactions (i.e.
  58. // if a compaction has started, this function bumps the used space by
  59. // the full compaction size).
  60. bool EnoughRoomForCompaction(ColumnFamilyData* cfd,
  61. const std::vector<CompactionInputFiles>& inputs,
  62. Status bg_error);
  63. // Bookkeeping so total_file_sizes_ goes back to normal after compaction
  64. // finishes
  65. void OnCompactionCompletion(Compaction* c);
  66. uint64_t GetCompactionsReservedSize();
  67. // Return the total size of all tracked files.
  68. uint64_t GetTotalSize() override;
  69. // Return a map containing all tracked files and there corresponding sizes.
  70. std::unordered_map<std::string, uint64_t> GetTrackedFiles() override;
  71. // Return delete rate limit in bytes per second.
  72. virtual int64_t GetDeleteRateBytesPerSecond() override;
  73. // Update the delete rate limit in bytes per second.
  74. virtual void SetDeleteRateBytesPerSecond(int64_t delete_rate) override;
  75. // Return trash/DB size ratio where new files will be deleted immediately
  76. virtual double GetMaxTrashDBRatio() override;
  77. // Update trash/DB size ratio where new files will be deleted immediately
  78. virtual void SetMaxTrashDBRatio(double ratio) override;
  79. // Return the total size of trash files
  80. uint64_t GetTotalTrashSize() override;
  81. // Called by each DB instance using this sst file manager to reserve
  82. // disk buffer space for recovery from out of space errors
  83. void ReserveDiskBuffer(uint64_t buffer, const std::string& path);
  84. // Set a flag upon encountering disk full. May enqueue the ErrorHandler
  85. // instance for background polling and recovery
  86. void StartErrorRecovery(ErrorHandler* db, Status bg_error);
  87. // Remove the given Errorhandler instance from the recovery queue. Its
  88. // not guaranteed
  89. bool CancelErrorRecovery(ErrorHandler* db);
  90. // Mark file as trash and schedule it's deletion. If force_bg is set, it
  91. // forces the file to be deleting in the background regardless of DB size,
  92. // except when rate limited delete is disabled
  93. virtual Status ScheduleFileDeletion(const std::string& file_path,
  94. const std::string& dir_to_sync,
  95. const bool force_bg = false);
  96. // Wait for all files being deleteing in the background to finish or for
  97. // destructor to be called.
  98. virtual void WaitForEmptyTrash();
  99. DeleteScheduler* delete_scheduler() { return &delete_scheduler_; }
  100. // Stop the error recovery background thread. This should be called only
  101. // once in the object's lifetime, and before the destructor
  102. void Close();
  103. private:
  104. // REQUIRES: mutex locked
  105. void OnAddFileImpl(const std::string& file_path, uint64_t file_size,
  106. bool compaction);
  107. // REQUIRES: mutex locked
  108. void OnDeleteFileImpl(const std::string& file_path);
  109. void ClearError();
  110. bool CheckFreeSpace() {
  111. return bg_err_.severity() == Status::Severity::kSoftError;
  112. }
  113. Env* env_;
  114. std::shared_ptr<FileSystem> fs_;
  115. std::shared_ptr<Logger> logger_;
  116. // Mutex to protect tracked_files_, total_files_size_
  117. port::Mutex mu_;
  118. // The summation of the sizes of all files in tracked_files_ map
  119. uint64_t total_files_size_;
  120. // The summation of all output files of in-progress compactions
  121. uint64_t in_progress_files_size_;
  122. // Compactions should only execute if they can leave at least
  123. // this amount of buffer space for logs and flushes
  124. uint64_t compaction_buffer_size_;
  125. // Estimated size of the current ongoing compactions
  126. uint64_t cur_compactions_reserved_size_;
  127. // A map containing all tracked files and there sizes
  128. // file_path => file_size
  129. std::unordered_map<std::string, uint64_t> tracked_files_;
  130. // A set of files belonging to in-progress compactions
  131. std::unordered_set<std::string> in_progress_files_;
  132. // The maximum allowed space (in bytes) for sst files.
  133. uint64_t max_allowed_space_;
  134. // DeleteScheduler used to throttle file deletition.
  135. DeleteScheduler delete_scheduler_;
  136. port::CondVar cv_;
  137. // Flag to force error recovery thread to exit
  138. bool closing_;
  139. // Background error recovery thread
  140. std::unique_ptr<port::Thread> bg_thread_;
  141. // A path in the filesystem corresponding to this SFM. This is used for
  142. // calling Env::GetFreeSpace. Posix requires a path in the filesystem
  143. std::string path_;
  144. // Save the current background error
  145. Status bg_err_;
  146. // Amount of free disk headroom before allowing recovery from hard errors
  147. uint64_t reserved_disk_buffer_;
  148. // For soft errors, amount of free disk space before we can allow
  149. // compactions to run full throttle. If disk space is below this trigger,
  150. // compactions will be gated by free disk space > input size
  151. uint64_t free_space_trigger_;
  152. // List of database error handler instances tracked by this sst file manager
  153. std::list<ErrorHandler*> error_handler_list_;
  154. // Pointer to ErrorHandler instance that is currently processing recovery
  155. ErrorHandler* cur_instance_;
  156. };
  157. } // namespace ROCKSDB_NAMESPACE
  158. #endif // ROCKSDB_LITE