random_access_file_reader.h 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <atomic>
  11. #include <sstream>
  12. #include <string>
  13. #include "env/file_system_tracer.h"
  14. #include "port/port.h"
  15. #include "rocksdb/file_system.h"
  16. #include "rocksdb/listener.h"
  17. #include "rocksdb/options.h"
  18. #include "rocksdb/rate_limiter.h"
  19. #include "util/aligned_buffer.h"
  20. namespace ROCKSDB_NAMESPACE {
  21. class Statistics;
  22. class HistogramImpl;
  23. class SystemClock;
  24. using AlignedBuf = FSAllocationPtr;
  25. // Align the request r according to alignment and return the aligned result.
  // r is taken by const reference, so the caller's request is left untouched;
  // the aligned request is handed back by value.
  // NOTE(review): the rounding applied to offset and len is defined in the
  // implementation file, not here -- confirm there before relying on it.
  26. FSReadRequest Align(const FSReadRequest& r, size_t alignment);
  27. // Try to merge src into dest if the two requests overlap.
  28. //
  29. // Each request represents an inclusive interval [offset, offset + len].
  30. // If the intervals overlap, update dest's offset and len to represent the
  31. // merged interval, and return true.
  32. // Otherwise, do nothing and return false.
  // dest is the in/out parameter (passed by pointer); src is read-only.
  33. bool TryMerge(FSReadRequest* dest, const FSReadRequest& src);
  34. // RandomAccessFileReader is a wrapper on top of FSRandomAccessFile. It is
  35. // responsible for:
  36. // - Handling Buffered and Direct reads appropriately.
  37. // - Rate limiting compaction reads.
  38. // - Notifying any interested listeners on the completion of a read.
  39. // - Updating IO stats.
  40. class RandomAccessFileReader {
  41. private:
  42. void NotifyOnFileReadFinish(
  43. uint64_t offset, size_t length,
  44. const FileOperationInfo::StartTimePoint& start_ts,
  45. const FileOperationInfo::FinishTimePoint& finish_ts,
  46. const Status& status) const {
  47. FileOperationInfo info(FileOperationType::kRead, file_name_, start_ts,
  48. finish_ts, status, file_temperature_);
  49. info.offset = offset;
  50. info.length = length;
  51. for (auto& listener : listeners_) {
  52. listener->OnFileReadFinish(info);
  53. }
  54. info.status.PermitUncheckedError();
  55. }
  56. void NotifyOnIOError(const IOStatus& io_status, FileOperationType operation,
  57. const std::string& file_path, size_t length,
  58. uint64_t offset) const {
  59. if (listeners_.empty()) {
  60. return;
  61. }
  62. IOErrorInfo io_error_info(io_status, operation, file_path, length, offset);
  63. for (auto& listener : listeners_) {
  64. listener->OnIOError(io_error_info);
  65. }
  66. io_status.PermitUncheckedError();
  67. }
  68. bool ShouldNotifyListeners() const { return !listeners_.empty(); }
  69. FSRandomAccessFilePtr file_;
  70. std::string file_name_;
  71. SystemClock* clock_;
  72. Statistics* stats_;
  73. uint32_t hist_type_;
  74. HistogramImpl* file_read_hist_;
  75. RateLimiter* rate_limiter_;
  76. std::vector<std::shared_ptr<EventListener>> listeners_;
  77. const Temperature file_temperature_;
  78. const bool is_last_level_;
  79. struct ReadAsyncInfo {
  80. ReadAsyncInfo(std::function<void(FSReadRequest&, void*)> cb, void* cb_arg,
  81. uint64_t start_time)
  82. : cb_(cb),
  83. cb_arg_(cb_arg),
  84. start_time_(start_time),
  85. user_scratch_(nullptr),
  86. user_aligned_buf_(nullptr),
  87. user_offset_(0),
  88. user_len_(0),
  89. is_aligned_(false) {}
  90. std::function<void(FSReadRequest&, void*)> cb_;
  91. void* cb_arg_;
  92. uint64_t start_time_;
  93. FileOperationInfo::StartTimePoint fs_start_ts_;
  94. // Below fields stores the parameters passed by caller in case of direct_io.
  95. char* user_scratch_;
  96. AlignedBuf* user_aligned_buf_;
  97. uint64_t user_offset_;
  98. size_t user_len_;
  99. Slice user_result_;
  100. // Used in case of direct_io
  101. AlignedBuffer buf_;
  102. bool is_aligned_;
  103. };
  104. public:
  105. explicit RandomAccessFileReader(
  106. std::unique_ptr<FSRandomAccessFile>&& raf, const std::string& _file_name,
  107. SystemClock* clock = nullptr,
  108. const std::shared_ptr<IOTracer>& io_tracer = nullptr,
  109. Statistics* stats = nullptr,
  110. uint32_t hist_type = Histograms::HISTOGRAM_ENUM_MAX,
  111. HistogramImpl* file_read_hist = nullptr,
  112. RateLimiter* rate_limiter = nullptr,
  113. const std::vector<std::shared_ptr<EventListener>>& listeners = {},
  114. Temperature file_temperature = Temperature::kUnknown,
  115. bool is_last_level = false)
  116. : file_(std::move(raf), io_tracer, _file_name),
  117. file_name_(std::move(_file_name)),
  118. clock_(clock),
  119. stats_(stats),
  120. hist_type_(hist_type),
  121. file_read_hist_(file_read_hist),
  122. rate_limiter_(rate_limiter),
  123. listeners_(),
  124. file_temperature_(file_temperature),
  125. is_last_level_(is_last_level) {
  126. std::for_each(listeners.begin(), listeners.end(),
  127. [this](const std::shared_ptr<EventListener>& e) {
  128. if (e->ShouldBeNotifiedOnFileIO()) {
  129. listeners_.emplace_back(e);
  130. }
  131. });
  132. }
  133. static IOStatus Create(const std::shared_ptr<FileSystem>& fs,
  134. const std::string& fname, const FileOptions& file_opts,
  135. std::unique_ptr<RandomAccessFileReader>* reader,
  136. IODebugContext* dbg);
  137. RandomAccessFileReader(const RandomAccessFileReader&) = delete;
  138. RandomAccessFileReader& operator=(const RandomAccessFileReader&) = delete;
  139. // In non-direct IO mode,
  140. // 1. if using mmap, result is stored in a buffer other than scratch;
  141. // 2. if not using mmap, result is stored in the buffer starting from scratch.
  142. //
  143. // In direct IO mode, an aligned buffer is allocated internally.
  144. // 1. If aligned_buf is null, then results are copied to the buffer
  145. // starting from scratch;
  146. // 2. Otherwise, scratch is not used and can be null, the aligned_buf owns
  147. // the internally allocated buffer on return, and the result refers to a
  148. // region in aligned_buf.
  149. IOStatus Read(const IOOptions& opts, uint64_t offset, size_t n, Slice* result,
  150. char* scratch, AlignedBuf* aligned_buf,
  151. IODebugContext* dbg = nullptr) const;
  152. // REQUIRES:
  153. // num_reqs > 0, reqs do not overlap, and offsets in reqs are increasing.
  154. // In non-direct IO mode, aligned_buf should be null;
  155. // In direct IO mode, aligned_buf stores the aligned buffer allocated inside
  156. // MultiRead, the result Slices in reqs refer to aligned_buf.
  157. IOStatus MultiRead(const IOOptions& opts, FSReadRequest* reqs,
  158. size_t num_reqs, AlignedBuf* aligned_buf,
  159. IODebugContext* dbg = nullptr) const;
  160. IOStatus Prefetch(const IOOptions& opts, uint64_t offset, size_t n,
  161. IODebugContext* dbg = nullptr) const {
  162. return file_->Prefetch(offset, n, opts, dbg);
  163. }
  164. FSRandomAccessFile* file() { return file_.get(); }
  165. const std::string& file_name() const { return file_name_; }
  166. bool use_direct_io() const { return file_->use_direct_io(); }
  167. IOStatus PrepareIOOptions(const ReadOptions& ro, IOOptions& opts,
  168. IODebugContext* dbg = nullptr) const;
  169. IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
  170. std::function<void(FSReadRequest&, void*)> cb,
  171. void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
  172. AlignedBuf* aligned_buf, IODebugContext* dbg = nullptr);
  173. void ReadAsyncCallback(FSReadRequest& req, void* cb_arg);
  174. };
  175. } // namespace ROCKSDB_NAMESPACE