block_cache_tracer.h 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <atomic>
  7. #include <fstream>
  8. #include "monitoring/instrumented_mutex.h"
  9. #include "rocksdb/block_cache_trace_writer.h"
  10. #include "rocksdb/options.h"
  11. #include "rocksdb/table_reader_caller.h"
  12. #include "rocksdb/trace_reader_writer.h"
  13. #include "trace_replay/trace_replay.h"
  14. namespace ROCKSDB_NAMESPACE {
  // Forward declarations of types this header uses only through pointers or
  // references.
  class Env;
  class SystemClock;
  struct BlockCacheTraceRecord;

  // Time constants declared here and defined in another translation unit.
  // NOTE(review): judging by the names these are unit conversion factors
  // (microseconds per second, seconds per minute, seconds per hour) -- confirm
  // against the definitions.
  extern const uint64_t kMicrosInSecond;
  extern const uint64_t kSecondInMinute;
  extern const uint64_t kSecondInHour;
  21. class BlockCacheTraceHelper {
  22. public:
  23. static bool IsGetOrMultiGetOnDataBlock(TraceType block_type,
  24. TableReaderCaller caller);
  25. static bool IsGetOrMultiGet(TableReaderCaller caller);
  26. static bool IsUserAccess(TableReaderCaller caller);
  27. // Row key is a concatenation of the access's fd_number and the referenced
  28. // user key.
  29. static std::string ComputeRowKey(const BlockCacheTraceRecord& access);
  30. // The first four bytes of the referenced key in a Get request is the table
  31. // id.
  32. static uint64_t GetTableId(const BlockCacheTraceRecord& access);
  33. // The sequence number of a get request is the last part of the referenced
  34. // key.
  35. static uint64_t GetSequenceNumber(const BlockCacheTraceRecord& access);
  36. // Block offset in a file is the last varint64 in the block key.
  37. static uint64_t GetBlockOffsetInFile(const BlockCacheTraceRecord& access);
  38. static const std::string kUnknownColumnFamilyName;
  39. static const uint64_t kReservedGetId;
  40. };
  41. // Lookup context for tracing block cache accesses.
  42. // We trace block accesses at five places:
  43. // 1. BlockBasedTable::GetFilter
  44. // 2. BlockBasedTable::GetUncompressedDict.
  45. // 3. BlockBasedTable::MaybeReadAndLoadToCache. (To trace access on data, index,
  46. // and range deletion block.)
  47. // 4. BlockBasedTable::Get. (To trace the referenced key and whether the
  48. // referenced key exists in a fetched data block.)
  49. // 5. BlockBasedTable::MultiGet. (To trace the referenced key and whether the
  50. // referenced key exists in a fetched data block.)
  51. // The context is created at:
  52. // 1. BlockBasedTable::Get. (kUserGet)
  53. // 2. BlockBasedTable::MultiGet. (kUserMGet)
  54. // 3. BlockBasedTable::NewIterator. (either kUserIterator, kCompaction, or
  55. // external SST ingestion calls this function.)
  56. // 4. BlockBasedTable::Open. (kPrefetch)
  57. // 5. Index/Filter::CacheDependencies. (kPrefetch)
  58. // 6. BlockBasedTable::ApproximateOffsetOf. (kCompaction or
  59. // kUserApproximateSize).
  60. struct BlockCacheLookupContext {
  61. BlockCacheLookupContext(const TableReaderCaller& _caller) : caller(_caller) {}
  62. BlockCacheLookupContext(const TableReaderCaller& _caller, uint64_t _get_id,
  63. bool _get_from_user_specified_snapshot)
  64. : caller(_caller),
  65. get_id(_get_id),
  66. get_from_user_specified_snapshot(_get_from_user_specified_snapshot) {}
  67. const TableReaderCaller caller;
  68. // These are populated when we perform lookup/insert on block cache. The block
  69. // cache tracer uses these inforation when logging the block access at
  70. // BlockBasedTable::GET and BlockBasedTable::MultiGet.
  71. bool is_cache_hit = false;
  72. bool no_insert = false;
  73. TraceType block_type = TraceType::kTraceMax;
  74. uint64_t block_size = 0;
  75. std::string block_key;
  76. uint64_t num_keys_in_block = 0;
  77. // The unique id associated with Get and MultiGet. This enables us to track
  78. // how many blocks a Get/MultiGet request accesses. We can also measure the
  79. // impact of row cache vs block cache.
  80. uint64_t get_id = 0;
  81. std::string referenced_key;
  82. bool get_from_user_specified_snapshot = false;
  83. void FillLookupContext(bool _is_cache_hit, bool _no_insert,
  84. TraceType _block_type, uint64_t _block_size,
  85. const std::string& _block_key,
  86. uint64_t _num_keys_in_block) {
  87. is_cache_hit = _is_cache_hit;
  88. no_insert = _no_insert;
  89. block_type = _block_type;
  90. block_size = _block_size;
  91. block_key = _block_key;
  92. num_keys_in_block = _num_keys_in_block;
  93. }
  94. };
  // Header of a block cache trace, filled in through an out-parameter by the
  // trace readers' ReadHeader functions.
  //
  // Every field defaults to zero so that a header which is declared but never
  // (or only partially) filled in does not carry indeterminate values. The
  // struct remains an aggregate, so brace initialization keeps working.
  struct BlockCacheTraceHeader {
    // Time at which the trace was started.
    // NOTE(review): unit is not visible from this header -- confirm.
    uint64_t start_time = 0;
    // Major/minor version of the RocksDB build that wrote the trace.
    uint32_t rocksdb_major_version = 0;
    uint32_t rocksdb_minor_version = 0;
  };
  100. // BlockCacheTraceWriter captures all RocksDB block cache accesses using a
  101. // user-provided TraceWriter. Every RocksDB operation is written as a single
  102. // trace. Each trace will have a timestamp and type, followed by the trace
  103. // payload.
  104. class BlockCacheTraceWriterImpl : public BlockCacheTraceWriter {
  105. public:
  106. BlockCacheTraceWriterImpl(SystemClock* clock,
  107. const BlockCacheTraceWriterOptions& trace_options,
  108. std::unique_ptr<TraceWriter>&& trace_writer);
  109. ~BlockCacheTraceWriterImpl() = default;
  110. // No copy and move.
  111. BlockCacheTraceWriterImpl(const BlockCacheTraceWriterImpl&) = delete;
  112. BlockCacheTraceWriterImpl& operator=(const BlockCacheTraceWriterImpl&) =
  113. delete;
  114. BlockCacheTraceWriterImpl(BlockCacheTraceWriterImpl&&) = delete;
  115. BlockCacheTraceWriterImpl& operator=(BlockCacheTraceWriterImpl&&) = delete;
  116. // Pass Slice references to avoid copy.
  117. Status WriteBlockAccess(const BlockCacheTraceRecord& record,
  118. const Slice& block_key, const Slice& cf_name,
  119. const Slice& referenced_key) override;
  120. // Write a trace header at the beginning, typically on initiating a trace,
  121. // with some metadata like a magic number and RocksDB version.
  122. Status WriteHeader() override;
  123. private:
  124. SystemClock* clock_;
  125. BlockCacheTraceWriterOptions trace_options_;
  126. std::unique_ptr<TraceWriter> trace_writer_;
  127. };
  128. // Write a trace record in human readable format, see
  129. // https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
  130. // for details.
  131. class BlockCacheHumanReadableTraceWriter {
  132. public:
  133. ~BlockCacheHumanReadableTraceWriter();
  134. Status NewWritableFile(const std::string& human_readable_trace_file_path,
  135. ROCKSDB_NAMESPACE::Env* env);
  136. Status WriteHumanReadableTraceRecord(const BlockCacheTraceRecord& access,
  137. uint64_t block_id, uint64_t get_key_id);
  138. private:
  139. char trace_record_buffer_[1024 * 1024];
  140. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
  141. human_readable_trace_file_writer_;
  142. };
  143. // BlockCacheTraceReader helps read the trace file generated by
  144. // BlockCacheTraceWriter using a user provided TraceReader.
  145. class BlockCacheTraceReader {
  146. public:
  147. BlockCacheTraceReader(std::unique_ptr<TraceReader>&& reader);
  148. virtual ~BlockCacheTraceReader() = default;
  149. // No copy and move.
  150. BlockCacheTraceReader(const BlockCacheTraceReader&) = delete;
  151. BlockCacheTraceReader& operator=(const BlockCacheTraceReader&) = delete;
  152. BlockCacheTraceReader(BlockCacheTraceReader&&) = delete;
  153. BlockCacheTraceReader& operator=(BlockCacheTraceReader&&) = delete;
  154. Status ReadHeader(BlockCacheTraceHeader* header);
  155. Status ReadAccess(BlockCacheTraceRecord* record);
  156. private:
  157. std::unique_ptr<TraceReader> trace_reader_;
  158. };
  159. // Read a trace record in human readable format, see
  160. // https://github.com/facebook/rocksdb/wiki/Block-cache-analysis-and-simulation-tools#trace-format
  161. // for detailed.
  162. class BlockCacheHumanReadableTraceReader : public BlockCacheTraceReader {
  163. public:
  164. BlockCacheHumanReadableTraceReader(const std::string& trace_file_path);
  165. ~BlockCacheHumanReadableTraceReader();
  166. Status ReadHeader(BlockCacheTraceHeader* header);
  167. Status ReadAccess(BlockCacheTraceRecord* record);
  168. private:
  169. std::ifstream human_readable_trace_reader_;
  170. };
  171. // A block cache tracer. It downsamples the accesses according to
  172. // trace_options and uses BlockCacheTraceWriter to write the access record to
  173. // the trace file.
  174. class BlockCacheTracer {
  175. public:
  176. BlockCacheTracer();
  177. ~BlockCacheTracer();
  178. // No copy and move.
  179. BlockCacheTracer(const BlockCacheTracer&) = delete;
  180. BlockCacheTracer& operator=(const BlockCacheTracer&) = delete;
  181. BlockCacheTracer(BlockCacheTracer&&) = delete;
  182. BlockCacheTracer& operator=(BlockCacheTracer&&) = delete;
  183. // Start writing block cache accesses to the trace_writer.
  184. Status StartTrace(const BlockCacheTraceOptions& trace_options,
  185. std::unique_ptr<BlockCacheTraceWriter>&& trace_writer);
  186. // Stop writing block cache accesses to the trace_writer.
  187. void EndTrace();
  188. bool is_tracing_enabled() const {
  189. return writer_.load(std::memory_order_relaxed);
  190. }
  191. Status WriteBlockAccess(const BlockCacheTraceRecord& record,
  192. const Slice& block_key, const Slice& cf_name,
  193. const Slice& referenced_key);
  194. // GetId cycles from 1 to std::numeric_limits<uint64_t>::max().
  195. uint64_t NextGetId();
  196. private:
  197. BlockCacheTraceOptions trace_options_;
  198. // A mutex protects the writer_.
  199. InstrumentedMutex trace_writer_mutex_;
  200. std::atomic<BlockCacheTraceWriter*> writer_;
  201. std::atomic<uint64_t> get_id_counter_;
  202. };
  203. } // namespace ROCKSDB_NAMESPACE