io_tracer.h 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <atomic>
  7. #include <fstream>
  8. #include "monitoring/instrumented_mutex.h"
  9. #include "port/lang.h"
  10. #include "rocksdb/file_system.h"
  11. #include "rocksdb/options.h"
  12. #include "rocksdb/trace_record.h"
  13. #include "trace_replay/trace_replay.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. class SystemClock;
  16. class TraceReader;
  17. class TraceWriter;
  // Bit positions of the optional payload fields of an IOTraceRecord.
  //
  // The numeric value of each enumerator is the index of the bit in
  // IOTraceRecord::io_op_data that marks the matching field as logged.
  //
  // To log a new piece of data in the trace record for selected operations:
  //   1. Add an enumerator to IOTraceOp (for example kIONewData = 3).
  //   2. Write it in IOTraceWriter::WriteIOOp, and read it back in
  //      IOTraceReader::ReadIOOp and in the switch statement of
  //      IOTraceRecordParser::PrintHumanReadableIOTraceRecord.
  //   3. In each FileSystemTracer API that should log the new data, set its
  //      bit:  io_op_data |= (1 << IOTraceOp::kIONewData).
  enum IOTraceOp : char {
    kIOFileSize = 0,  // bit 0: file size is logged
    kIOLen = 1,       // bit 1: length is logged
    kIOOffset = 2,    // bit 2: offset is logged
  };
  34. struct IOTraceRecord {
  35. // Required fields for all accesses.
  36. uint64_t access_timestamp = 0;
  37. TraceType trace_type = TraceType::kTraceMax;
  38. // Each bit in io_op_data stores which corresponding info from IOTraceOp will
  39. // be added in the trace. Foreg, if bit at position 1 is set then
  40. // IOTraceOp::kIOLen (length) will be logged in the record.
  41. uint64_t io_op_data = 0;
  42. std::string file_operation;
  43. uint64_t latency = 0;
  44. std::string io_status;
  45. // Stores file name instead of full path.
  46. std::string file_name;
  47. // Fields added to record based on IO operation.
  48. uint64_t len = 0;
  49. uint64_t offset = 0;
  50. uint64_t file_size = 0;
  51. // Additional information passed in IODebugContext.
  52. uint64_t trace_data = 0;
  53. std::string request_id;
  54. IOTraceRecord() {}
  55. IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
  56. const uint64_t& _io_op_data, const std::string& _file_operation,
  57. const uint64_t& _latency, const std::string& _io_status,
  58. const std::string& _file_name, const uint64_t& _file_size = 0)
  59. : access_timestamp(_access_timestamp),
  60. trace_type(_trace_type),
  61. io_op_data(_io_op_data),
  62. file_operation(_file_operation),
  63. latency(_latency),
  64. io_status(_io_status),
  65. file_name(_file_name),
  66. file_size(_file_size) {}
  67. IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
  68. const uint64_t& _io_op_data, const std::string& _file_operation,
  69. const uint64_t& _latency, const std::string& _io_status,
  70. const std::string& _file_name, const uint64_t& _len,
  71. const uint64_t& _offset)
  72. : access_timestamp(_access_timestamp),
  73. trace_type(_trace_type),
  74. io_op_data(_io_op_data),
  75. file_operation(_file_operation),
  76. latency(_latency),
  77. io_status(_io_status),
  78. file_name(_file_name),
  79. len(_len),
  80. offset(_offset) {}
  81. };
  // Header metadata of an IO trace; the output type of
  // IOTraceReader::ReadHeader.
  //
  // Every field has a zero default so that a header which was declared but
  // never filled in (for example because reading it failed) holds defined
  // values instead of indeterminate ones. The struct is still an aggregate
  // (C++14 and later), so brace initialization such as
  // IOTraceHeader{t, major, minor} keeps working.
  struct IOTraceHeader {
    // Start time of the trace. NOTE(review): unit/clock not visible here.
    uint64_t start_time = 0;
    // RocksDB version recorded in the trace.
    uint32_t rocksdb_major_version = 0;
    uint32_t rocksdb_minor_version = 0;
  };
  87. // IOTraceWriter writes IO operation as a single trace. Each trace will have a
  88. // timestamp and type, followed by the trace payload.
  89. class IOTraceWriter {
  90. public:
  91. IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options,
  92. std::unique_ptr<TraceWriter>&& trace_writer);
  93. ~IOTraceWriter() = default;
  94. // No copy and move.
  95. IOTraceWriter(const IOTraceWriter&) = delete;
  96. IOTraceWriter& operator=(const IOTraceWriter&) = delete;
  97. IOTraceWriter(IOTraceWriter&&) = delete;
  98. IOTraceWriter& operator=(IOTraceWriter&&) = delete;
  99. Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
  100. // Write a trace header at the beginning, typically on initiating a trace,
  101. // with some metadata like a magic number and RocksDB version.
  102. Status WriteHeader();
  103. private:
  104. SystemClock* clock_;
  105. TraceOptions trace_options_;
  106. std::unique_ptr<TraceWriter> trace_writer_;
  107. };
  108. // IOTraceReader helps read the trace file generated by IOTraceWriter.
  109. class IOTraceReader {
  110. public:
  111. explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader);
  112. ~IOTraceReader() = default;
  113. // No copy and move.
  114. IOTraceReader(const IOTraceReader&) = delete;
  115. IOTraceReader& operator=(const IOTraceReader&) = delete;
  116. IOTraceReader(IOTraceReader&&) = delete;
  117. IOTraceReader& operator=(IOTraceReader&&) = delete;
  118. Status ReadHeader(IOTraceHeader* header);
  119. Status ReadIOOp(IOTraceRecord* record);
  120. private:
  121. std::unique_ptr<TraceReader> trace_reader_;
  122. };
  123. // An IO tracer. It uses IOTraceWriter to write the access record to the
  124. // trace file.
  125. class IOTracer {
  126. public:
  127. IOTracer();
  128. ~IOTracer();
  129. // No copy and move.
  130. IOTracer(const IOTracer&) = delete;
  131. IOTracer& operator=(const IOTracer&) = delete;
  132. IOTracer(IOTracer&&) = delete;
  133. IOTracer& operator=(IOTracer&&) = delete;
  134. // no_sanitize is added for tracing_enabled. writer_ is protected under mutex
  135. // so even if user call Start/EndIOTrace and tracing_enabled is not updated in
  136. // the meanwhile, WriteIOOp will anyways check the writer_ protected under
  137. // mutex and ignore the operation if writer_is null. So its ok if
  138. // tracing_enabled shows non updated value.
  139. // Start writing IO operations to the trace_writer.
  140. TSAN_SUPPRESSION Status
  141. StartIOTrace(SystemClock* clock, const TraceOptions& trace_options,
  142. std::unique_ptr<TraceWriter>&& trace_writer);
  143. // Stop writing IO operations to the trace_writer.
  144. TSAN_SUPPRESSION void EndIOTrace();
  145. TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; }
  146. void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
  147. private:
  148. TraceOptions trace_options_;
  149. // A mutex protects the writer_.
  150. InstrumentedMutex trace_writer_mutex_;
  151. std::atomic<IOTraceWriter*> writer_;
  152. // bool tracing_enabled is added to avoid costly operation of checking atomic
  153. // variable 'writer_' is nullptr or not in is_tracing_enabled().
  154. // is_tracing_enabled() is invoked multiple times by FileSystem classes.
  155. bool tracing_enabled;
  156. };
  157. } // namespace ROCKSDB_NAMESPACE