io_tracer.cc 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include "trace_replay/io_tracer.h"
  6. #include <cinttypes>
  7. #include <cstdio>
  8. #include <cstdlib>
  9. #include "db/db_impl/db_impl.h"
  10. #include "db/dbformat.h"
  11. #include "rocksdb/slice.h"
  12. #include "rocksdb/system_clock.h"
  13. #include "rocksdb/trace_reader_writer.h"
  14. #include "util/coding.h"
  15. #include "util/hash.h"
  16. #include "util/string_util.h"
  17. namespace ROCKSDB_NAMESPACE {
  18. IOTraceWriter::IOTraceWriter(SystemClock* clock,
  19. const TraceOptions& trace_options,
  20. std::unique_ptr<TraceWriter>&& trace_writer)
  21. : clock_(clock),
  22. trace_options_(trace_options),
  23. trace_writer_(std::move(trace_writer)) {}
  24. Status IOTraceWriter::WriteIOOp(const IOTraceRecord& record,
  25. IODebugContext* dbg) {
  26. uint64_t trace_file_size = trace_writer_->GetFileSize();
  27. if (trace_file_size > trace_options_.max_trace_file_size) {
  28. return Status::OK();
  29. }
  30. Trace trace;
  31. trace.ts = record.access_timestamp;
  32. trace.type = record.trace_type;
  33. PutFixed64(&trace.payload, record.io_op_data);
  34. Slice file_operation(record.file_operation);
  35. PutLengthPrefixedSlice(&trace.payload, file_operation);
  36. PutFixed64(&trace.payload, record.latency);
  37. Slice io_status(record.io_status);
  38. PutLengthPrefixedSlice(&trace.payload, io_status);
  39. Slice file_name(record.file_name);
  40. PutLengthPrefixedSlice(&trace.payload, file_name);
  41. // Each bit in io_op_data stores which corresponding info from IOTraceOp will
  42. // be added in the trace. Foreg, if bit at position 1 is set then
  43. // IOTraceOp::kIOLen (length) will be logged in the record (Since
  44. // IOTraceOp::kIOLen = 1 in the enum). So find all the set positions in
  45. // io_op_data one by one and, update corresponsing info in the trace record,
  46. // unset that bit to find other set bits until io_op_data = 0.
  47. /* Write remaining options based on io_op_data set by file operation */
  48. int64_t io_op_data = static_cast<int64_t>(record.io_op_data);
  49. while (io_op_data) {
  50. // Find the rightmost set bit.
  51. uint32_t set_pos = static_cast<uint32_t>(log2(io_op_data & -io_op_data));
  52. switch (set_pos) {
  53. case IOTraceOp::kIOFileSize:
  54. PutFixed64(&trace.payload, record.file_size);
  55. break;
  56. case IOTraceOp::kIOLen:
  57. PutFixed64(&trace.payload, record.len);
  58. break;
  59. case IOTraceOp::kIOOffset:
  60. PutFixed64(&trace.payload, record.offset);
  61. break;
  62. default:
  63. assert(false);
  64. }
  65. // unset the rightmost bit.
  66. io_op_data &= (io_op_data - 1);
  67. }
  68. int64_t trace_data = 0;
  69. if (dbg) {
  70. trace_data = static_cast<int64_t>(dbg->trace_data);
  71. }
  72. PutFixed64(&trace.payload, trace_data);
  73. while (trace_data) {
  74. // Find the rightmost set bit.
  75. uint32_t set_pos = static_cast<uint32_t>(log2(trace_data & -trace_data));
  76. switch (set_pos) {
  77. case IODebugContext::TraceData::kRequestID: {
  78. Slice request_id(*dbg->request_id);
  79. PutLengthPrefixedSlice(&trace.payload, request_id);
  80. } break;
  81. default:
  82. assert(false);
  83. }
  84. // unset the rightmost bit.
  85. trace_data &= (trace_data - 1);
  86. }
  87. std::string encoded_trace;
  88. TracerHelper::EncodeTrace(trace, &encoded_trace);
  89. return trace_writer_->Write(encoded_trace);
  90. }
  91. Status IOTraceWriter::WriteHeader() {
  92. Trace trace;
  93. trace.ts = clock_->NowMicros();
  94. trace.type = TraceType::kTraceBegin;
  95. PutLengthPrefixedSlice(&trace.payload, kTraceMagic);
  96. PutFixed32(&trace.payload, kMajorVersion);
  97. PutFixed32(&trace.payload, kMinorVersion);
  98. std::string encoded_trace;
  99. TracerHelper::EncodeTrace(trace, &encoded_trace);
  100. return trace_writer_->Write(encoded_trace);
  101. }
  102. IOTraceReader::IOTraceReader(std::unique_ptr<TraceReader>&& reader)
  103. : trace_reader_(std::move(reader)) {}
  104. Status IOTraceReader::ReadHeader(IOTraceHeader* header) {
  105. assert(header != nullptr);
  106. std::string encoded_trace;
  107. Status s = trace_reader_->Read(&encoded_trace);
  108. if (!s.ok()) {
  109. return s;
  110. }
  111. Trace trace;
  112. s = TracerHelper::DecodeTrace(encoded_trace, &trace);
  113. if (!s.ok()) {
  114. return s;
  115. }
  116. header->start_time = trace.ts;
  117. Slice enc_slice = Slice(trace.payload);
  118. Slice magic_number;
  119. if (!GetLengthPrefixedSlice(&enc_slice, &magic_number)) {
  120. return Status::Corruption(
  121. "Corrupted header in the trace file: Failed to read the magic number.");
  122. }
  123. if (magic_number.ToString() != kTraceMagic) {
  124. return Status::Corruption(
  125. "Corrupted header in the trace file: Magic number does not match.");
  126. }
  127. if (!GetFixed32(&enc_slice, &header->rocksdb_major_version)) {
  128. return Status::Corruption(
  129. "Corrupted header in the trace file: Failed to read rocksdb major "
  130. "version number.");
  131. }
  132. if (!GetFixed32(&enc_slice, &header->rocksdb_minor_version)) {
  133. return Status::Corruption(
  134. "Corrupted header in the trace file: Failed to read rocksdb minor "
  135. "version number.");
  136. }
  137. // We should have retrieved all information in the header.
  138. if (!enc_slice.empty()) {
  139. return Status::Corruption(
  140. "Corrupted header in the trace file: The length of header is too "
  141. "long.");
  142. }
  143. return Status::OK();
  144. }
  145. Status IOTraceReader::ReadIOOp(IOTraceRecord* record) {
  146. assert(record);
  147. std::string encoded_trace;
  148. Status s = trace_reader_->Read(&encoded_trace);
  149. if (!s.ok()) {
  150. return s;
  151. }
  152. Trace trace;
  153. s = TracerHelper::DecodeTrace(encoded_trace, &trace);
  154. if (!s.ok()) {
  155. return s;
  156. }
  157. record->access_timestamp = trace.ts;
  158. record->trace_type = trace.type;
  159. Slice enc_slice = Slice(trace.payload);
  160. if (!GetFixed64(&enc_slice, &record->io_op_data)) {
  161. return Status::Incomplete(
  162. "Incomplete access record: Failed to read trace data.");
  163. }
  164. Slice file_operation;
  165. if (!GetLengthPrefixedSlice(&enc_slice, &file_operation)) {
  166. return Status::Incomplete(
  167. "Incomplete access record: Failed to read file operation.");
  168. }
  169. record->file_operation = file_operation.ToString();
  170. if (!GetFixed64(&enc_slice, &record->latency)) {
  171. return Status::Incomplete(
  172. "Incomplete access record: Failed to read latency.");
  173. }
  174. Slice io_status;
  175. if (!GetLengthPrefixedSlice(&enc_slice, &io_status)) {
  176. return Status::Incomplete(
  177. "Incomplete access record: Failed to read IO status.");
  178. }
  179. record->io_status = io_status.ToString();
  180. Slice file_name;
  181. if (!GetLengthPrefixedSlice(&enc_slice, &file_name)) {
  182. return Status::Incomplete(
  183. "Incomplete access record: Failed to read file name.");
  184. }
  185. record->file_name = file_name.ToString();
  186. // Each bit in io_op_data stores which corresponding info from IOTraceOp will
  187. // be added in the trace. Foreg, if bit at position 1 is set then
  188. // IOTraceOp::kIOLen (length) will be logged in the record (Since
  189. // IOTraceOp::kIOLen = 1 in the enum). So find all the set positions in
  190. // io_op_data one by one and, update corresponsing info in the trace record,
  191. // unset that bit to find other set bits until io_op_data = 0.
  192. /* Read remaining options based on io_op_data set by file operation */
  193. // Assuming 63 bits will be used at max.
  194. int64_t io_op_data = static_cast<int64_t>(record->io_op_data);
  195. while (io_op_data) {
  196. // Find the rightmost set bit.
  197. uint32_t set_pos = static_cast<uint32_t>(log2(io_op_data & -io_op_data));
  198. switch (set_pos) {
  199. case IOTraceOp::kIOFileSize:
  200. if (!GetFixed64(&enc_slice, &record->file_size)) {
  201. return Status::Incomplete(
  202. "Incomplete access record: Failed to read file size.");
  203. }
  204. break;
  205. case IOTraceOp::kIOLen:
  206. if (!GetFixed64(&enc_slice, &record->len)) {
  207. return Status::Incomplete(
  208. "Incomplete access record: Failed to read length.");
  209. }
  210. break;
  211. case IOTraceOp::kIOOffset:
  212. if (!GetFixed64(&enc_slice, &record->offset)) {
  213. return Status::Incomplete(
  214. "Incomplete access record: Failed to read offset.");
  215. }
  216. break;
  217. default:
  218. assert(false);
  219. }
  220. // unset the rightmost bit.
  221. io_op_data &= (io_op_data - 1);
  222. }
  223. if (!GetFixed64(&enc_slice, &record->trace_data)) {
  224. return Status::Incomplete(
  225. "Incomplete access record: Failed to read trace op.");
  226. }
  227. int64_t trace_data = static_cast<int64_t>(record->trace_data);
  228. while (trace_data) {
  229. // Find the rightmost set bit.
  230. uint32_t set_pos = static_cast<uint32_t>(log2(trace_data & -trace_data));
  231. switch (set_pos) {
  232. case IODebugContext::TraceData::kRequestID: {
  233. Slice request_id;
  234. if (!GetLengthPrefixedSlice(&enc_slice, &request_id)) {
  235. return Status::Incomplete(
  236. "Incomplete access record: Failed to request id.");
  237. }
  238. record->request_id = request_id.ToString();
  239. } break;
  240. default:
  241. assert(false);
  242. }
  243. // unset the rightmost bit.
  244. trace_data &= (trace_data - 1);
  245. }
  246. return Status::OK();
  247. }
  248. IOTracer::IOTracer() : tracing_enabled(false) { writer_.store(nullptr); }
  249. IOTracer::~IOTracer() { EndIOTrace(); }
  250. Status IOTracer::StartIOTrace(SystemClock* clock,
  251. const TraceOptions& trace_options,
  252. std::unique_ptr<TraceWriter>&& trace_writer) {
  253. InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
  254. if (writer_.load()) {
  255. return Status::Busy();
  256. }
  257. trace_options_ = trace_options;
  258. writer_.store(
  259. new IOTraceWriter(clock, trace_options, std::move(trace_writer)));
  260. tracing_enabled = true;
  261. return writer_.load()->WriteHeader();
  262. }
  263. void IOTracer::EndIOTrace() {
  264. InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
  265. if (!writer_.load()) {
  266. return;
  267. }
  268. delete writer_.load();
  269. writer_.store(nullptr);
  270. tracing_enabled = false;
  271. }
  272. void IOTracer::WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg) {
  273. if (!writer_.load()) {
  274. return;
  275. }
  276. InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
  277. if (!writer_.load()) {
  278. return;
  279. }
  280. writer_.load()->WriteIOOp(record, dbg).PermitUncheckedError();
  281. }
  282. } // namespace ROCKSDB_NAMESPACE