trace_replay.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <atomic>
  7. #include <memory>
  8. #include <mutex>
  9. #include <unordered_map>
  10. #include <utility>
  11. #include "rocksdb/options.h"
  12. #include "rocksdb/rocksdb_namespace.h"
  13. #include "rocksdb/status.h"
  14. #include "rocksdb/trace_record.h"
  15. #include "rocksdb/utilities/replayer.h"
  16. namespace ROCKSDB_NAMESPACE {
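  // This file declares the Trace record, the TracerHelper encode/decode
  // utilities and the Tracer class used to capture RocksDB traces. No Replayer
  // class is declared in this header; the replay interface presumably lives in
  // the included rocksdb/utilities/replayer.h.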
  // Forward declarations of RocksDB types, so this header can name them
  // without requiring their full definitions at this point.

  // Types declared with the `class` key.
  class ColumnFamilyHandle;
  class ColumnFamilyData;
  class DB;
  class DBImpl;
  class Env;
  class Slice;
  class SystemClock;
  class TraceReader;
  class TraceWriter;
  class WriteBatch;

  // Types declared with the `struct` key.
  struct ReadOptions;
  struct TraceOptions;
  struct WriteOptions;
  // Declared here, defined in another translation unit. Presumably the magic
  // string written into the trace header -- confirm against the definition.
  extern const std::string kTraceMagic;

  // Sizes of the fixed fields that precede every trace payload (presumably in
  // bytes -- confirm against the encoder).
  constexpr unsigned int kTraceTimestampSize = 8;
  constexpr unsigned int kTraceTypeSize = 1;
  constexpr unsigned int kTracePayloadLengthSize = 4;

  // Combined size of the three fixed fields above.
  constexpr unsigned int kTraceMetadataSize =
      kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;

  // Trace file format version, as <major>.<minor>.
  constexpr int kTraceFileMajorVersion = 0;
  constexpr int kTraceFileMinorVersion = 2;
  40. // The data structure that defines a single trace.
  41. struct Trace {
  42. uint64_t ts; // timestamp
  43. TraceType type;
  44. // Each bit in payload_map stores which corresponding struct member added in
  45. // the payload. Each TraceType has its corresponding payload struct. For
  46. // example, if bit at position 0 is set in write payload, then the write batch
  47. // will be addedd.
  48. uint64_t payload_map = 0;
  49. // Each trace type has its own payload_struct, which will be serialized in the
  50. // payload.
  51. std::string payload;
  52. void reset() {
  53. ts = 0;
  54. type = kTraceMax;
  55. payload_map = 0;
  56. payload.clear();
  57. }
  58. };
  // Identifies one member of a query payload struct. Every member of every
  // query payload struct should have a matching enumerator here. New
  // enumerators must be appended in the order the members are added, keeping
  // the values sequential.
  // NOTE(review): the values are presumably bit positions in
  // Trace::payload_map (see TracerHelper::SetPayloadMap) -- confirm against
  // the implementation.
  enum TracePayloadType : char {
    kEmptyPayload = 0,

    // Write payload.
    kWriteBatchData = 1,

    // Get payload.
    kGetCFID = 2,
    kGetKey = 3,

    // Iterator payload.
    kIterCFID = 4,
    kIterKey = 5,
    kIterLowerBound = 6,
    kIterUpperBound = 7,

    // MultiGet payload.
    kMultiGetSize = 8,
    kMultiGetCFIDs = 9,
    kMultiGetKeys = 10,
  };
  74. class TracerHelper {
  75. public:
  76. // Parse the string with major and minor version only
  77. static Status ParseVersionStr(std::string& v_string, int* v_num);
  78. // Parse the trace file version and db version in trace header
  79. static Status ParseTraceHeader(const Trace& header, int* trace_version,
  80. int* db_version);
  81. // Encode a version 0.1 trace object into the given string.
  82. static void EncodeTrace(const Trace& trace, std::string* encoded_trace);
  83. // Decode a string into the given trace object.
  84. static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);
  85. // Decode a string into the given trace header.
  86. static Status DecodeHeader(const std::string& encoded_trace, Trace* header);
  87. // Set the payload map based on the payload type
  88. static bool SetPayloadMap(uint64_t& payload_map,
  89. const TracePayloadType payload_type);
  90. // Decode a Trace object into the corresponding TraceRecord.
  91. // Return Status::OK() if nothing is wrong, record will be set accordingly.
  92. // Return Status::NotSupported() if the trace type is not support, or the
  93. // corresponding error status, record will be set to nullptr.
  94. static Status DecodeTraceRecord(Trace* trace, int trace_file_version,
  95. std::unique_ptr<TraceRecord>* record);
  96. };
  97. // Tracer captures all RocksDB operations using a user-provided TraceWriter.
  98. // Every RocksDB operation is written as a single trace. Each trace will have a
  99. // timestamp and type, followed by the trace payload.
  100. class Tracer {
  101. public:
  102. Tracer(SystemClock* clock, const TraceOptions& trace_options,
  103. std::unique_ptr<TraceWriter>&& trace_writer);
  104. ~Tracer();
  105. // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
  106. Status Write(WriteBatch* write_batch);
  107. // Trace Get operations.
  108. Status Get(ColumnFamilyHandle* cfname, const Slice& key);
  109. // Trace Iterators.
  110. Status IteratorSeek(const uint32_t& cf_id, const Slice& key,
  111. const Slice& lower_bound, const Slice upper_bound);
  112. Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key,
  113. const Slice& lower_bound, const Slice upper_bound);
  114. // Trace MultiGet
  115. Status MultiGet(const size_t num_keys, ColumnFamilyHandle** column_families,
  116. const Slice* keys);
  117. Status MultiGet(const size_t num_keys, ColumnFamilyHandle* column_family,
  118. const Slice* keys);
  119. Status MultiGet(const std::vector<ColumnFamilyHandle*>& column_family,
  120. const std::vector<Slice>& keys);
  121. // Returns true if the trace is over the configured max trace file limit.
  122. // False otherwise.
  123. bool IsTraceFileOverMax();
  124. // Returns true if the order of write trace records must match the order of
  125. // the corresponding records logged to WAL and applied to the DB.
  126. bool IsWriteOrderPreserved() { return trace_options_.preserve_write_order; }
  127. // Writes a trace footer at the end of the tracing
  128. Status Close();
  129. private:
  130. // Write a trace header at the beginning, typically on initiating a trace,
  131. // with some metadata like a magic number, trace version, RocksDB version, and
  132. // trace format.
  133. Status WriteHeader();
  134. // Write a trace footer, typically on ending a trace, with some metadata.
  135. Status WriteFooter();
  136. // Write a single trace using the provided TraceWriter to the underlying
  137. // system, say, a filesystem or a streaming service.
  138. Status WriteTrace(const Trace& trace);
  139. // Helps in filtering and sampling of traces.
  140. // Returns true if a trace should be skipped, false otherwise.
  141. bool ShouldSkipTrace(const TraceType& type);
  142. SystemClock* clock_;
  143. TraceOptions trace_options_;
  144. std::unique_ptr<TraceWriter> trace_writer_;
  145. uint64_t trace_request_count_;
  146. Status trace_write_status_;
  147. };
  148. } // namespace ROCKSDB_NAMESPACE