// trace_replay.h (5.8 KB)
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <memory>
  7. #include <unordered_map>
  8. #include <utility>
  9. #include "rocksdb/env.h"
  10. #include "rocksdb/options.h"
  11. #include "rocksdb/trace_reader_writer.h"
  12. namespace ROCKSDB_NAMESPACE {
  13. // This file contains Tracer and Replayer classes that enable capturing and
  14. // replaying RocksDB traces.
  // Forward declarations of types that this header only names in
  // declarations; their definitions are not part of this file.
  class ColumnFamilyData;
  class ColumnFamilyHandle;
  class DB;
  class DBImpl;
  class Slice;
  class WriteBatch;
  // Magic string for traces; only declared here, the definition lives in
  // another translation unit.
  extern const std::string kTraceMagic;

  // Sizes of the fixed per-trace fields (presumably in bytes: the timestamp is
  // a uint64_t and the trace type is a char-based enum -- TODO confirm against
  // the encoder).
  const unsigned int kTraceTimestampSize = 8;
  const unsigned int kTraceTypeSize = 1;
  const unsigned int kTracePayloadLengthSize = 4;

  // Combined size of the three fixed fields above.
  const unsigned int kTraceMetadataSize =
      kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;
  // The kinds of trace records that are supported.
  // The underlying type is char, so each value occupies a single byte.
  enum TraceType : char {
    // Markers for the beginning and the end of a trace.
    kTraceBegin = 1,
    kTraceEnd = 2,

    // Traced operations.
    kTraceWrite = 3,
    kTraceGet = 4,
    kTraceIteratorSeek = 5,
    kTraceIteratorSeekForPrev = 6,

    // Block cache related types.
    kBlockTraceIndexBlock = 7,
    kBlockTraceFilterBlock = 8,
    kBlockTraceDataBlock = 9,
    kBlockTraceUncompressionDictBlock = 10,
    kBlockTraceRangeDeletionBlock = 11,

    // Sentinel: every new trace type must be added above this line.
    kTraceMax,
  };
  44. // TODO: This should also be made part of public interface to help users build
  45. // custom TracerReaders and TraceWriters.
  46. //
  47. // The data structure that defines a single trace.
  48. struct Trace {
  49. uint64_t ts; // timestamp
  50. TraceType type;
  51. std::string payload;
  52. void reset() {
  53. ts = 0;
  54. type = kTraceMax;
  55. payload.clear();
  56. }
  57. };
  58. class TracerHelper {
  59. public:
  60. // Encode a trace object into the given string.
  61. static void EncodeTrace(const Trace& trace, std::string* encoded_trace);
  62. // Decode a string into the given trace object.
  63. static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);
  64. };
  65. // Tracer captures all RocksDB operations using a user-provided TraceWriter.
  66. // Every RocksDB operation is written as a single trace. Each trace will have a
  67. // timestamp and type, followed by the trace payload.
  68. class Tracer {
  69. public:
  70. Tracer(Env* env, const TraceOptions& trace_options,
  71. std::unique_ptr<TraceWriter>&& trace_writer);
  72. ~Tracer();
  73. // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
  74. Status Write(WriteBatch* write_batch);
  75. // Trace Get operations.
  76. Status Get(ColumnFamilyHandle* cfname, const Slice& key);
  77. // Trace Iterators.
  78. Status IteratorSeek(const uint32_t& cf_id, const Slice& key);
  79. Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key);
  80. // Returns true if the trace is over the configured max trace file limit.
  81. // False otherwise.
  82. bool IsTraceFileOverMax();
  83. // Writes a trace footer at the end of the tracing
  84. Status Close();
  85. private:
  86. // Write a trace header at the beginning, typically on initiating a trace,
  87. // with some metadata like a magic number, trace version, RocksDB version, and
  88. // trace format.
  89. Status WriteHeader();
  90. // Write a trace footer, typically on ending a trace, with some metadata.
  91. Status WriteFooter();
  92. // Write a single trace using the provided TraceWriter to the underlying
  93. // system, say, a filesystem or a streaming service.
  94. Status WriteTrace(const Trace& trace);
  95. // Helps in filtering and sampling of traces.
  96. // Returns true if a trace should be skipped, false otherwise.
  97. bool ShouldSkipTrace(const TraceType& type);
  98. Env* env_;
  99. TraceOptions trace_options_;
  100. std::unique_ptr<TraceWriter> trace_writer_;
  101. uint64_t trace_request_count_;
  102. };
  103. // Replayer helps to replay the captured RocksDB operations, using a user
  104. // provided TraceReader.
  105. // The Replayer is instantiated via db_bench today, on using "replay" benchmark.
  106. class Replayer {
  107. public:
  108. Replayer(DB* db, const std::vector<ColumnFamilyHandle*>& handles,
  109. std::unique_ptr<TraceReader>&& reader);
  110. ~Replayer();
  111. // Replay all the traces from the provided trace stream, taking the delay
  112. // between the traces into consideration.
  113. Status Replay();
  114. // Replay the provide trace stream, which is the same as Replay(), with
  115. // multi-threads. Queries are scheduled in the thread pool job queue.
  116. // User can set the number of threads in the thread pool.
  117. Status MultiThreadReplay(uint32_t threads_num);
  118. // Enables fast forwarding a replay by reducing the delay between the ingested
  119. // traces.
  120. // fast_forward : Rate of replay speedup.
  121. // If 1, replay the operations at the same rate as in the trace stream.
  122. // If > 1, speed up the replay by this amount.
  123. Status SetFastForward(uint32_t fast_forward);
  124. private:
  125. Status ReadHeader(Trace* header);
  126. Status ReadFooter(Trace* footer);
  127. Status ReadTrace(Trace* trace);
  128. // The background function for MultiThreadReplay to execute Get query
  129. // based on the trace records.
  130. static void BGWorkGet(void* arg);
  131. // The background function for MultiThreadReplay to execute WriteBatch
  132. // (Put, Delete, SingleDelete, DeleteRange) based on the trace records.
  133. static void BGWorkWriteBatch(void* arg);
  134. // The background function for MultiThreadReplay to execute Iterator (Seek)
  135. // based on the trace records.
  136. static void BGWorkIterSeek(void* arg);
  137. // The background function for MultiThreadReplay to execute Iterator
  138. // (SeekForPrev) based on the trace records.
  139. static void BGWorkIterSeekForPrev(void* arg);
  140. DBImpl* db_;
  141. Env* env_;
  142. std::unique_ptr<TraceReader> trace_reader_;
  143. std::unordered_map<uint32_t, ColumnFamilyHandle*> cf_map_;
  144. uint32_t fast_forward_;
  145. };
  146. // The passin arg of MultiThreadRepkay for each trace record.
  147. struct ReplayerWorkerArg {
  148. DB* db;
  149. Trace trace_entry;
  150. std::unordered_map<uint32_t, ColumnFamilyHandle*>* cf_map;
  151. WriteOptions woptions;
  152. ReadOptions roptions;
  153. };
  154. } // namespace ROCKSDB_NAMESPACE