trace_analyzer_tool.h 9.9 KB


//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #ifndef ROCKSDB_LITE
#include <cstddef>
#include <cstdint>
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <queue>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "rocksdb/env.h"
#include "rocksdb/trace_reader_writer.h"
#include "rocksdb/write_batch.h"
#include "trace_replay/trace_replay.h"
  17. namespace ROCKSDB_NAMESPACE {
  18. class DBImpl;
  19. class WriteBatch;
  20. enum TraceOperationType : int {
  21. kGet = 0,
  22. kPut = 1,
  23. kDelete = 2,
  24. kSingleDelete = 3,
  25. kRangeDelete = 4,
  26. kMerge = 5,
  27. kIteratorSeek = 6,
  28. kIteratorSeekForPrev = 7,
  29. kTaTypeNum = 8
  30. };
  31. struct TraceUnit {
  32. uint64_t ts;
  33. uint32_t type;
  34. uint32_t cf_id;
  35. size_t value_size;
  36. std::string key;
  37. };
  38. struct TypeCorrelation {
  39. uint64_t count;
  40. uint64_t total_ts;
  41. };
  42. struct StatsUnit {
  43. uint64_t key_id;
  44. uint64_t access_count;
  45. uint64_t latest_ts;
  46. uint64_t succ_count; // current only used to count Get if key found
  47. uint32_t cf_id;
  48. size_t value_size;
  49. std::vector<TypeCorrelation> v_correlation;
  50. };
  51. class AnalyzerOptions {
  52. public:
  53. std::vector<std::vector<int>> correlation_map;
  54. std::vector<std::pair<int, int>> correlation_list;
  55. AnalyzerOptions();
  56. ~AnalyzerOptions();
  57. void SparseCorrelationInput(const std::string& in_str);
  58. };
  59. // Note that, for the variable names in the trace_analyzer,
  60. // Starting with 'a_' means the variable is used for 'accessed_keys'.
  61. // Starting with 'w_' means it is used for 'the whole key space'.
  62. // Ending with '_f' means a file write or reader pointer.
  63. // For example, 'a_count' means 'accessed_keys_count',
  64. // 'w_key_f' means 'whole_key_space_file'.
  65. struct TraceStats {
  66. uint32_t cf_id;
  67. std::string cf_name;
  68. uint64_t a_count;
  69. uint64_t a_succ_count;
  70. uint64_t a_key_id;
  71. uint64_t a_key_size_sqsum;
  72. uint64_t a_key_size_sum;
  73. uint64_t a_key_mid;
  74. uint64_t a_value_size_sqsum;
  75. uint64_t a_value_size_sum;
  76. uint64_t a_value_mid;
  77. uint32_t a_peak_qps;
  78. double a_ave_qps;
  79. std::map<std::string, StatsUnit> a_key_stats;
  80. std::map<uint64_t, uint64_t> a_count_stats;
  81. std::map<uint64_t, uint64_t> a_key_size_stats;
  82. std::map<uint64_t, uint64_t> a_value_size_stats;
  83. std::map<uint32_t, uint32_t> a_qps_stats;
  84. std::map<uint32_t, std::map<std::string, uint32_t>> a_qps_prefix_stats;
  85. std::priority_queue<std::pair<uint64_t, std::string>,
  86. std::vector<std::pair<uint64_t, std::string>>,
  87. std::greater<std::pair<uint64_t, std::string>>>
  88. top_k_queue;
  89. std::priority_queue<std::pair<uint64_t, std::string>,
  90. std::vector<std::pair<uint64_t, std::string>>,
  91. std::greater<std::pair<uint64_t, std::string>>>
  92. top_k_prefix_access;
  93. std::priority_queue<std::pair<double, std::string>,
  94. std::vector<std::pair<double, std::string>>,
  95. std::greater<std::pair<double, std::string>>>
  96. top_k_prefix_ave;
  97. std::priority_queue<std::pair<uint32_t, uint32_t>,
  98. std::vector<std::pair<uint32_t, uint32_t>>,
  99. std::greater<std::pair<uint32_t, uint32_t>>>
  100. top_k_qps_sec;
  101. std::list<TraceUnit> time_series;
  102. std::vector<std::pair<uint64_t, uint64_t>> correlation_output;
  103. std::map<uint32_t, uint64_t> uni_key_num;
  104. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> time_series_f;
  105. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_f;
  106. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_count_dist_f;
  107. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_prefix_cut_f;
  108. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_value_size_f;
  109. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_size_f;
  110. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_num_f;
  111. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_qps_f;
  112. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_top_qps_prefix_f;
  113. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> w_key_f;
  114. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> w_prefix_cut_f;
  115. TraceStats();
  116. ~TraceStats();
  117. TraceStats(const TraceStats&) = delete;
  118. TraceStats& operator=(const TraceStats&) = delete;
  119. TraceStats(TraceStats&&) = default;
  120. TraceStats& operator=(TraceStats&&) = default;
  121. };
  122. struct TypeUnit {
  123. std::string type_name;
  124. bool enabled;
  125. uint64_t total_keys;
  126. uint64_t total_access;
  127. uint64_t total_succ_access;
  128. uint32_t sample_count;
  129. std::map<uint32_t, TraceStats> stats;
  130. TypeUnit() = default;
  131. ~TypeUnit() = default;
  132. TypeUnit(const TypeUnit&) = delete;
  133. TypeUnit& operator=(const TypeUnit&) = delete;
  134. TypeUnit(TypeUnit&&) = default;
  135. TypeUnit& operator=(TypeUnit&&) = default;
  136. };
  137. struct CfUnit {
  138. uint32_t cf_id;
  139. uint64_t w_count; // total keys in this cf if we use the whole key space
  140. uint64_t a_count; // the total keys in this cf that are accessed
  141. std::map<uint64_t, uint64_t> w_key_size_stats; // whole key space key size
  142. // statistic this cf
  143. std::map<uint32_t, uint32_t> cf_qps;
  144. };
  145. class TraceAnalyzer {
  146. public:
  147. TraceAnalyzer(std::string& trace_path, std::string& output_path,
  148. AnalyzerOptions _analyzer_opts);
  149. ~TraceAnalyzer();
  150. Status PrepareProcessing();
  151. Status StartProcessing();
  152. Status MakeStatistics();
  153. Status ReProcessing();
  154. Status EndProcessing();
  155. Status WriteTraceUnit(TraceUnit& unit);
  156. // The trace processing functions for different type
  157. Status HandleGet(uint32_t column_family_id, const std::string& key,
  158. const uint64_t& ts, const uint32_t& get_ret);
  159. Status HandlePut(uint32_t column_family_id, const Slice& key,
  160. const Slice& value);
  161. Status HandleDelete(uint32_t column_family_id, const Slice& key);
  162. Status HandleSingleDelete(uint32_t column_family_id, const Slice& key);
  163. Status HandleDeleteRange(uint32_t column_family_id, const Slice& begin_key,
  164. const Slice& end_key);
  165. Status HandleMerge(uint32_t column_family_id, const Slice& key,
  166. const Slice& value);
  167. Status HandleIter(uint32_t column_family_id, const std::string& key,
  168. const uint64_t& ts, TraceType& trace_type);
  169. std::vector<TypeUnit>& GetTaVector() { return ta_; }
  170. private:
  171. ROCKSDB_NAMESPACE::Env* env_;
  172. EnvOptions env_options_;
  173. std::unique_ptr<TraceReader> trace_reader_;
  174. size_t offset_;
  175. char buffer_[1024];
  176. uint64_t c_time_;
  177. std::string trace_name_;
  178. std::string output_path_;
  179. AnalyzerOptions analyzer_opts_;
  180. uint64_t total_requests_;
  181. uint64_t total_access_keys_;
  182. uint64_t total_gets_;
  183. uint64_t total_writes_;
  184. uint64_t trace_create_time_;
  185. uint64_t begin_time_;
  186. uint64_t end_time_;
  187. uint64_t time_series_start_;
  188. uint32_t sample_max_;
  189. uint32_t cur_time_sec_;
  190. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
  191. trace_sequence_f_; // readable trace
  192. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> qps_f_; // overall qps
  193. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
  194. cf_qps_f_; // The qps of each CF>
  195. std::vector<TypeUnit> ta_; // The main statistic collecting data structure
  196. std::map<uint32_t, CfUnit> cfs_; // All the cf_id appears in this trace;
  197. std::vector<uint32_t> qps_peak_;
  198. std::vector<double> qps_ave_;
  199. Status ReadTraceHeader(Trace* header);
  200. Status ReadTraceFooter(Trace* footer);
  201. Status ReadTraceRecord(Trace* trace);
  202. Status KeyStatsInsertion(const uint32_t& type, const uint32_t& cf_id,
  203. const std::string& key, const size_t value_size,
  204. const uint64_t ts);
  205. Status StatsUnitCorrelationUpdate(StatsUnit& unit, const uint32_t& type,
  206. const uint64_t& ts, const std::string& key);
  207. Status OpenStatsOutputFiles(const std::string& type, TraceStats& new_stats);
  208. Status CreateOutputFile(
  209. const std::string& type, const std::string& cf_name,
  210. const std::string& ending,
  211. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>* f_ptr);
  212. void CloseOutputFiles();
  213. void PrintStatistics();
  214. Status TraceUnitWriter(
  215. std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>& f_ptr, TraceUnit& unit);
  216. Status WriteTraceSequence(const uint32_t& type, const uint32_t& cf_id,
  217. const std::string& key, const size_t value_size,
  218. const uint64_t ts);
  219. Status MakeStatisticKeyStatsOrPrefix(TraceStats& stats);
  220. Status MakeStatisticCorrelation(TraceStats& stats, StatsUnit& unit);
  221. Status MakeStatisticQPS();
  222. };
  223. // write bach handler to be used for WriteBache iterator
  224. // when processing the write trace
  225. class TraceWriteHandler : public WriteBatch::Handler {
  226. public:
  227. TraceWriteHandler() { ta_ptr = nullptr; }
  228. explicit TraceWriteHandler(TraceAnalyzer* _ta_ptr) { ta_ptr = _ta_ptr; }
  229. ~TraceWriteHandler() {}
  230. virtual Status PutCF(uint32_t column_family_id, const Slice& key,
  231. const Slice& value) override {
  232. return ta_ptr->HandlePut(column_family_id, key, value);
  233. }
  234. virtual Status DeleteCF(uint32_t column_family_id,
  235. const Slice& key) override {
  236. return ta_ptr->HandleDelete(column_family_id, key);
  237. }
  238. virtual Status SingleDeleteCF(uint32_t column_family_id,
  239. const Slice& key) override {
  240. return ta_ptr->HandleSingleDelete(column_family_id, key);
  241. }
  242. virtual Status DeleteRangeCF(uint32_t column_family_id,
  243. const Slice& begin_key,
  244. const Slice& end_key) override {
  245. return ta_ptr->HandleDeleteRange(column_family_id, begin_key, end_key);
  246. }
  247. virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
  248. const Slice& value) override {
  249. return ta_ptr->HandleMerge(column_family_id, key, value);
  250. }
  251. private:
  252. TraceAnalyzer* ta_ptr;
  253. };
  254. int trace_analyzer_tool(int argc, char** argv);
  255. } // namespace ROCKSDB_NAMESPACE
  256. #endif // ROCKSDB_LITE