internal_stats.h 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. #pragma once
  11. #include <map>
  12. #include <memory>
  13. #include <string>
  14. #include <vector>
  15. #include "cache/cache_entry_roles.h"
  16. #include "db/version_set.h"
  17. #include "rocksdb/system_clock.h"
  18. #include "util/hash_containers.h"
  19. namespace ROCKSDB_NAMESPACE {
  20. template <class Stats>
  21. class CacheEntryStatsCollector;
  22. class DBImpl;
  23. class MemTableList;
// Config for retrieving a property's value. Exactly one of the four handler
// function pointers below is populated with a non-nullptr value; the others
// stay nullptr (see the gcc workaround note).
struct DBPropertyInfo {
  // True if the property can be retrieved without holding the DB mutex
  // (only supported for int properties; see handle_int).
  bool need_out_of_mutex;

  // gcc had an internal error for initializing union of pointer-to-member-
  // functions. Workaround is to populate exactly one of the following function
  // pointers with a non-nullptr value.

  // @param value Value-result argument for storing the property's string value
  // @param suffix Argument portion of the property. For example, suffix would
  //   be "5" for the property "rocksdb.num-files-at-level5". So far, only
  //   certain string properties take an argument.
  bool (InternalStats::*handle_string)(std::string* value, Slice suffix);

  // @param value Value-result argument for storing the property's uint64 value
  // @param db Many of the int properties rely on DBImpl methods.
  // @param version Version is needed in case the property is retrieved without
  //   holding db mutex, which is only supported for int properties.
  bool (InternalStats::*handle_int)(uint64_t* value, DBImpl* db,
                                    Version* version);

  // @param props Map of general properties to populate
  // @param suffix Argument portion of the property. (see handle_string)
  bool (InternalStats::*handle_map)(std::map<std::string, std::string>* props,
                                    Slice suffix);

  // handle the string type properties rely on DBImpl methods
  // @param value Value-result argument for storing the property's string value
  bool (DBImpl::*handle_string_dbimpl)(std::string* value);
};
  49. const DBPropertyInfo* GetPropertyInfo(const Slice& property);
  50. #undef SCORE
// One enumerator per column of the per-level compaction stats table. The
// user-facing property/header names for each column live in
// InternalStats::compaction_level_stats.
enum class LevelStatType {
  INVALID = 0,
  NUM_FILES,
  COMPACTED_FILES,
  SIZE_BYTES,
  SCORE,
  READ_GB,
  RN_GB,
  RNP1_GB,
  WRITE_GB,
  WRITE_PRE_COMP_GB,
  W_NEW_GB,
  MOVED_GB,
  WRITE_AMP,
  READ_MBPS,
  WRITE_MBPS,
  COMP_SEC,
  COMP_CPU_SEC,
  COMP_COUNT,
  AVG_SEC,
  KEY_IN,
  KEY_DROP,
  R_BLOB_GB,
  W_BLOB_GB,
  TOTAL  // total number of types (sentinel, not a real column)
};
// Naming metadata for one column of the per-level compaction stats table.
struct LevelStat {
  // This is what will be L?.property_name in the flat map returned to the user
  std::string property_name;
  // This is what we will print in the header in the cli
  std::string header_name;
};
// Naming metadata for one DB-scope stat (see
// InternalStats::db_stats_type_to_info).
struct DBStatInfo {
  // This is what will be property_name in the flat map returned to the user
  std::string property_name;
};
  87. class InternalStats {
  88. public:
  89. static const std::map<LevelStatType, LevelStat> compaction_level_stats;
  // Per-column-family counters tracked by this class. Enumerators before
  // WRITE_STALLS_ENUM_MAX count write-stall events; the rest count flush and
  // file-ingestion totals. Sentinels are used to size the stat arrays.
  enum InternalCFStatsType {
    MEMTABLE_LIMIT_DELAYS,
    MEMTABLE_LIMIT_STOPS,
    L0_FILE_COUNT_LIMIT_DELAYS,
    L0_FILE_COUNT_LIMIT_STOPS,
    PENDING_COMPACTION_BYTES_LIMIT_DELAYS,
    PENDING_COMPACTION_BYTES_LIMIT_STOPS,
    // Write slowdown caused by l0 file count limit while there is ongoing L0
    // compaction
    L0_FILE_COUNT_LIMIT_DELAYS_WITH_ONGOING_COMPACTION,
    // Write stop caused by l0 file count limit while there is ongoing L0
    // compaction
    L0_FILE_COUNT_LIMIT_STOPS_WITH_ONGOING_COMPACTION,
    // Sentinel: end of all write stall stats
    WRITE_STALLS_ENUM_MAX,
    BYTES_FLUSHED,
    BYTES_INGESTED_ADD_FILE,
    INGESTED_NUM_FILES_TOTAL,
    INGESTED_LEVEL0_NUM_FILES_TOTAL,
    INGESTED_NUM_KEYS_TOTAL,
    // Sentinel: total enumerator count (sizes cf_stats_value_/cf_stats_count_)
    INTERNAL_CF_STATS_ENUM_MAX,
  };
  // DB-scope counters tracked by this class. kIntStatsNumMax is a sentinel
  // that sizes the db_stats_ array.
  enum InternalDBStatsType {
    kIntStatsWalFileBytes,
    kIntStatsWalFileSynced,
    kIntStatsBytesWritten,
    kIntStatsNumKeysWritten,
    kIntStatsWriteDoneByOther,
    kIntStatsWriteDoneBySelf,
    kIntStatsWriteWithWal,
    // TODO(hx235): Currently `kIntStatsWriteStallMicros` only measures
    // "delayed" time of CF-scope write stalls, not including the "stopped" time
    // nor any DB-scope write stalls (e.g, ones triggered by
    // `WriteBufferManager`).
    //
    // However, the word "write stall" includes both "delayed" and "stopped"
    // (see `WriteStallCondition`) and DB-scope writes stalls (see
    // `WriteStallCause`).
    //
    // So we should improve, rename or clarify it
    kIntStatsWriteStallMicros,
    kIntStatsWriteBufferManagerLimitStopsCounts,
    // Sentinel: total enumerator count (sizes db_stats_)
    kIntStatsNumMax,
  };
  134. static const std::map<InternalDBStatsType, DBStatInfo> db_stats_type_to_info;
  135. InternalStats(int num_levels, SystemClock* clock, ColumnFamilyData* cfd);
  136. // Per level compaction stats. comp_stats_[level] stores the stats for
  137. // compactions that produced data for the specified "level".
  138. struct CompactionStats {
  139. uint64_t micros;
  140. uint64_t cpu_micros;
  141. // The number of bytes read from all non-output levels (table files)
  142. uint64_t bytes_read_non_output_levels;
  143. // The number of bytes read from the compaction output level (table files)
  144. uint64_t bytes_read_output_level;
  145. // The number of bytes skipped from all non-output levels because the input
  146. // files are filtered by compaction optimizations.
  147. uint64_t bytes_skipped_non_output_levels;
  148. // The number of bytes skipped from the compaction output level because the
  149. // input files are filtered by compaction optimizations.
  150. uint64_t bytes_skipped_output_level;
  151. // The number of bytes read from blob files
  152. uint64_t bytes_read_blob;
  153. // Total number of bytes written to table files during compaction
  154. uint64_t bytes_written;
  155. // Total number of bytes written pre-compression during compaction
  156. uint64_t bytes_written_pre_comp;
  157. // Total number of bytes written to blob files during compaction
  158. uint64_t bytes_written_blob;
  159. // Total number of bytes moved to the output level (table files)
  160. uint64_t bytes_moved;
  161. // The number of compaction input files in all non-output levels (table
  162. // files)
  163. int num_input_files_in_non_output_levels;
  164. // The number of compaction input files in the output level (table files)
  165. int num_input_files_in_output_level;
  166. // The number of non output level compaction input files that are filtered
  167. // by compaction optimizations.
  168. int num_filtered_input_files_in_non_output_levels;
  169. // The number of output level compaction input files that are filtered by
  170. // compaction optimizations.
  171. int num_filtered_input_files_in_output_level;
  172. // The number of compaction output files (table files)
  173. int num_output_files;
  174. // The number of compaction output files (blob files)
  175. int num_output_files_blob;
  176. // Total incoming entries during compaction between levels N and N+1
  177. uint64_t num_input_records;
  178. // Accumulated diff number of entries
  179. // (num input entries - num output entries) for compaction levels N and N+1
  180. uint64_t num_dropped_records;
  181. // Total output entries from compaction
  182. uint64_t num_output_records;
  183. // Number of compactions done
  184. int count;
  185. // Number of compactions done per CompactionReason
  186. int counts[static_cast<int>(CompactionReason::kNumOfReasons)]{};
  187. explicit CompactionStats()
  188. : micros(0),
  189. cpu_micros(0),
  190. bytes_read_non_output_levels(0),
  191. bytes_read_output_level(0),
  192. bytes_skipped_non_output_levels(0),
  193. bytes_skipped_output_level(0),
  194. bytes_read_blob(0),
  195. bytes_written(0),
  196. bytes_written_pre_comp(0),
  197. bytes_written_blob(0),
  198. bytes_moved(0),
  199. num_input_files_in_non_output_levels(0),
  200. num_input_files_in_output_level(0),
  201. num_filtered_input_files_in_non_output_levels(0),
  202. num_filtered_input_files_in_output_level(0),
  203. num_output_files(0),
  204. num_output_files_blob(0),
  205. num_input_records(0),
  206. num_dropped_records(0),
  207. num_output_records(0),
  208. count(0) {
  209. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  210. for (int i = 0; i < num_of_reasons; i++) {
  211. counts[i] = 0;
  212. }
  213. }
  214. explicit CompactionStats(CompactionReason reason, int c)
  215. : micros(0),
  216. cpu_micros(0),
  217. bytes_read_non_output_levels(0),
  218. bytes_read_output_level(0),
  219. bytes_skipped_non_output_levels(0),
  220. bytes_skipped_output_level(0),
  221. bytes_read_blob(0),
  222. bytes_written(0),
  223. bytes_written_pre_comp(0),
  224. bytes_written_blob(0),
  225. bytes_moved(0),
  226. num_input_files_in_non_output_levels(0),
  227. num_input_files_in_output_level(0),
  228. num_filtered_input_files_in_non_output_levels(0),
  229. num_filtered_input_files_in_output_level(0),
  230. num_output_files(0),
  231. num_output_files_blob(0),
  232. num_input_records(0),
  233. num_dropped_records(0),
  234. num_output_records(0),
  235. count(c) {
  236. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  237. for (int i = 0; i < num_of_reasons; i++) {
  238. counts[i] = 0;
  239. }
  240. int r = static_cast<int>(reason);
  241. if (r >= 0 && r < num_of_reasons) {
  242. counts[r] = c;
  243. } else {
  244. count = 0;
  245. }
  246. }
  247. CompactionStats(const CompactionStats& c)
  248. : micros(c.micros),
  249. cpu_micros(c.cpu_micros),
  250. bytes_read_non_output_levels(c.bytes_read_non_output_levels),
  251. bytes_read_output_level(c.bytes_read_output_level),
  252. bytes_skipped_non_output_levels(c.bytes_skipped_non_output_levels),
  253. bytes_skipped_output_level(c.bytes_skipped_output_level),
  254. bytes_read_blob(c.bytes_read_blob),
  255. bytes_written(c.bytes_written),
  256. bytes_written_pre_comp(c.bytes_written_pre_comp),
  257. bytes_written_blob(c.bytes_written_blob),
  258. bytes_moved(c.bytes_moved),
  259. num_input_files_in_non_output_levels(
  260. c.num_input_files_in_non_output_levels),
  261. num_input_files_in_output_level(c.num_input_files_in_output_level),
  262. num_filtered_input_files_in_non_output_levels(
  263. c.num_filtered_input_files_in_non_output_levels),
  264. num_filtered_input_files_in_output_level(
  265. c.num_filtered_input_files_in_output_level),
  266. num_output_files(c.num_output_files),
  267. num_output_files_blob(c.num_output_files_blob),
  268. num_input_records(c.num_input_records),
  269. num_dropped_records(c.num_dropped_records),
  270. num_output_records(c.num_output_records),
  271. count(c.count) {
  272. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  273. for (int i = 0; i < num_of_reasons; i++) {
  274. counts[i] = c.counts[i];
  275. }
  276. }
  277. CompactionStats& operator=(const CompactionStats& c) {
  278. micros = c.micros;
  279. cpu_micros = c.cpu_micros;
  280. bytes_read_non_output_levels = c.bytes_read_non_output_levels;
  281. bytes_read_output_level = c.bytes_read_output_level;
  282. bytes_skipped_non_output_levels = c.bytes_skipped_non_output_levels;
  283. bytes_skipped_output_level = c.bytes_skipped_output_level;
  284. bytes_read_blob = c.bytes_read_blob;
  285. bytes_written = c.bytes_written;
  286. bytes_written_pre_comp = c.bytes_written_pre_comp;
  287. bytes_written_blob = c.bytes_written_blob;
  288. bytes_moved = c.bytes_moved;
  289. num_input_files_in_non_output_levels =
  290. c.num_input_files_in_non_output_levels;
  291. num_input_files_in_output_level = c.num_input_files_in_output_level;
  292. num_filtered_input_files_in_non_output_levels =
  293. c.num_filtered_input_files_in_non_output_levels;
  294. num_filtered_input_files_in_output_level =
  295. c.num_filtered_input_files_in_output_level;
  296. num_output_files = c.num_output_files;
  297. num_output_files_blob = c.num_output_files_blob;
  298. num_input_records = c.num_input_records;
  299. num_dropped_records = c.num_dropped_records;
  300. num_output_records = c.num_output_records;
  301. count = c.count;
  302. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  303. for (int i = 0; i < num_of_reasons; i++) {
  304. counts[i] = c.counts[i];
  305. }
  306. return *this;
  307. }
  308. void Clear() {
  309. this->micros = 0;
  310. this->cpu_micros = 0;
  311. this->bytes_read_non_output_levels = 0;
  312. this->bytes_read_output_level = 0;
  313. this->bytes_skipped_non_output_levels = 0;
  314. this->bytes_skipped_output_level = 0;
  315. this->bytes_read_blob = 0;
  316. this->bytes_written = 0;
  317. this->bytes_written_pre_comp = 0;
  318. this->bytes_written_blob = 0;
  319. this->bytes_moved = 0;
  320. this->num_input_files_in_non_output_levels = 0;
  321. this->num_input_files_in_output_level = 0;
  322. this->num_filtered_input_files_in_non_output_levels = 0;
  323. this->num_filtered_input_files_in_output_level = 0;
  324. this->num_output_files = 0;
  325. this->num_output_files_blob = 0;
  326. this->num_input_records = 0;
  327. this->num_dropped_records = 0;
  328. this->num_output_records = 0;
  329. this->count = 0;
  330. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  331. for (int i = 0; i < num_of_reasons; i++) {
  332. counts[i] = 0;
  333. }
  334. }
  335. void Add(const CompactionStats& c) {
  336. this->micros += c.micros;
  337. this->cpu_micros += c.cpu_micros;
  338. this->bytes_read_non_output_levels += c.bytes_read_non_output_levels;
  339. this->bytes_read_output_level += c.bytes_read_output_level;
  340. this->bytes_skipped_non_output_levels +=
  341. c.bytes_skipped_non_output_levels;
  342. this->bytes_skipped_output_level += c.bytes_skipped_output_level;
  343. this->bytes_read_blob += c.bytes_read_blob;
  344. this->bytes_written += c.bytes_written;
  345. this->bytes_written_pre_comp += c.bytes_written_pre_comp;
  346. this->bytes_written_blob += c.bytes_written_blob;
  347. this->bytes_moved += c.bytes_moved;
  348. this->num_input_files_in_non_output_levels +=
  349. c.num_input_files_in_non_output_levels;
  350. this->num_input_files_in_output_level +=
  351. c.num_input_files_in_output_level;
  352. this->num_filtered_input_files_in_non_output_levels +=
  353. c.num_filtered_input_files_in_non_output_levels;
  354. this->num_filtered_input_files_in_output_level +=
  355. c.num_filtered_input_files_in_output_level;
  356. this->num_output_files += c.num_output_files;
  357. this->num_output_files_blob += c.num_output_files_blob;
  358. this->num_input_records += c.num_input_records;
  359. this->num_dropped_records += c.num_dropped_records;
  360. this->num_output_records += c.num_output_records;
  361. this->count += c.count;
  362. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  363. for (int i = 0; i < num_of_reasons; i++) {
  364. counts[i] += c.counts[i];
  365. }
  366. }
  367. void Subtract(const CompactionStats& c) {
  368. this->micros -= c.micros;
  369. this->cpu_micros -= c.cpu_micros;
  370. this->bytes_read_non_output_levels -= c.bytes_read_non_output_levels;
  371. this->bytes_read_output_level -= c.bytes_read_output_level;
  372. this->bytes_skipped_non_output_levels -=
  373. c.bytes_skipped_non_output_levels;
  374. this->bytes_skipped_output_level -= c.bytes_skipped_output_level;
  375. this->bytes_read_blob -= c.bytes_read_blob;
  376. this->bytes_written -= c.bytes_written;
  377. this->bytes_written_pre_comp -= c.bytes_written_pre_comp;
  378. this->bytes_written_blob -= c.bytes_written_blob;
  379. this->bytes_moved -= c.bytes_moved;
  380. this->num_input_files_in_non_output_levels -=
  381. c.num_input_files_in_non_output_levels;
  382. this->num_input_files_in_output_level -=
  383. c.num_input_files_in_output_level;
  384. this->num_filtered_input_files_in_non_output_levels -=
  385. c.num_filtered_input_files_in_non_output_levels;
  386. this->num_filtered_input_files_in_output_level -=
  387. c.num_filtered_input_files_in_output_level;
  388. this->num_output_files -= c.num_output_files;
  389. this->num_output_files_blob -= c.num_output_files_blob;
  390. this->num_input_records -= c.num_input_records;
  391. this->num_dropped_records -= c.num_dropped_records;
  392. this->num_output_records -= c.num_output_records;
  393. this->count -= c.count;
  394. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  395. for (int i = 0; i < num_of_reasons; i++) {
  396. counts[i] -= c.counts[i];
  397. }
  398. }
  399. void ResetCompactionReason(CompactionReason reason) {
  400. int num_of_reasons = static_cast<int>(CompactionReason::kNumOfReasons);
  401. assert(count == 1); // only support update one compaction reason
  402. for (int i = 0; i < num_of_reasons; i++) {
  403. counts[i] = 0;
  404. }
  405. int r = static_cast<int>(reason);
  406. assert(r >= 0 && r < num_of_reasons);
  407. counts[r] = 1;
  408. }
  409. };
  410. // Compaction internal stats, for per_key_placement compaction, it includes 2
  411. // output level stats: the last level and the proximal level.
  412. struct CompactionStatsFull {
  413. // the stats for the target primary output level
  414. CompactionStats output_level_stats;
  415. // stats for proximal level output if exist
  416. bool has_proximal_level_output = false;
  417. CompactionStats proximal_level_stats;
  418. explicit CompactionStatsFull()
  419. : output_level_stats(), proximal_level_stats() {}
  420. explicit CompactionStatsFull(CompactionReason reason, int c)
  421. : output_level_stats(reason, c), proximal_level_stats(reason, c) {}
  422. uint64_t TotalBytesWritten() const {
  423. uint64_t bytes_written = output_level_stats.bytes_written +
  424. output_level_stats.bytes_written_blob;
  425. if (has_proximal_level_output) {
  426. bytes_written += proximal_level_stats.bytes_written +
  427. proximal_level_stats.bytes_written_blob;
  428. }
  429. return bytes_written;
  430. }
  431. uint64_t DroppedRecords() {
  432. uint64_t output_records = output_level_stats.num_output_records;
  433. if (has_proximal_level_output) {
  434. output_records += proximal_level_stats.num_output_records;
  435. }
  436. if (output_level_stats.num_input_records > output_records) {
  437. return output_level_stats.num_input_records - output_records;
  438. }
  439. return 0;
  440. }
  441. void SetMicros(uint64_t val) {
  442. output_level_stats.micros = val;
  443. proximal_level_stats.micros = val;
  444. }
  445. void AddCpuMicros(uint64_t val) {
  446. output_level_stats.cpu_micros += val;
  447. proximal_level_stats.cpu_micros += val;
  448. }
  449. };
  // For use with CacheEntryStatsCollector
  struct CacheEntryRoleStats {
    uint64_t cache_capacity = 0;
    uint64_t cache_usage = 0;
    size_t table_size = 0;
    size_t occupancy = 0;
    std::string cache_id;
    // Per-role aggregated charge and entry counts. NOTE(review): these arrays
    // have no zeroing initializer here; presumably BeginCollection (defined
    // elsewhere) resets them -- confirm before relying on initial values.
    std::array<uint64_t, kNumCacheEntryRoles> total_charges;
    std::array<size_t, kNumCacheEntryRoles> entry_counts;
    // Number of collections performed; survives Clear() (see below).
    uint32_t collection_count = 0;
    uint32_t copies_of_last_collection = 0;
    uint64_t last_start_time_micros_ = 0;
    uint64_t last_end_time_micros_ = 0;
    uint32_t hash_seed = 0;

    // Resets all fields to their default values except collection_count,
    // which accumulates across clears.
    void Clear() {
      // Wipe everything except collection_count
      uint32_t saved_collection_count = collection_count;
      *this = CacheEntryRoleStats();
      collection_count = saved_collection_count;
    }

    void BeginCollection(Cache*, SystemClock*, uint64_t start_time_micros);
    // Returns the per-entry callback used to accumulate stats during a cache
    // scan.
    std::function<void(const Slice& key, Cache::ObjectPtr value, size_t charge,
                       const Cache::CacheItemHelper* helper)>
    GetEntryCallback();
    void EndCollection(Cache*, SystemClock*, uint64_t end_time_micros);
    void SkippedCollection();
    std::string ToString(SystemClock* clock) const;
    void ToMap(std::map<std::string, std::string>* values,
               SystemClock* clock) const;

   private:
    uint64_t GetLastDurationMicros() const;
  };
  482. void Clear() {
  483. for (int i = 0; i < kIntStatsNumMax; i++) {
  484. db_stats_[i].store(0);
  485. }
  486. for (int i = 0; i < INTERNAL_CF_STATS_ENUM_MAX; i++) {
  487. cf_stats_count_[i] = 0;
  488. cf_stats_value_[i] = 0;
  489. }
  490. for (auto& comp_stat : comp_stats_) {
  491. comp_stat.Clear();
  492. }
  493. per_key_placement_comp_stats_.Clear();
  494. for (auto& h : file_read_latency_) {
  495. h.Clear();
  496. }
  497. blob_file_read_latency_.Clear();
  498. cf_stats_snapshot_.Clear();
  499. db_stats_snapshot_.Clear();
  500. bg_error_count_ = 0;
  501. started_at_ = clock_->NowMicros();
  502. has_cf_change_since_dump_ = true;
  503. }
  // Accumulates `stats` into the cumulative stats for `level` and into the
  // stats bucketed by the priority of the thread that ran the compaction.
  void AddCompactionStats(int level, Env::Priority thread_pri,
                          const CompactionStats& stats) {
    comp_stats_[level].Add(stats);
    comp_stats_by_pri_[thread_pri].Add(stats);
  }

  // Overload for per_key_placement compactions: the primary output-level
  // stats are charged to `level`; proximal-level output (if any) is tracked
  // separately in per_key_placement_comp_stats_.
  void AddCompactionStats(int level, Env::Priority thread_pri,
                          const CompactionStatsFull& comp_stats_full) {
    AddCompactionStats(level, thread_pri, comp_stats_full.output_level_stats);
    if (comp_stats_full.has_proximal_level_output) {
      per_key_placement_comp_stats_.Add(comp_stats_full.proximal_level_stats);
    }
  }

  // Accounts `amount` bytes as moved into `level` by compaction.
  void IncBytesMoved(int level, uint64_t amount) {
    comp_stats_[level].bytes_moved += amount;
  }

  // Accumulates `value` into the given CF stat, bumps its event count, and
  // flags that CF stats changed since the last periodic dump.
  void AddCFStats(InternalCFStatsType type, uint64_t value) {
    has_cf_change_since_dump_ = true;
    cf_stats_value_[type] += value;
    ++cf_stats_count_[type];
  }

  // Relaxed-atomic counter of sorted runs held by currently running
  // compactions; incremented/decremented as compactions start and finish.
  void IncrNumRunningCompactionSortedRuns(uint64_t value) {
    num_running_compaction_sorted_runs_.fetch_add(value,
                                                  std::memory_order_relaxed);
  }

  void DecrNumRunningCompactionSortedRuns(uint64_t value) {
    num_running_compaction_sorted_runs_.fetch_sub(value,
                                                  std::memory_order_relaxed);
  }

  uint64_t NumRunningCompactionSortedRuns() {
    return num_running_compaction_sorted_runs_.load(std::memory_order_relaxed);
  }
  535. void AddDBStats(InternalDBStatsType type, uint64_t value,
  536. bool concurrent = false) {
  537. auto& v = db_stats_[type];
  538. if (concurrent) {
  539. v.fetch_add(value, std::memory_order_relaxed);
  540. } else {
  541. v.store(v.load(std::memory_order_relaxed) + value,
  542. std::memory_order_relaxed);
  543. }
  544. }
  // Returns the current value of the given per-DB counter (relaxed read).
  uint64_t GetDBStats(InternalDBStatsType type) {
    return db_stats_[type].load(std::memory_order_relaxed);
  }

  // Histogram of table-file read latencies for the given level.
  HistogramImpl* GetFileReadHist(int level) {
    return &file_read_latency_[level];
  }

  // Histogram of blob-file read latencies.
  HistogramImpl* GetBlobFileReadHist() { return &blob_file_read_latency_; }

  uint64_t GetBackgroundErrorCount() const { return bg_error_count_; }

  uint64_t BumpAndGetBackgroundErrorCount() { return ++bg_error_count_; }

  // Property getters; each dispatches through the handler recorded in
  // `property_info` and returns true on success (see DBPropertyInfo).
  bool GetStringProperty(const DBPropertyInfo& property_info,
                         const Slice& property, std::string* value);

  bool GetMapProperty(const DBPropertyInfo& property_info,
                      const Slice& property,
                      std::map<std::string, std::string>* value);

  bool GetIntProperty(const DBPropertyInfo& property_info, uint64_t* value,
                      DBImpl* db);

  bool GetIntPropertyOutOfMutex(const DBPropertyInfo& property_info,
                                Version* version, uint64_t* value);

  // Unless there is a recent enough collection of the stats, collect and
  // saved new cache entry stats. If `foreground`, require data to be more
  // recent to skip re-collection.
  //
  // This should only be called while NOT holding the DB mutex.
  void CollectCacheEntryStats(bool foreground);

  // Test-only accessors into internal stats state.
  const uint64_t* TEST_GetCFStatsValue() const { return cf_stats_value_; }

  const std::vector<CompactionStats>& TEST_GetCompactionStats() const {
    return comp_stats_;
  }

  const CompactionStats& TEST_GetPerKeyPlacementCompactionStats() const {
    return per_key_placement_comp_stats_;
  }

  void TEST_GetCacheEntryRoleStats(CacheEntryRoleStats* stats, bool foreground);

  // Store a mapping from the user-facing DB::Properties string to our
  // DBPropertyInfo struct used internally for retrieving properties.
  static const UnorderedMap<std::string, DBPropertyInfo> ppt_name_to_info;

  static const std::string kPeriodicCFStats;
 private:
  // Dump helpers used by the property handlers above; "Map" variants populate
  // a string->string map, the others append formatted text to `value`.
  void DumpDBMapStats(std::map<std::string, std::string>* db_stats);
  void DumpDBStats(std::string* value);
  void DumpDBMapStatsWriteStall(std::map<std::string, std::string>* value);
  void DumpDBStatsWriteStall(std::string* value);
  void DumpCFMapStats(std::map<std::string, std::string>* cf_stats);
  void DumpCFMapStats(
      const VersionStorageInfo* vstorage,
      std::map<int, std::map<LevelStatType, double>>* level_stats,
      CompactionStats* compaction_stats_sum);
  void DumpCFMapStatsByPriority(
      std::map<int, std::map<LevelStatType, double>>* priorities_stats);
  void DumpCFStats(std::string* value);
  // if is_periodic = true, it is an internal call by RocksDB periodically to
  // dump the status.
  void DumpCFStatsNoFileHistogram(bool is_periodic, std::string* value);
  void DumpCFFileHistogram(std::string* value);
  void DumpCFMapStatsWriteStall(std::map<std::string, std::string>* value);
  void DumpCFStatsWriteStall(std::string* value,
                             uint64_t* total_stall_count = nullptr);
  Cache* GetBlockCacheForStats();
  Cache* GetBlobCacheForStats();

  // Per-DB stats
  std::atomic<uint64_t> db_stats_[kIntStatsNumMax];
  // Per-ColumnFamily stats (accumulated values and event counts, indexed by
  // InternalCFStatsType)
  uint64_t cf_stats_value_[INTERNAL_CF_STATS_ENUM_MAX];
  uint64_t cf_stats_count_[INTERNAL_CF_STATS_ENUM_MAX];
  // Initialize/reference the collector in constructor so that we don't need
  // additional synchronization in InternalStats, relying on synchronization
  // in CacheEntryStatsCollector::GetStats. This collector is pinned in cache
  // (through a shared_ptr) so that it does not get immediately ejected from
  // a full cache, which would force a re-scan on the next GetStats.
  std::shared_ptr<CacheEntryStatsCollector<CacheEntryRoleStats>>
      cache_entry_stats_collector_;
  // Per-column family and level compaction statistics, including flush and
  // file ingestion. These are treated as compactions to L0 or the level where
  // the file was ingested.
  std::vector<CompactionStats> comp_stats_;
  std::vector<CompactionStats> comp_stats_by_pri_;
  CompactionStats per_key_placement_comp_stats_;
  std::vector<HistogramImpl> file_read_latency_;
  HistogramImpl blob_file_read_latency_;
  // Set whenever CF stats change (see AddCFStats/Clear); consulted to decide
  // whether a periodic dump has anything new to report.
  bool has_cf_change_since_dump_;
  // How many periods of no change since the last time stats are dumped for
  // a periodic dump.
  int no_cf_change_period_since_dump_ = 0;
  uint64_t last_histogram_num = std::numeric_limits<uint64_t>::max();
  static const int kMaxNoChangePeriodSinceDump;
  631. // Used to compute per-interval statistics
  632. struct CFStatsSnapshot {
  633. // ColumnFamily-level stats
  634. CompactionStats comp_stats;
  635. uint64_t ingest_bytes_flush; // Bytes written to L0 (Flush)
  636. uint64_t stall_count; // Total counts of CF-scope write stalls
  637. // Stats from compaction jobs - bytes written, bytes read, duration.
  638. uint64_t compact_bytes_write;
  639. uint64_t compact_bytes_read;
  640. uint64_t compact_micros;
  641. double seconds_up;
  642. // AddFile specific stats
  643. uint64_t ingest_bytes_addfile; // Total Bytes ingested
  644. uint64_t ingest_files_addfile; // Total number of files ingested
  645. uint64_t ingest_l0_files_addfile; // Total number of files ingested to L0
  646. uint64_t ingest_keys_addfile; // Total number of keys ingested
  647. CFStatsSnapshot()
  648. : ingest_bytes_flush(0),
  649. stall_count(0),
  650. compact_bytes_write(0),
  651. compact_bytes_read(0),
  652. compact_micros(0),
  653. seconds_up(0),
  654. ingest_bytes_addfile(0),
  655. ingest_files_addfile(0),
  656. ingest_l0_files_addfile(0),
  657. ingest_keys_addfile(0) {}
  658. void Clear() {
  659. comp_stats.Clear();
  660. ingest_bytes_flush = 0;
  661. stall_count = 0;
  662. compact_bytes_write = 0;
  663. compact_bytes_read = 0;
  664. compact_micros = 0;
  665. seconds_up = 0;
  666. ingest_bytes_addfile = 0;
  667. ingest_files_addfile = 0;
  668. ingest_l0_files_addfile = 0;
  669. ingest_keys_addfile = 0;
  670. }
  671. } cf_stats_snapshot_;
  // DB-wide counterpart of CFStatsSnapshot above: a snapshot of cumulative
  // DB-level stats taken at the last dump, used to compute interval deltas.
  struct DBStatsSnapshot {
    // DB-level stats
    uint64_t ingest_bytes = 0;    // Bytes written by user
    uint64_t wal_bytes = 0;       // Bytes written to WAL
    uint64_t wal_synced = 0;      // Number of times WAL is synced
    uint64_t write_with_wal = 0;  // Number of writes that request WAL
    // These count the number of writes processed by the calling thread or
    // another thread.
    uint64_t write_other = 0;
    uint64_t write_self = 0;
    // Total number of keys written. write_self and write_other measure number
    // of write requests written, Each of the write request can contain updates
    // to multiple keys. num_keys_written is total number of keys updated by all
    // those writes.
    uint64_t num_keys_written = 0;
    // Total time writes delayed by stalls.
    uint64_t write_stall_micros = 0;
    double seconds_up = 0;

    // In-class initializers above make an explicit mem-initializer list
    // redundant (matches the NSDMI style already used by this class).
    DBStatsSnapshot() = default;

    // Reset every counter to its zero-initialized state. All members have
    // default member initializers, so assigning a fresh value zeroes them all.
    void Clear() { *this = DBStatsSnapshot{}; }
  } db_stats_snapshot_;
  // Handler functions for getting property values. They use "value" as a value-
  // result argument, and return true upon successfully setting "value".
  // "suffix" presumably carries the remainder of the property name after a
  // registered prefix is stripped (e.g. a level number) -- confirm in the .cc.
  bool HandleNumFilesAtLevel(std::string* value, Slice suffix);
  bool HandleCompressionRatioAtLevelPrefix(std::string* value, Slice suffix);
  bool HandleLevelStats(std::string* value, Slice suffix);
  bool HandleStats(std::string* value, Slice suffix);
  bool HandleCFMapStats(std::map<std::string, std::string>* compaction_stats,
                        Slice suffix);
  bool HandleCFStats(std::string* value, Slice suffix);
  bool HandleCFStatsNoFileHistogram(std::string* value, Slice suffix);
  bool HandleCFFileHistogram(std::string* value, Slice suffix);
  bool HandleCFStatsPeriodic(std::string* value, Slice suffix);
  bool HandleCFWriteStallStats(std::string* value, Slice suffix);
  bool HandleCFWriteStallStatsMap(std::map<std::string, std::string>* values,
                                  Slice suffix);
  bool HandleDBMapStats(std::map<std::string, std::string>* compaction_stats,
                        Slice suffix);
  bool HandleDBStats(std::string* value, Slice suffix);
  bool HandleDBWriteStallStats(std::string* value, Slice suffix);
  bool HandleDBWriteStallStatsMap(std::map<std::string, std::string>* values,
                                  Slice suffix);
  bool HandleSsTables(std::string* value, Slice suffix);
  bool HandleAggregatedTableProperties(std::string* value, Slice suffix);
  bool HandleAggregatedTablePropertiesAtLevel(std::string* value, Slice suffix);
  bool HandleAggregatedTablePropertiesMap(
      std::map<std::string, std::string>* values, Slice suffix);
  bool HandleAggregatedTablePropertiesAtLevelMap(
      std::map<std::string, std::string>* values, Slice suffix);
  // Integer-valued property handlers. Same value-result convention as the
  // string handlers above: "value" receives the result and the return value
  // indicates success. The (DBImpl*, Version*) parameters presumably supply
  // the context the property is computed from -- confirm in the .cc file.
  bool HandleNumImmutableMemTable(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleNumImmutableMemTableFlushed(uint64_t* value, DBImpl* db,
                                         Version* version);
  bool HandleMemTableFlushPending(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleNumRunningFlushes(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCompactionPending(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumRunningCompactions(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleNumRunningCompactionSortedRuns(uint64_t* value, DBImpl* db,
                                            Version* version);
  bool HandleBackgroundErrors(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCurSizeActiveMemTable(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleCurSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
  bool HandleSizeAllMemTables(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumEntriesActiveMemTable(uint64_t* value, DBImpl* db,
                                      Version* version);
  bool HandleNumEntriesImmMemTables(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleNumDeletesActiveMemTable(uint64_t* value, DBImpl* db,
                                      Version* version);
  bool HandleNumDeletesImmMemTables(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleEstimateNumKeys(uint64_t* value, DBImpl* db, Version* version);
  bool HandleNumSnapshots(uint64_t* value, DBImpl* db, Version* version);
  bool HandleOldestSnapshotTime(uint64_t* value, DBImpl* db, Version* version);
  bool HandleOldestSnapshotSequence(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleNumLiveVersions(uint64_t* value, DBImpl* db, Version* version);
  bool HandleCurrentSuperVersionNumber(uint64_t* value, DBImpl* db,
                                       Version* version);
  bool HandleIsFileDeletionsEnabled(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleBaseLevel(uint64_t* value, DBImpl* db, Version* version);
  bool HandleTotalSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleLiveSstFilesSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleObsoleteSstFilesSize(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleEstimatePendingCompactionBytes(uint64_t* value, DBImpl* db,
                                            Version* version);
  bool HandleEstimateTableReadersMem(uint64_t* value, DBImpl* db,
                                     Version* version);
  bool HandleEstimateLiveDataSize(uint64_t* value, DBImpl* db,
                                  Version* version);
  bool HandleMinLogNumberToKeep(uint64_t* value, DBImpl* db, Version* version);
  bool HandleMinObsoleteSstNumberToKeep(uint64_t* value, DBImpl* db,
                                        Version* version);
  bool HandleActualDelayedWriteRate(uint64_t* value, DBImpl* db,
                                    Version* version);
  bool HandleIsWriteStopped(uint64_t* value, DBImpl* db, Version* version);
  bool HandleEstimateOldestKeyTime(uint64_t* value, DBImpl* db,
                                   Version* version);
  // Block-cache property handlers. The "Internal" variants take a "fast" flag
  // shared by the normal and "Fast" entry-stats handlers below.
  bool HandleBlockCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlockCacheUsage(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlockCachePinnedUsage(uint64_t* value, DBImpl* db,
                                   Version* version);
  bool HandleBlockCacheEntryStatsInternal(std::string* value, bool fast);
  bool HandleBlockCacheEntryStatsMapInternal(
      std::map<std::string, std::string>* values, bool fast);
  bool HandleBlockCacheEntryStats(std::string* value, Slice suffix);
  bool HandleBlockCacheEntryStatsMap(std::map<std::string, std::string>* values,
                                     Slice suffix);
  bool HandleFastBlockCacheEntryStats(std::string* value, Slice suffix);
  bool HandleFastBlockCacheEntryStatsMap(
      std::map<std::string, std::string>* values, Slice suffix);
  bool HandleLiveSstFilesSizeAtTemperature(std::string* value, Slice suffix);
  // Blob-file and blob-cache property handlers.
  bool HandleNumBlobFiles(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlobStats(std::string* value, Slice suffix);
  bool HandleTotalBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleLiveBlobFileSize(uint64_t* value, DBImpl* db, Version* version);
  bool HandleLiveBlobFileGarbageSize(uint64_t* value, DBImpl* db,
                                     Version* version);
  bool HandleBlobCacheCapacity(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlobCacheUsage(uint64_t* value, DBImpl* db, Version* version);
  bool HandleBlobCachePinnedUsage(uint64_t* value, DBImpl* db,
                                  Version* version);
  // Total number of background errors encountered. Every time a flush task
  // or compaction task fails, this counter is incremented. The failure can
  // be caused by any possible reason, including file system errors, out of
  // resources, or input file corruption. Failing when retrying the same flush
  // or compaction will cause the counter to increase too.
  uint64_t bg_error_count_;

  // This is a rolling count of the number of sorted runs being processed by
  // currently running compactions. Other metrics are only incremented, but this
  // metric is also decremented. Additionally, we also do not want to reset this
  // count to zero at a periodic interval.
  std::atomic<uint64_t> num_running_compaction_sorted_runs_;

  // Number of LSM levels; const, so fixed at construction.
  const int number_levels_;
  // Clock used for timing. Raw pointer, presumably not owned -- confirm.
  SystemClock* clock_;
  // Column family these stats belong to. Raw pointer, presumably not owned.
  ColumnFamilyData* cfd_;
  // Presumably the clock reading captured at construction, used for
  // seconds-up calculations -- TODO confirm in the .cc file.
  uint64_t started_at_;
};
  834. // IntPropertyAggregator aggregates an integer property across all column
  835. // families.
  836. class IntPropertyAggregator {
  837. public:
  838. IntPropertyAggregator() {}
  839. virtual ~IntPropertyAggregator() {}
  840. IntPropertyAggregator(const IntPropertyAggregator&) = delete;
  841. void operator=(const IntPropertyAggregator&) = delete;
  842. // Add a column family's property value to the aggregator.
  843. virtual void Add(ColumnFamilyData* cfd, uint64_t value) = 0;
  844. // Get the aggregated value.
  845. virtual uint64_t Aggregate() const = 0;
  846. };
// Factory returning an aggregator for the given integer property. The
// concrete aggregation strategy is presumably chosen from "property" in the
// implementation file -- confirm there.
std::unique_ptr<IntPropertyAggregator> CreateIntPropertyAggregator(
    const Slice& property);

}  // namespace ROCKSDB_NAMESPACE