block_cache_trace_analyzer.cc

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#ifndef ROCKSDB_LITE
#ifdef GFLAGS
#include "tools/block_cache_analyzer/block_cache_trace_analyzer.h"

#include <algorithm>
#include <cinttypes>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <random>
#include <sstream>

#include "monitoring/histogram.h"
#include "util/gflags_compat.h"
#include "util/string_util.h"

using GFLAGS_NAMESPACE::ParseCommandLineFlags;

DEFINE_string(block_cache_trace_path, "", "The trace file path.");
DEFINE_bool(is_block_cache_human_readable_trace, false,
            "Whether the trace file provided for analysis was generated by "
            "running block_cache_trace_analyzer with "
            "FLAGS_human_readable_trace_file_path specified.");
DEFINE_string(
    block_cache_sim_config_path, "",
    "The config file path. One cache configuration per line. The format of a "
    "cache configuration is "
    "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_"
    "capacity_N. Supported cache names are lru, lru_priority, lru_hybrid, and "
    "lru_hybrid_no_insert_on_row_miss. User may also add a prefix 'ghost_' to "
    "a cache_name to add a ghost cache in front of the real cache. "
    "ghost_capacity and cache_capacity can be xK, xM or xG where x is a "
    "positive number.");
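// An illustrative (hypothetical) config line following the format described
// above, simulating an LRU cache with 64 shards, no ghost cache, and three
// capacities:
//   lru,6,0,16M,256M,1G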
DEFINE_int32(block_cache_trace_downsample_ratio, 1,
             "The trace collected accesses for one in every "
             "block_cache_trace_downsample_ratio blocks. We scale "
             "down the simulated cache size by this ratio.");
DEFINE_bool(print_block_size_stats, false,
            "Print block size distribution and the distribution broken down "
            "by block type and column family.");
DEFINE_bool(print_access_count_stats, false,
            "Print access count distribution and the distribution broken down "
            "by block type and column family.");
DEFINE_bool(print_data_block_access_count_stats, false,
            "Print data block accesses by user Get and Multi-Get.");
DEFINE_int32(cache_sim_warmup_seconds, 0,
             "The number of seconds to warm up simulated caches. The hit/miss "
             "counters are reset after the warmup completes.");
DEFINE_int32(analyze_bottom_k_access_count_blocks, 0,
             "Print out detailed access information for blocks whose number "
             "of accesses is among the bottom k of all blocks.");
DEFINE_int32(analyze_top_k_access_count_blocks, 0,
             "Print out detailed access information for blocks whose number "
             "of accesses is among the top k of all blocks.");
DEFINE_string(block_cache_analysis_result_dir, "",
              "The directory that saves block cache analysis results.");
DEFINE_string(
    timeline_labels, "",
    "Group the number of accesses per block per second using these labels. "
    "Possible labels are a combination of the following: cf (column family), "
    "sst, level, bt (block type), caller, block. For example, label \"cf_bt\" "
    "means the number of accesses per second is grouped by unique pairs of "
    "\"cf_bt\". A label \"all\" contains the aggregated number of accesses per "
    "second across all possible labels.");
DEFINE_string(reuse_distance_labels, "",
              "Group the reuse distance of a block using these labels. Reuse "
              "distance is defined as the cumulative size of unique blocks "
              "read between two consecutive accesses on the same block.");
DEFINE_string(
    reuse_distance_buckets, "",
    "Group blocks by their reuse distances given these buckets. For "
    "example, if 'reuse_distance_buckets' is '1K,1M,1G', we will "
    "create four buckets. The first three buckets contain the number of "
    "blocks with reuse distance less than 1K, between 1K and 1M, and between "
    "1M and 1G, respectively. The last bucket contains the number of blocks "
    "with reuse distance larger than 1G.");
DEFINE_string(
    reuse_interval_labels, "",
    "Group the reuse interval of a block using these labels. Reuse "
    "interval is defined as the time between two consecutive accesses "
    "on the same block.");
DEFINE_string(
    reuse_interval_buckets, "",
    "Group blocks by their reuse interval given these buckets. For "
    "example, if 'reuse_interval_buckets' is '1,10,100', we will "
    "create four buckets. The first three buckets contain the number of "
    "blocks with reuse interval less than 1 second, between 1 second and 10 "
    "seconds, and between 10 seconds and 100 seconds, respectively. The last "
    "bucket contains the number of blocks with reuse interval longer than 100 "
    "seconds.");
DEFINE_string(
    reuse_lifetime_labels, "",
    "Group the reuse lifetime of a block using these labels. Reuse "
    "lifetime is defined as the time interval between the first access on a "
    "block and the last access on the same block. For blocks that are only "
    "accessed once, their lifetime is set to kMaxUint64.");
DEFINE_string(
    reuse_lifetime_buckets, "",
    "Group blocks by their reuse lifetime given these buckets. For "
    "example, if 'reuse_lifetime_buckets' is '1,10,100', we will "
    "create four buckets. The first three buckets contain the number of "
    "blocks with reuse lifetime less than 1 second, between 1 second and 10 "
    "seconds, and between 10 seconds and 100 seconds, respectively. The last "
    "bucket contains the number of blocks with reuse lifetime longer than 100 "
    "seconds.");
DEFINE_string(
    analyze_callers, "",
    "The list of callers to perform a detailed analysis on. If specified, the "
    "analyzer will output a detailed percentage of accesses for each caller "
    "broken down by column family, level, and block type. The available "
    "callers are: Get, MultiGet, Iterator, ApproximateSize, VerifyChecksum, "
    "SSTDumpTool, ExternalSSTIngestion, Repair, Prefetch, Compaction, "
    "CompactionRefill, Flush, SSTFileReader, Uncategorized.");
DEFINE_string(access_count_buckets, "",
              "Group the number of blocks by their access count given these "
              "buckets. If specified, the analyzer will output a detailed "
              "analysis on the number of blocks grouped by their access "
              "count, broken down by block type and column family.");
DEFINE_int32(analyze_blocks_reuse_k_reuse_window, 0,
             "Analyze the percentage of blocks accessed in the [k, 2*k] "
             "seconds window that are accessed again in the subsequent "
             "[2*k, 3*k], [3*k, 4*k], ..., [k*(n-1), k*n] windows.");
DEFINE_string(analyze_get_spatial_locality_labels, "",
              "Group data blocks using these labels.");
DEFINE_string(analyze_get_spatial_locality_buckets, "",
              "Group data blocks by their statistics using these buckets.");
DEFINE_string(skew_labels, "",
              "Group the access count of a block using these labels.");
DEFINE_string(skew_buckets, "", "Group the skew labels using these buckets.");
DEFINE_bool(mrc_only, false,
            "Evaluate alternative cache policies only. When this flag is "
            "true, the analyzer does NOT maintain the state of each block in "
            "memory for analysis. It only feeds the accesses into the cache "
            "simulators.");
DEFINE_string(
    analyze_correlation_coefficients_labels, "",
    "Analyze the correlation coefficients of features such as number of past "
    "accesses with regard to the number of accesses till the next access.");
DEFINE_int32(analyze_correlation_coefficients_max_number_of_values, 1000000,
             "The maximum number of values for a feature. If the number of "
             "values for a feature is larger than this max, it randomly "
             "selects 'max' number of values.");
DEFINE_string(human_readable_trace_file_path, "",
              "The file path that saves human readable access records.");
namespace ROCKSDB_NAMESPACE {
namespace {

const std::string kMissRatioCurveFileName = "mrc";
const std::string kGroupbyBlock = "block";
const std::string kGroupbyTable = "table";
const std::string kGroupbyColumnFamily = "cf";
const std::string kGroupbySSTFile = "sst";
const std::string kGroupbyBlockType = "bt";
const std::string kGroupbyCaller = "caller";
const std::string kGroupbyLevel = "level";
const std::string kGroupbyAll = "all";
const std::set<std::string> kGroupbyLabels{
    kGroupbyBlock, kGroupbyColumnFamily, kGroupbySSTFile, kGroupbyLevel,
    kGroupbyBlockType, kGroupbyCaller, kGroupbyAll};
const std::string kSupportedCacheNames =
    " lru ghost_lru lru_priority ghost_lru_priority lru_hybrid "
    "ghost_lru_hybrid lru_hybrid_no_insert_on_row_miss "
    "ghost_lru_hybrid_no_insert_on_row_miss ";

// The suffix for the generated csv files.
const std::string kFileNameSuffixMissRatioTimeline = "miss_ratio_timeline";
const std::string kFileNameSuffixMissTimeline = "miss_timeline";
const std::string kFileNameSuffixSkew = "skewness";
const std::string kFileNameSuffixAccessTimeline = "access_timeline";
const std::string kFileNameSuffixCorrelation = "correlation_input";
const std::string kFileNameSuffixAvgReuseIntervalNaccesses =
    "avg_reuse_interval_naccesses";
const std::string kFileNameSuffixAvgReuseInterval = "avg_reuse_interval";
const std::string kFileNameSuffixReuseInterval = "access_reuse_interval";
const std::string kFileNameSuffixReuseLifetime = "reuse_lifetime";
const std::string kFileNameSuffixAccessReuseBlocksTimeline =
    "reuse_blocks_timeline";
const std::string kFileNameSuffixPercentOfAccessSummary =
    "percentage_of_accesses_summary";
const std::string kFileNameSuffixPercentRefKeys = "percent_ref_keys";
const std::string kFileNameSuffixPercentDataSizeOnRefKeys =
    "percent_data_size_on_ref_keys";
const std::string kFileNameSuffixPercentAccessesOnRefKeys =
    "percent_accesses_on_ref_keys";
const std::string kFileNameSuffixAccessCountSummary = "access_count_summary";
std::string block_type_to_string(TraceType type) {
  switch (type) {
    case kBlockTraceFilterBlock:
      return "Filter";
    case kBlockTraceDataBlock:
      return "Data";
    case kBlockTraceIndexBlock:
      return "Index";
    case kBlockTraceRangeDeletionBlock:
      return "RangeDeletion";
    case kBlockTraceUncompressionDictBlock:
      return "UncompressionDict";
    default:
      break;
  }
  // This cannot happen.
  return "InvalidType";
}
std::string caller_to_string(TableReaderCaller caller) {
  switch (caller) {
    case kUserGet:
      return "Get";
    case kUserMultiGet:
      return "MultiGet";
    case kUserIterator:
      return "Iterator";
    case kUserApproximateSize:
      return "ApproximateSize";
    case kUserVerifyChecksum:
      return "VerifyChecksum";
    case kSSTDumpTool:
      return "SSTDumpTool";
    case kExternalSSTIngestion:
      return "ExternalSSTIngestion";
    case kRepair:
      return "Repair";
    case kPrefetch:
      return "Prefetch";
    case kCompaction:
      return "Compaction";
    case kCompactionRefill:
      return "CompactionRefill";
    case kFlush:
      return "Flush";
    case kSSTFileReader:
      return "SSTFileReader";
    case kUncategorized:
      return "Uncategorized";
    default:
      break;
  }
  // This cannot happen.
  return "InvalidCaller";
}
TableReaderCaller string_to_caller(std::string caller_str) {
  if (caller_str == "Get") {
    return kUserGet;
  } else if (caller_str == "MultiGet") {
    return kUserMultiGet;
  } else if (caller_str == "Iterator") {
    return kUserIterator;
  } else if (caller_str == "ApproximateSize") {
    return kUserApproximateSize;
  } else if (caller_str == "VerifyChecksum") {
    return kUserVerifyChecksum;
  } else if (caller_str == "SSTDumpTool") {
    return kSSTDumpTool;
  } else if (caller_str == "ExternalSSTIngestion") {
    return kExternalSSTIngestion;
  } else if (caller_str == "Repair") {
    return kRepair;
  } else if (caller_str == "Prefetch") {
    return kPrefetch;
  } else if (caller_str == "Compaction") {
    return kCompaction;
  } else if (caller_str == "CompactionRefill") {
    return kCompactionRefill;
  } else if (caller_str == "Flush") {
    return kFlush;
  } else if (caller_str == "SSTFileReader") {
    return kSSTFileReader;
  } else if (caller_str == "Uncategorized") {
    return kUncategorized;
  }
  return TableReaderCaller::kMaxBlockCacheLookupCaller;
}
bool is_user_access(TableReaderCaller caller) {
  switch (caller) {
    case kUserGet:
    case kUserMultiGet:
    case kUserIterator:
    case kUserApproximateSize:
    case kUserVerifyChecksum:
      return true;
    default:
      break;
  }
  return false;
}

const char kBreakLine[] =
    "***************************************************************\n";

void print_break_lines(uint32_t num_break_lines) {
  for (uint32_t i = 0; i < num_break_lines; i++) {
    fprintf(stdout, kBreakLine);
  }
}
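// Returns numerator as a percentage of denominator, or -1 when the
// denominator is 0.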
double percent(uint64_t numerator, uint64_t denominator) {
  if (denominator == 0) {
    return -1;
  }
  return static_cast<double>(numerator * 100.0 / denominator);
}
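// Rebuckets a per-second timeline into coarser buckets of 'time_unit'
// seconds, summing the counts that fall into the same bucket.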
std::map<uint64_t, uint64_t> adjust_time_unit(
    const std::map<uint64_t, uint64_t>& time_stats, uint64_t time_unit) {
  if (time_unit == 1) {
    return time_stats;
  }
  std::map<uint64_t, uint64_t> adjusted_time_stats;
  for (auto const& time : time_stats) {
    adjusted_time_stats[static_cast<uint64_t>(time.first / time_unit)] +=
        time.second;
  }
  return adjusted_time_stats;
}
}  // namespace
void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
  if (!cache_simulator_) {
    return;
  }
  if (output_dir_.empty()) {
    return;
  }
  uint64_t trace_duration =
      trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
  uint64_t total_accesses = access_sequence_number_;
  const std::string output_miss_ratio_curve_path =
      output_dir_ + "/" + std::to_string(trace_duration) + "_" +
      std::to_string(total_accesses) + "_" + kMissRatioCurveFileName;
  std::ofstream out(output_miss_ratio_curve_path);
  if (!out.is_open()) {
    return;
  }
  // Write header.
  const std::string header =
      "cache_name,num_shard_bits,ghost_capacity,capacity,miss_ratio,total_"
      "accesses";
  out << header << std::endl;
  for (auto const& config_caches : cache_simulator_->sim_caches()) {
    const CacheConfiguration& config = config_caches.first;
    for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
      double miss_ratio =
          config_caches.second[i]->miss_ratio_stats().miss_ratio();
      // Write the body.
      out << config.cache_name;
      out << ",";
      out << config.num_shard_bits;
      out << ",";
      out << config.ghost_cache_capacity;
      out << ",";
      out << config.cache_capacities[i];
      out << ",";
      out << std::fixed << std::setprecision(4) << miss_ratio;
      out << ",";
      out << config_caches.second[i]->miss_ratio_stats().total_accesses();
      out << std::endl;
    }
  }
  out.close();
}
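// Converts a block's access timeline into per-access feature vectors (number
// of accesses and elapsed time since the last access, number of past
// accesses) and prediction targets (number of accesses and elapsed time till
// the next access), accumulated under 'label'.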
void BlockCacheTraceAnalyzer::UpdateFeatureVectors(
    const std::vector<uint64_t>& access_sequence_number_timeline,
    const std::vector<uint64_t>& access_timeline, const std::string& label,
    std::map<std::string, Features>* label_features,
    std::map<std::string, Predictions>* label_predictions) const {
  if (access_sequence_number_timeline.empty() || access_timeline.empty()) {
    return;
  }
  assert(access_timeline.size() == access_sequence_number_timeline.size());
  uint64_t prev_access_sequence_number = access_sequence_number_timeline[0];
  uint64_t prev_access_timestamp = access_timeline[0];
  for (uint32_t i = 0; i < access_sequence_number_timeline.size(); i++) {
    uint64_t num_accesses_since_last_access =
        access_sequence_number_timeline[i] - prev_access_sequence_number;
    uint64_t elapsed_time_since_last_access =
        access_timeline[i] - prev_access_timestamp;
    prev_access_sequence_number = access_sequence_number_timeline[i];
    prev_access_timestamp = access_timeline[i];
    if (i < access_sequence_number_timeline.size() - 1) {
      (*label_features)[label].num_accesses_since_last_access.push_back(
          num_accesses_since_last_access);
      (*label_features)[label].num_past_accesses.push_back(i);
      (*label_features)[label].elapsed_time_since_last_access.push_back(
          elapsed_time_since_last_access);
    }
    if (i >= 1) {
      (*label_predictions)[label].num_accesses_till_next_access.push_back(
          num_accesses_since_last_access);
      (*label_predictions)[label].elapsed_time_till_next_access.push_back(
          elapsed_time_since_last_access);
    }
  }
}
void BlockCacheTraceAnalyzer::WriteMissRatioTimeline(
    uint64_t time_unit) const {
  if (!cache_simulator_ || output_dir_.empty()) {
    return;
  }
  std::map<uint64_t, std::map<std::string, std::map<uint64_t, double>>>
      cs_name_timeline;
  uint64_t start_time = port::kMaxUint64;
  uint64_t end_time = 0;
  const std::map<uint64_t, uint64_t>& trace_num_misses =
      adjust_time_unit(miss_ratio_stats_.num_misses_timeline(), time_unit);
  const std::map<uint64_t, uint64_t>& trace_num_accesses =
      adjust_time_unit(miss_ratio_stats_.num_accesses_timeline(), time_unit);
  assert(trace_num_misses.size() == trace_num_accesses.size());
  for (auto const& num_miss : trace_num_misses) {
    uint64_t time = num_miss.first;
    start_time = std::min(start_time, time);
    end_time = std::max(end_time, time);
    uint64_t miss = num_miss.second;
    auto it = trace_num_accesses.find(time);
    assert(it != trace_num_accesses.end());
    uint64_t access = it->second;
    cs_name_timeline[port::kMaxUint64]["trace"][time] = percent(miss, access);
  }
  for (auto const& config_caches : cache_simulator_->sim_caches()) {
    const CacheConfiguration& config = config_caches.first;
    std::string cache_label = config.cache_name + "-" +
                              std::to_string(config.num_shard_bits) + "-" +
                              std::to_string(config.ghost_cache_capacity);
    for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
      const std::map<uint64_t, uint64_t>& num_misses = adjust_time_unit(
          config_caches.second[i]->miss_ratio_stats().num_misses_timeline(),
          time_unit);
      const std::map<uint64_t, uint64_t>& num_accesses = adjust_time_unit(
          config_caches.second[i]->miss_ratio_stats().num_accesses_timeline(),
          time_unit);
      assert(num_misses.size() == num_accesses.size());
      for (auto const& num_miss : num_misses) {
        uint64_t time = num_miss.first;
        start_time = std::min(start_time, time);
        end_time = std::max(end_time, time);
        uint64_t miss = num_miss.second;
        auto it = num_accesses.find(time);
        assert(it != num_accesses.end());
        uint64_t access = it->second;
        cs_name_timeline[config.cache_capacities[i]][cache_label][time] =
            percent(miss, access);
      }
    }
  }
  for (auto const& it : cs_name_timeline) {
    const std::string output_miss_ratio_timeline_path =
        output_dir_ + "/" + std::to_string(it.first) + "_" +
        std::to_string(time_unit) + "_" + kFileNameSuffixMissRatioTimeline;
    std::ofstream out(output_miss_ratio_timeline_path);
    if (!out.is_open()) {
      return;
    }
    std::string header("time");
    for (uint64_t now = start_time; now <= end_time; now++) {
      header += ",";
      header += std::to_string(now);
    }
    out << header << std::endl;
    for (auto const& label : it.second) {
      std::string row(label.first);
      for (uint64_t now = start_time; now <= end_time; now++) {
        auto misses = label.second.find(now);
        row += ",";
        if (misses != label.second.end()) {
          row += std::to_string(misses->second);
        } else {
          row += "0";
        }
      }
      out << row << std::endl;
    }
    out.close();
  }
}
void BlockCacheTraceAnalyzer::WriteMissTimeline(uint64_t time_unit) const {
  if (!cache_simulator_ || output_dir_.empty()) {
    return;
  }
  std::map<uint64_t, std::map<std::string, std::map<uint64_t, uint64_t>>>
      cs_name_timeline;
  uint64_t start_time = port::kMaxUint64;
  uint64_t end_time = 0;
  const std::map<uint64_t, uint64_t>& trace_num_misses =
      adjust_time_unit(miss_ratio_stats_.num_misses_timeline(), time_unit);
  for (auto const& num_miss : trace_num_misses) {
    uint64_t time = num_miss.first;
    start_time = std::min(start_time, time);
    end_time = std::max(end_time, time);
    uint64_t miss = num_miss.second;
    cs_name_timeline[port::kMaxUint64]["trace"][time] = miss;
  }
  for (auto const& config_caches : cache_simulator_->sim_caches()) {
    const CacheConfiguration& config = config_caches.first;
    std::string cache_label = config.cache_name + "-" +
                              std::to_string(config.num_shard_bits) + "-" +
                              std::to_string(config.ghost_cache_capacity);
    for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
      const std::map<uint64_t, uint64_t>& num_misses = adjust_time_unit(
          config_caches.second[i]->miss_ratio_stats().num_misses_timeline(),
          time_unit);
      for (auto const& num_miss : num_misses) {
        uint64_t time = num_miss.first;
        start_time = std::min(start_time, time);
        end_time = std::max(end_time, time);
        uint64_t miss = num_miss.second;
        cs_name_timeline[config.cache_capacities[i]][cache_label][time] = miss;
      }
    }
  }
  for (auto const& it : cs_name_timeline) {
    const std::string output_miss_ratio_timeline_path =
        output_dir_ + "/" + std::to_string(it.first) + "_" +
        std::to_string(time_unit) + "_" + kFileNameSuffixMissTimeline;
    std::ofstream out(output_miss_ratio_timeline_path);
    if (!out.is_open()) {
      return;
    }
    std::string header("time");
    for (uint64_t now = start_time; now <= end_time; now++) {
      header += ",";
      header += std::to_string(now);
    }
    out << header << std::endl;
    for (auto const& label : it.second) {
      std::string row(label.first);
      for (uint64_t now = start_time; now <= end_time; now++) {
        auto misses = label.second.find(now);
        row += ",";
        if (misses != label.second.end()) {
          row += std::to_string(misses->second);
        } else {
          row += "0";
        }
      }
      out << row << std::endl;
    }
    out.close();
  }
}
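// Writes the skewness of accesses for the given grouping: labels are sorted
// by access count in descending order, and each percentile bucket receives
// the share of total accesses contributed by the labels that fall into that
// slice of the ranking.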
void BlockCacheTraceAnalyzer::WriteSkewness(
    const std::string& label_str, const std::vector<uint64_t>& percent_buckets,
    TraceType target_block_type) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  std::map<std::string, uint64_t> label_naccesses;
  uint64_t total_naccesses = 0;
  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
                            uint32_t level, TraceType type,
                            const std::string& /*block_key*/, uint64_t block_id,
                            const BlockAccessInfo& block) {
    if (target_block_type != TraceType::kTraceMax &&
        target_block_type != type) {
      return;
    }
    const std::string label = BuildLabel(
        labels, cf_name, fd, level, type,
        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
    label_naccesses[label] += block.num_accesses;
    total_naccesses += block.num_accesses;
  };
  TraverseBlocks(block_callback, &labels);
  std::map<std::string, std::map<uint64_t, uint64_t>> label_bucket_naccesses;
  std::vector<std::pair<std::string, uint64_t>> pairs;
  for (auto const& itr : label_naccesses) {
    pairs.push_back(itr);
  }
  // Sort in descending order.
  sort(pairs.begin(), pairs.end(),
       [=](const std::pair<std::string, uint64_t>& a,
           const std::pair<std::string, uint64_t>& b) {
         return b.second < a.second;
       });
  size_t prev_start_index = 0;
  for (auto const& percent : percent_buckets) {
    label_bucket_naccesses[label_str][percent] = 0;
    size_t end_index = 0;
    if (percent == port::kMaxUint64) {
      end_index = label_naccesses.size();
    } else {
      end_index = percent * label_naccesses.size() / 100;
    }
    for (size_t i = prev_start_index; i < end_index; i++) {
      label_bucket_naccesses[label_str][percent] += pairs[i].second;
    }
    prev_start_index = end_index;
  }
  std::string filename_suffix;
  if (target_block_type != TraceType::kTraceMax) {
    filename_suffix = block_type_to_string(target_block_type);
    filename_suffix += "_";
  }
  filename_suffix += kFileNameSuffixSkew;
  WriteStatsToFile(label_str, percent_buckets, filename_suffix,
                   label_bucket_naccesses, total_naccesses);
}
void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
    const std::string& label_str, uint32_t max_number_of_values) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  std::map<std::string, Features> label_features;
  std::map<std::string, Predictions> label_predictions;
  auto block_callback =
      [&](const std::string& cf_name, uint64_t fd, uint32_t level,
          TraceType block_type, const std::string& /*block_key*/,
          uint64_t /*block_key_id*/, const BlockAccessInfo& block) {
        if (block.table_id == 0 && labels.find(kGroupbyTable) != labels.end()) {
          // We only know table id information for get requests.
          return;
        }
        if (labels.find(kGroupbyCaller) != labels.end()) {
          // Group by caller.
          for (auto const& caller_map : block.caller_access_timeline) {
            const std::string label =
                BuildLabel(labels, cf_name, fd, level, block_type,
                           caller_map.first, /*block_id=*/0, block);
            auto it = block.caller_access_sequence__number_timeline.find(
                caller_map.first);
            assert(it != block.caller_access_sequence__number_timeline.end());
            UpdateFeatureVectors(it->second, caller_map.second, label,
                                 &label_features, &label_predictions);
          }
          return;
        }
        const std::string label =
            BuildLabel(labels, cf_name, fd, level, block_type,
                       TableReaderCaller::kMaxBlockCacheLookupCaller,
                       /*block_id=*/0, block);
        UpdateFeatureVectors(block.access_sequence_number_timeline,
                             block.access_timeline, label, &label_features,
                             &label_predictions);
      };
  TraverseBlocks(block_callback, &labels);
  WriteCorrelationFeaturesToFile(label_str, label_features, label_predictions,
                                 max_number_of_values);
}
void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesToFile(
    const std::string& label,
    const std::map<std::string, Features>& label_features,
    const std::map<std::string, Predictions>& label_predictions,
    uint32_t max_number_of_values) const {
  std::default_random_engine rand_engine(
      static_cast<std::default_random_engine::result_type>(env_->NowMicros()));
  for (auto const& label_feature_vectors : label_features) {
    const Features& past = label_feature_vectors.second;
    auto it = label_predictions.find(label_feature_vectors.first);
    assert(it != label_predictions.end());
    const Predictions& future = it->second;
    const std::string output_path = output_dir_ + "/" + label + "_" +
                                    label_feature_vectors.first + "_" +
                                    kFileNameSuffixCorrelation;
    std::ofstream out(output_path);
    if (!out.is_open()) {
      return;
    }
    std::string header(
        "num_accesses_since_last_access,elapsed_time_since_last_access,num_"
        "past_accesses,num_accesses_till_next_access,elapsed_time_till_next_"
        "access");
    out << header << std::endl;
    std::vector<uint32_t> indexes;
    for (uint32_t i = 0; i < past.num_accesses_since_last_access.size(); i++) {
      indexes.push_back(i);
    }
    std::shuffle(indexes.begin(), indexes.end(), rand_engine);
    for (uint32_t i = 0; i < max_number_of_values && i < indexes.size(); i++) {
      uint32_t rand_index = indexes[i];
      out << std::to_string(past.num_accesses_since_last_access[rand_index])
          << ",";
      out << std::to_string(past.elapsed_time_since_last_access[rand_index])
          << ",";
      out << std::to_string(past.num_past_accesses[rand_index]) << ",";
      out << std::to_string(future.num_accesses_till_next_access[rand_index])
          << ",";
      out << std::to_string(future.elapsed_time_till_next_access[rand_index])
          << std::endl;
    }
    out.close();
  }
}
void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesForGet(
    uint32_t max_number_of_values) const {
  std::string label = "GetKeyInfo";
  std::map<std::string, Features> label_features;
  std::map<std::string, Predictions> label_predictions;
  for (auto const& get_info : get_key_info_map_) {
    const GetKeyInfo& info = get_info.second;
    UpdateFeatureVectors(info.access_sequence_number_timeline,
                         info.access_timeline, label, &label_features,
                         &label_predictions);
  }
  WriteCorrelationFeaturesToFile(label, label_features, label_predictions,
                                 max_number_of_values);
}
std::set<std::string> BlockCacheTraceAnalyzer::ParseLabelStr(
    const std::string& label_str) const {
  std::stringstream ss(label_str);
  std::set<std::string> labels;
  // label_str is in the form of "label1_label2_label3", e.g., cf_bt.
  while (ss.good()) {
    std::string label_name;
    getline(ss, label_name, '_');
    if (kGroupbyLabels.find(label_name) == kGroupbyLabels.end()) {
      // Unknown label name.
      fprintf(stderr, "Unknown label name %s, label string %s\n",
              label_name.c_str(), label_str.c_str());
      return {};
    }
    labels.insert(label_name);
  }
  return labels;
}
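// Builds the group-by label for a block by joining the values of the
// requested labels with '-'. For instance, a label set of {bt, cf} might
// yield a (hypothetical) label such as "Data-default".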
std::string BlockCacheTraceAnalyzer::BuildLabel(
    const std::set<std::string>& labels, const std::string& cf_name,
    uint64_t fd, uint32_t level, TraceType type, TableReaderCaller caller,
    uint64_t block_key, const BlockAccessInfo& block) const {
  std::map<std::string, std::string> label_value_map;
  label_value_map[kGroupbyAll] = kGroupbyAll;
  label_value_map[kGroupbyLevel] = std::to_string(level);
  label_value_map[kGroupbyCaller] = caller_to_string(caller);
  label_value_map[kGroupbySSTFile] = std::to_string(fd);
  label_value_map[kGroupbyBlockType] = block_type_to_string(type);
  label_value_map[kGroupbyColumnFamily] = cf_name;
  label_value_map[kGroupbyBlock] = std::to_string(block_key);
  label_value_map[kGroupbyTable] = std::to_string(block.table_id);
  // Concatenate the label values.
  std::string label;
  for (auto const& l : labels) {
    label += label_value_map[l];
    label += "-";
  }
  if (!label.empty()) {
    label.pop_back();
  }
  return label;
}
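// Walks the aggregated access information and invokes 'block_callback' once
// per block, iterating over column families, SST files, and block types.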
void BlockCacheTraceAnalyzer::TraverseBlocks(
    std::function<void(const std::string& /*cf_name*/, uint64_t /*fd*/,
                       uint32_t /*level*/, TraceType /*block_type*/,
                       const std::string& /*block_key*/,
                       uint64_t /*block_key_id*/,
                       const BlockAccessInfo& /*block_access_info*/)>
        block_callback,
    std::set<std::string>* labels) const {
  for (auto const& cf_aggregates : cf_aggregates_map_) {
    // Stats per column family.
    const std::string& cf_name = cf_aggregates.first;
    for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
      // Stats per SST file.
      const uint64_t fd = file_aggregates.first;
      const uint32_t level = file_aggregates.second.level;
      for (auto const& block_type_aggregates :
           file_aggregates.second.block_type_aggregates_map) {
        // Stats per block type.
        const TraceType type = block_type_aggregates.first;
        for (auto const& block_access_info :
             block_type_aggregates.second.block_access_info_map) {
          // Stats per block.
          if (labels && block_access_info.second.table_id == 0 &&
              labels->find(kGroupbyTable) != labels->end()) {
            // We only know table id information for get requests.
            return;
          }
          block_callback(cf_name, fd, level, type, block_access_info.first,
                         block_access_info.second.block_id,
                         block_access_info.second);
        }
      }
    }
  }
}
void BlockCacheTraceAnalyzer::WriteGetSpatialLocality(
    const std::string& label_str,
    const std::vector<uint64_t>& percent_buckets) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  std::map<std::string, std::map<uint64_t, uint64_t>> label_pnrefkeys_nblocks;
  std::map<std::string, std::map<uint64_t, uint64_t>> label_pnrefs_nblocks;
  std::map<std::string, std::map<uint64_t, uint64_t>> label_pndatasize_nblocks;
  uint64_t nblocks = 0;
  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
                            uint32_t level, TraceType /*block_type*/,
                            const std::string& /*block_key*/,
                            uint64_t /*block_key_id*/,
                            const BlockAccessInfo& block) {
    if (block.num_keys == 0) {
      return;
    }
    uint64_t naccesses = 0;
    for (auto const& key_access : block.key_num_access_map) {
      for (auto const& caller_access : key_access.second) {
        if (caller_access.first == TableReaderCaller::kUserGet) {
          naccesses += caller_access.second;
        }
      }
    }
    const std::string label =
        BuildLabel(labels, cf_name, fd, level, TraceType::kBlockTraceDataBlock,
                   TableReaderCaller::kUserGet, /*block_id=*/0, block);
    const uint64_t percent_referenced_for_existing_keys =
        static_cast<uint64_t>(std::max(
            percent(block.key_num_access_map.size(), block.num_keys), 0.0));
    const uint64_t percent_accesses_for_existing_keys =
        static_cast<uint64_t>(std::max(
            percent(block.num_referenced_key_exist_in_block, naccesses), 0.0));
    const uint64_t percent_referenced_data_size = static_cast<uint64_t>(
        std::max(percent(block.referenced_data_size, block.block_size), 0.0));
    if (label_pnrefkeys_nblocks.find(label) == label_pnrefkeys_nblocks.end()) {
      for (auto const& percent_bucket : percent_buckets) {
        label_pnrefkeys_nblocks[label][percent_bucket] = 0;
        label_pnrefs_nblocks[label][percent_bucket] = 0;
        label_pndatasize_nblocks[label][percent_bucket] = 0;
      }
    }
    label_pnrefkeys_nblocks[label]
        .upper_bound(percent_referenced_for_existing_keys)
        ->second += 1;
    label_pnrefs_nblocks[label]
        .upper_bound(percent_accesses_for_existing_keys)
        ->second += 1;
    label_pndatasize_nblocks[label]
        .upper_bound(percent_referenced_data_size)
        ->second += 1;
    nblocks += 1;
  };
  TraverseBlocks(block_callback, &labels);
  WriteStatsToFile(label_str, percent_buckets, kFileNameSuffixPercentRefKeys,
                   label_pnrefkeys_nblocks, nblocks);
  WriteStatsToFile(label_str, percent_buckets,
                   kFileNameSuffixPercentAccessesOnRefKeys,
                   label_pnrefs_nblocks, nblocks);
  WriteStatsToFile(label_str, percent_buckets,
                   kFileNameSuffixPercentDataSizeOnRefKeys,
                   label_pndatasize_nblocks, nblocks);
}
void BlockCacheTraceAnalyzer::WriteAccessTimeline(const std::string& label_str,
                                                  uint64_t time_unit,
                                                  bool user_access_only) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  uint64_t start_time = port::kMaxUint64;
  uint64_t end_time = 0;
  std::map<std::string, std::map<uint64_t, uint64_t>> label_access_timeline;
  std::map<uint64_t, std::vector<std::string>> access_count_block_id_map;
  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
                            uint32_t level, TraceType type,
                            const std::string& /*block_key*/, uint64_t block_id,
                            const BlockAccessInfo& block) {
    uint64_t naccesses = 0;
    for (auto const& timeline : block.caller_num_accesses_timeline) {
      const TableReaderCaller caller = timeline.first;
      if (user_access_only && !is_user_access(caller)) {
        continue;
      }
      const std::string label =
          BuildLabel(labels, cf_name, fd, level, type, caller, block_id, block);
      for (auto const& naccess : timeline.second) {
        const uint64_t timestamp = naccess.first / time_unit;
        const uint64_t num = naccess.second;
        label_access_timeline[label][timestamp] += num;
        start_time = std::min(start_time, timestamp);
        end_time = std::max(end_time, timestamp);
        naccesses += num;
      }
    }
    if (naccesses > 0) {
      access_count_block_id_map[naccesses].push_back(std::to_string(block_id));
    }
  };
  TraverseBlocks(block_callback, &labels);
  // We have label_access_timeline now. Write them into a file.
  const std::string user_access_prefix =
      user_access_only ? "user_access_only_" : "all_access_";
  const std::string output_path = output_dir_ + "/" + user_access_prefix +
                                  label_str + "_" + std::to_string(time_unit) +
                                  "_" + kFileNameSuffixAccessTimeline;
  std::ofstream out(output_path);
  if (!out.is_open()) {
    return;
  }
  std::string header("time");
  if (labels.find("block") != labels.end()) {
    for (uint64_t now = start_time; now <= end_time; now++) {
      header += ",";
      header += std::to_string(now);
    }
    out << header << std::endl;
    // Write the most frequently accessed blocks first.
    for (auto naccess_it = access_count_block_id_map.rbegin();
         naccess_it != access_count_block_id_map.rend(); naccess_it++) {
      for (auto& block_id_it : naccess_it->second) {
        std::string row(block_id_it);
        for (uint64_t now = start_time; now <= end_time; now++) {
          auto it = label_access_timeline[block_id_it].find(now);
          row += ",";
          if (it != label_access_timeline[block_id_it].end()) {
            row += std::to_string(it->second);
          } else {
            row += "0";
          }
        }
        out << row << std::endl;
      }
    }
    out.close();
    return;
  }
  for (uint64_t now = start_time; now <= end_time; now++) {
    header += ",";
    header += std::to_string(now);
  }
  out << header << std::endl;
  for (auto const& label : label_access_timeline) {
    std::string row(label.first);
    for (uint64_t now = start_time; now <= end_time; now++) {
      auto it = label.second.find(now);
      row += ",";
      if (it != label.second.end()) {
        row += std::to_string(it->second);
      } else {
        row += "0";
      }
    }
    out << row << std::endl;
  }
  out.close();
}
void BlockCacheTraceAnalyzer::WriteReuseDistance(
    const std::string& label_str,
    const std::vector<uint64_t>& distance_buckets) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  std::map<std::string, std::map<uint64_t, uint64_t>> label_distance_num_reuses;
  uint64_t total_num_reuses = 0;
  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
                            uint32_t level, TraceType type,
                            const std::string& /*block_key*/, uint64_t block_id,
                            const BlockAccessInfo& block) {
    const std::string label = BuildLabel(
        labels, cf_name, fd, level, type,
        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
    if (label_distance_num_reuses.find(label) ==
        label_distance_num_reuses.end()) {
      // The first time we encounter this label.
      for (auto const& distance_bucket : distance_buckets) {
        label_distance_num_reuses[label][distance_bucket] = 0;
      }
    }
    for (auto const& reuse_distance : block.reuse_distance_count) {
      label_distance_num_reuses[label]
          .upper_bound(reuse_distance.first)
          ->second += reuse_distance.second;
      total_num_reuses += reuse_distance.second;
    }
  };
  TraverseBlocks(block_callback, &labels);
  // We have label_naccesses and label_distance_num_reuses now. Write them into
  // a file.
  const std::string output_path =
      output_dir_ + "/" + label_str + "_reuse_distance";
  std::ofstream out(output_path);
  if (!out.is_open()) {
    return;
  }
  std::string header("bucket");
  for (auto const& label_it : label_distance_num_reuses) {
    header += ",";
    header += label_it.first;
  }
  out << header << std::endl;
  for (auto const& bucket : distance_buckets) {
    std::string row(std::to_string(bucket));
    for (auto const& label_it : label_distance_num_reuses) {
      auto const& it = label_it.second.find(bucket);
      assert(it != label_it.second.end());
      row += ",";
      row += std::to_string(percent(it->second, total_num_reuses));
    }
    out << row << std::endl;
  }
  out.close();
}
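// Accumulates reuse-interval counts for 'label': accesses that land on the
// same second count as reuses with interval 0, and each subsequent timestamp
// contributes one reuse whose interval is the gap to the previous timestamp.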
void BlockCacheTraceAnalyzer::UpdateReuseIntervalStats(
    const std::string& label, const std::vector<uint64_t>& time_buckets,
    const std::map<uint64_t, uint64_t> timeline,
    std::map<std::string, std::map<uint64_t, uint64_t>>* label_time_num_reuses,
    uint64_t* total_num_reuses) const {
  assert(label_time_num_reuses);
  assert(total_num_reuses);
  if (label_time_num_reuses->find(label) == label_time_num_reuses->end()) {
    // The first time we encounter this label.
    for (auto const& time_bucket : time_buckets) {
      (*label_time_num_reuses)[label][time_bucket] = 0;
    }
  }
  auto it = timeline.begin();
  uint64_t prev_timestamp = it->first;
  const uint64_t prev_num = it->second;
  it++;
  // Reused within one second.
  if (prev_num > 1) {
    (*label_time_num_reuses)[label].upper_bound(0)->second += prev_num - 1;
    *total_num_reuses += prev_num - 1;
  }
  while (it != timeline.end()) {
    const uint64_t timestamp = it->first;
    const uint64_t num = it->second;
    const uint64_t reuse_interval = timestamp - prev_timestamp;
    (*label_time_num_reuses)[label].upper_bound(reuse_interval)->second += 1;
    if (num > 1) {
      (*label_time_num_reuses)[label].upper_bound(0)->second += num - 1;
    }
    prev_timestamp = timestamp;
    *total_num_reuses += num;
    it++;
  }
}
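// Writes one csv file named "<label_str>_<filename_suffix>" under the output
// directory: one row per bucket and one column per label, where each cell is
// the percentage of 'ntotal' that falls into that bucket.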
void BlockCacheTraceAnalyzer::WriteStatsToFile(
    const std::string& label_str, const std::vector<uint64_t>& time_buckets,
    const std::string& filename_suffix,
    const std::map<std::string, std::map<uint64_t, uint64_t>>& label_data,
    uint64_t ntotal) const {
  const std::string output_path =
      output_dir_ + "/" + label_str + "_" + filename_suffix;
  std::ofstream out(output_path);
  if (!out.is_open()) {
    return;
  }
  std::string header("bucket");
  for (auto const& label_it : label_data) {
    header += ",";
    header += label_it.first;
  }
  out << header << std::endl;
  for (auto const& bucket : time_buckets) {
    std::string row(std::to_string(bucket));
    for (auto const& label_it : label_data) {
      auto const& it = label_it.second.find(bucket);
      assert(it != label_it.second.end());
      row += ",";
      row += std::to_string(percent(it->second, ntotal));
    }
    out << row << std::endl;
  }
  out.close();
}
void BlockCacheTraceAnalyzer::WriteReuseInterval(
    const std::string& label_str,
    const std::vector<uint64_t>& time_buckets) const {
  std::set<std::string> labels = ParseLabelStr(label_str);
  std::map<std::string, std::map<uint64_t, uint64_t>> label_time_num_reuses;
  std::map<std::string, std::map<uint64_t, uint64_t>> label_avg_reuse_nblocks;
  std::map<std::string, std::map<uint64_t, uint64_t>> label_avg_reuse_naccesses;
  uint64_t total_num_reuses = 0;
  uint64_t total_nblocks = 0;
  uint64_t total_accesses = 0;
  auto block_callback = [&](const std::string& cf_name, uint64_t fd,
                            uint32_t level, TraceType type,
                            const std::string& /*block_key*/, uint64_t block_id,
                            const BlockAccessInfo& block) {
    total_nblocks++;
    total_accesses += block.num_accesses;
    uint64_t avg_reuse_interval = 0;
    if (block.num_accesses > 1) {
      avg_reuse_interval = ((block.last_access_time - block.first_access_time) /
                            kMicrosInSecond) /
                           block.num_accesses;
    } else {
      avg_reuse_interval = port::kMaxUint64 - 1;
    }
    if (labels.find(kGroupbyCaller) != labels.end()) {
      for (auto const& timeline : block.caller_num_accesses_timeline) {
        const TableReaderCaller caller = timeline.first;
        const std::string label = BuildLabel(labels, cf_name, fd, level, type,
                                             caller, block_id, block);
        UpdateReuseIntervalStats(label, time_buckets, timeline.second,
                                 &label_time_num_reuses, &total_num_reuses);
      }
      return;
    }
    // Does not group by caller so we need to flatten the access timeline.
    const std::string label = BuildLabel(
        labels, cf_name, fd, level, type,
        TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
    std::map<uint64_t, uint64_t> timeline;
    for (auto const& caller_timeline : block.caller_num_accesses_timeline) {
      for (auto const& time_naccess : caller_timeline.second) {
        timeline[time_naccess.first] += time_naccess.second;
      }
    }
    UpdateReuseIntervalStats(label, time_buckets, timeline,
                             &label_time_num_reuses, &total_num_reuses);
    if (label_avg_reuse_nblocks.find(label) == label_avg_reuse_nblocks.end()) {
      for (auto const& time_bucket : time_buckets) {
        label_avg_reuse_nblocks[label][time_bucket] = 0;
        label_avg_reuse_naccesses[label][time_bucket] = 0;
      }
    }
    label_avg_reuse_nblocks[label].upper_bound(avg_reuse_interval)->second += 1;
    label_avg_reuse_naccesses[label].upper_bound(avg_reuse_interval)->second +=
        block.num_accesses;
  };
  TraverseBlocks(block_callback, &labels);
  // Write the stats into files.
  WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseInterval,
                   label_time_num_reuses, total_num_reuses);
  WriteStatsToFile(label_str, time_buckets, kFileNameSuffixAvgReuseInterval,
                   label_avg_reuse_nblocks, total_nblocks);
  WriteStatsToFile(label_str, time_buckets,
                   kFileNameSuffixAvgReuseIntervalNaccesses,
                   label_avg_reuse_naccesses, total_accesses);
}
  1098. void BlockCacheTraceAnalyzer::WriteReuseLifetime(
  1099. const std::string& label_str,
  1100. const std::vector<uint64_t>& time_buckets) const {
  1101. std::set<std::string> labels = ParseLabelStr(label_str);
  1102. std::map<std::string, std::map<uint64_t, uint64_t>> label_lifetime_nblocks;
  1103. uint64_t total_nblocks = 0;
  1104. auto block_callback = [&](const std::string& cf_name, uint64_t fd,
  1105. uint32_t level, TraceType type,
  1106. const std::string& /*block_key*/, uint64_t block_id,
  1107. const BlockAccessInfo& block) {
  1108. uint64_t lifetime = 0;
  1109. if (block.num_accesses > 1) {
  1110. lifetime =
  1111. (block.last_access_time - block.first_access_time) / kMicrosInSecond;
  1112. } else {
  1113. lifetime = port::kMaxUint64 - 1;
  1114. }
  1115. const std::string label = BuildLabel(
  1116. labels, cf_name, fd, level, type,
  1117. TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
  1118. if (label_lifetime_nblocks.find(label) == label_lifetime_nblocks.end()) {
  1119. // The first time we encounter this label.
  1120. for (auto const& time_bucket : time_buckets) {
  1121. label_lifetime_nblocks[label][time_bucket] = 0;
  1122. }
  1123. }
  1124. label_lifetime_nblocks[label].upper_bound(lifetime)->second += 1;
  1125. total_nblocks += 1;
  1126. };
  1127. TraverseBlocks(block_callback, &labels);
  1128. WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseLifetime,
  1129. label_lifetime_nblocks, total_nblocks);
  1130. }
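// Writes a CSV per block type where cell (i, j) is the percentage of blocks
// accessed in time window i that are accessed again in time window j. The
// trace duration is split into windows of reuse_window seconds.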
  1131. void BlockCacheTraceAnalyzer::WriteBlockReuseTimeline(
  1132. const uint64_t reuse_window, bool user_access_only, TraceType block_type) const {
1133. // A map from block id to an array of bools indicating whether the block is
1134. // accessed in each time window.
  1135. std::map<uint64_t, std::vector<bool>> block_accessed;
  1136. const uint64_t trace_duration =
  1137. trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
  1138. const uint64_t reuse_vector_size = (trace_duration / reuse_window);
  1139. if (reuse_vector_size < 2) {
1140. // The trace spans fewer than two reuse windows, so we cannot calculate
1141. // the reused percentage of blocks.
  1142. return;
  1143. }
  1144. auto block_callback = [&](const std::string& /*cf_name*/, uint64_t /*fd*/,
  1145. uint32_t /*level*/, TraceType /*type*/,
  1146. const std::string& /*block_key*/, uint64_t block_id,
  1147. const BlockAccessInfo& block) {
  1148. if (block_accessed.find(block_id) == block_accessed.end()) {
  1149. block_accessed[block_id].resize(reuse_vector_size);
  1150. for (uint64_t i = 0; i < reuse_vector_size; i++) {
  1151. block_accessed[block_id][i] = false;
  1152. }
  1153. }
  1154. for (auto const& caller_num : block.caller_num_accesses_timeline) {
  1155. const TableReaderCaller caller = caller_num.first;
  1156. for (auto const& timeline : caller_num.second) {
  1157. const uint64_t timestamp = timeline.first;
  1158. const uint64_t elapsed_time =
  1159. timestamp - trace_start_timestamp_in_seconds_;
  1160. if (!user_access_only || is_user_access(caller)) {
  1161. uint64_t index =
  1162. std::min(elapsed_time / reuse_window, reuse_vector_size - 1);
  1163. block_accessed[block_id][index] = true;
  1164. }
  1165. }
  1166. }
  1167. };
  1168. TraverseBlocks(block_callback);
1169. // Cell (start_time, i) counts blocks accessed in both window start_time and window i.
  1170. std::unique_ptr<uint64_t[]> reuse_table(new uint64_t[reuse_vector_size * reuse_vector_size]);
  1171. for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
  1172. // Initialize the reuse_table.
  1173. for (uint64_t i = 0; i < reuse_vector_size; i++) {
  1174. reuse_table[start_time * reuse_vector_size + i] = 0;
  1175. }
  1176. // Examine all blocks.
  1177. for (auto const& block : block_accessed) {
  1178. for (uint64_t i = start_time; i < reuse_vector_size; i++) {
  1179. if (block.second[start_time] && block.second[i]) {
  1180. // This block is accessed at start time and at the current time. We
  1181. // increment reuse_table[start_time][i] since it is reused at the ith
  1182. // window.
  1183. reuse_table[start_time * reuse_vector_size + i]++;
  1184. }
  1185. }
  1186. }
  1187. }
  1188. const std::string user_access_prefix =
  1189. user_access_only ? "_user_access_only_" : "_all_access_";
  1190. const std::string output_path =
  1191. output_dir_ + "/" + block_type_to_string(block_type) +
  1192. user_access_prefix + std::to_string(reuse_window) + "_" +
  1193. kFileNameSuffixAccessReuseBlocksTimeline;
  1194. std::ofstream out(output_path);
  1195. if (!out.is_open()) {
  1196. return;
  1197. }
  1198. std::string header("start_time");
  1199. for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
  1200. header += ",";
  1201. header += std::to_string(start_time);
  1202. }
  1203. out << header << std::endl;
  1204. for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
  1205. std::string row(std::to_string(start_time * reuse_window));
  1206. for (uint64_t j = 0; j < reuse_vector_size; j++) {
  1207. row += ",";
  1208. if (j < start_time) {
  1209. row += "100.0";
  1210. } else {
  1211. row += std::to_string(percent(reuse_table[start_time * reuse_vector_size + j],
  1212. reuse_table[start_time * reuse_vector_size + start_time]));
  1213. }
  1214. }
  1215. out << row << std::endl;
  1216. }
  1217. out.close();
  1218. }
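// Helper that appends, for every column family in the trace, the percentage
// of total_accesses attributed to it in cf_access_count (0 when the column
// family has no entry), producing the per-row CSV cells used below.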
  1219. std::string BlockCacheTraceAnalyzer::OutputPercentAccessStats(
  1220. uint64_t total_accesses,
  1221. const std::map<std::string, uint64_t>& cf_access_count) const {
  1222. std::string row;
  1223. for (auto const& cf_aggregates : cf_aggregates_map_) {
  1224. const std::string& cf_name = cf_aggregates.first;
  1225. const auto& naccess = cf_access_count.find(cf_name);
  1226. row += ",";
  1227. if (naccess != cf_access_count.end()) {
  1228. row += std::to_string(percent(naccess->second, total_accesses));
  1229. } else {
  1230. row += "0";
  1231. }
  1232. }
  1233. return row;
  1234. }
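// Writes a caller x column-family CSV where each cell is the percentage of
// all accesses issued by that caller against that column family.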
  1235. void BlockCacheTraceAnalyzer::WritePercentAccessSummaryStats() const {
  1236. std::map<TableReaderCaller, std::map<std::string, uint64_t>>
  1237. caller_cf_accesses;
  1238. uint64_t total_accesses = 0;
  1239. auto block_callback =
  1240. [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
  1241. TraceType /*type*/, const std::string& /*block_key*/,
  1242. uint64_t /*block_id*/, const BlockAccessInfo& block) {
  1243. for (auto const& caller_num : block.caller_num_access_map) {
  1244. const TableReaderCaller caller = caller_num.first;
  1245. const uint64_t naccess = caller_num.second;
  1246. caller_cf_accesses[caller][cf_name] += naccess;
  1247. total_accesses += naccess;
  1248. }
  1249. };
  1250. TraverseBlocks(block_callback);
  1251. const std::string output_path =
  1252. output_dir_ + "/" + kFileNameSuffixPercentOfAccessSummary;
  1253. std::ofstream out(output_path);
  1254. if (!out.is_open()) {
  1255. return;
  1256. }
  1257. std::string header("caller");
  1258. for (auto const& cf_name : cf_aggregates_map_) {
  1259. header += ",";
  1260. header += cf_name.first;
  1261. }
  1262. out << header << std::endl;
  1263. for (auto const& cf_naccess_it : caller_cf_accesses) {
  1264. const TableReaderCaller caller = cf_naccess_it.first;
  1265. std::string row;
  1266. row += caller_to_string(caller);
  1267. row += OutputPercentAccessStats(total_accesses, cf_naccess_it.second);
  1268. out << row << std::endl;
  1269. }
  1270. out.close();
  1271. }
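// For one caller, writes two CSVs breaking its accesses down by LSM level
// per column family and by block type per column family.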
  1272. void BlockCacheTraceAnalyzer::WriteDetailedPercentAccessSummaryStats(
  1273. TableReaderCaller analyzing_caller) const {
  1274. std::map<uint32_t, std::map<std::string, uint64_t>> level_cf_accesses;
  1275. std::map<TraceType, std::map<std::string, uint64_t>> bt_cf_accesses;
  1276. uint64_t total_accesses = 0;
  1277. auto block_callback =
  1278. [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t level,
  1279. TraceType type, const std::string& /*block_key*/,
  1280. uint64_t /*block_id*/, const BlockAccessInfo& block) {
  1281. for (auto const& caller_num : block.caller_num_access_map) {
  1282. const TableReaderCaller caller = caller_num.first;
  1283. if (caller == analyzing_caller) {
  1284. const uint64_t naccess = caller_num.second;
  1285. level_cf_accesses[level][cf_name] += naccess;
  1286. bt_cf_accesses[type][cf_name] += naccess;
  1287. total_accesses += naccess;
  1288. }
  1289. }
  1290. };
  1291. TraverseBlocks(block_callback);
  1292. {
  1293. const std::string output_path =
  1294. output_dir_ + "/" + caller_to_string(analyzing_caller) + "_level_" +
  1295. kFileNameSuffixPercentOfAccessSummary;
  1296. std::ofstream out(output_path);
  1297. if (!out.is_open()) {
  1298. return;
  1299. }
  1300. std::string header("level");
  1301. for (auto const& cf_name : cf_aggregates_map_) {
  1302. header += ",";
  1303. header += cf_name.first;
  1304. }
  1305. out << header << std::endl;
  1306. for (auto const& level_naccess_it : level_cf_accesses) {
  1307. const uint32_t level = level_naccess_it.first;
  1308. std::string row;
  1309. row += std::to_string(level);
  1310. row += OutputPercentAccessStats(total_accesses, level_naccess_it.second);
  1311. out << row << std::endl;
  1312. }
  1313. out.close();
  1314. }
  1315. {
  1316. const std::string output_path =
  1317. output_dir_ + "/" + caller_to_string(analyzing_caller) + "_bt_" +
  1318. kFileNameSuffixPercentOfAccessSummary;
  1319. std::ofstream out(output_path);
  1320. if (!out.is_open()) {
  1321. return;
  1322. }
  1323. std::string header("bt");
  1324. for (auto const& cf_name : cf_aggregates_map_) {
  1325. header += ",";
  1326. header += cf_name.first;
  1327. }
  1328. out << header << std::endl;
  1329. for (auto const& bt_naccess_it : bt_cf_accesses) {
  1330. const TraceType bt = bt_naccess_it.first;
  1331. std::string row;
  1332. row += block_type_to_string(bt);
  1333. row += OutputPercentAccessStats(total_accesses, bt_naccess_it.second);
  1334. out << row << std::endl;
  1335. }
  1336. out.close();
  1337. }
  1338. }
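// Buckets blocks by their number of accesses (optionally counting only user
// accesses) and writes how many blocks fall into each bucket, broken down by
// column family ("cf") and by block type ("bt").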
  1339. void BlockCacheTraceAnalyzer::WriteAccessCountSummaryStats(
  1340. const std::vector<uint64_t>& access_count_buckets,
  1341. bool user_access_only) const {
1342. // x: access count buckets.
1343. // y: # of blocks whose access count falls into the bucket.
  1344. std::map<std::string, std::map<uint64_t, uint64_t>> bt_access_nblocks;
  1345. std::map<std::string, std::map<uint64_t, uint64_t>> cf_access_nblocks;
  1346. uint64_t total_nblocks = 0;
  1347. auto block_callback =
  1348. [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
  1349. TraceType type, const std::string& /*block_key*/,
  1350. uint64_t /*block_id*/, const BlockAccessInfo& block) {
  1351. const std::string type_str = block_type_to_string(type);
  1352. if (cf_access_nblocks.find(cf_name) == cf_access_nblocks.end()) {
  1353. // initialize.
  1354. for (auto& access : access_count_buckets) {
  1355. cf_access_nblocks[cf_name][access] = 0;
  1356. }
  1357. }
  1358. if (bt_access_nblocks.find(type_str) == bt_access_nblocks.end()) {
  1359. // initialize.
  1360. for (auto& access : access_count_buckets) {
  1361. bt_access_nblocks[type_str][access] = 0;
  1362. }
  1363. }
  1364. uint64_t naccesses = 0;
  1365. for (auto const& caller_access : block.caller_num_access_map) {
  1366. if (!user_access_only || is_user_access(caller_access.first)) {
  1367. naccesses += caller_access.second;
  1368. }
  1369. }
  1370. if (naccesses == 0) {
  1371. return;
  1372. }
  1373. total_nblocks += 1;
  1374. bt_access_nblocks[type_str].upper_bound(naccesses)->second += 1;
  1375. cf_access_nblocks[cf_name].upper_bound(naccesses)->second += 1;
  1376. };
  1377. TraverseBlocks(block_callback);
  1378. const std::string user_access_prefix =
  1379. user_access_only ? "user_access_only_" : "all_access_";
  1380. WriteStatsToFile("cf", access_count_buckets,
  1381. user_access_prefix + kFileNameSuffixAccessCountSummary,
  1382. cf_access_nblocks, total_nblocks);
  1383. WriteStatsToFile("bt", access_count_buckets,
  1384. user_access_prefix + kFileNameSuffixAccessCountSummary,
  1385. bt_access_nblocks, total_nblocks);
  1386. }
  1387. BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
  1388. const std::string& trace_file_path, const std::string& output_dir,
  1389. const std::string& human_readable_trace_file_path,
  1390. bool compute_reuse_distance, bool mrc_only,
  1391. bool is_human_readable_trace_file,
  1392. std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator)
  1393. : env_(ROCKSDB_NAMESPACE::Env::Default()),
  1394. trace_file_path_(trace_file_path),
  1395. output_dir_(output_dir),
  1396. human_readable_trace_file_path_(human_readable_trace_file_path),
  1397. compute_reuse_distance_(compute_reuse_distance),
  1398. mrc_only_(mrc_only),
  1399. is_human_readable_trace_file_(is_human_readable_trace_file),
  1400. cache_simulator_(std::move(cache_simulator)) {}
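// The reuse distance of an access is the total size of the distinct blocks
// touched since this block's previous access. Adds that distance to the
// block's reuse distance histogram and resets the tracking set.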
  1401. void BlockCacheTraceAnalyzer::ComputeReuseDistance(
  1402. BlockAccessInfo* info) const {
  1403. assert(info);
  1404. if (info->num_accesses == 0) {
  1405. return;
  1406. }
  1407. uint64_t reuse_distance = 0;
  1408. for (auto const& block_key : info->unique_blocks_since_last_access) {
  1409. auto const& it = block_info_map_.find(block_key);
  1410. // This block must exist.
  1411. assert(it != block_info_map_.end());
  1412. reuse_distance += it->second->block_size;
  1413. }
  1414. info->reuse_distance_count[reuse_distance] += 1;
1415. // Clear the set so reuse distance tracking restarts from this access.
  1416. info->unique_blocks_since_last_access.clear();
  1417. }
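// Folds one trace record into the column family -> file -> block type ->
// block aggregation hierarchy, assigns ids to newly seen blocks and Get keys,
// updates reuse distance bookkeeping when enabled, and forwards the record to
// the human-readable trace writer.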
  1418. Status BlockCacheTraceAnalyzer::RecordAccess(
  1419. const BlockCacheTraceRecord& access) {
  1420. ColumnFamilyAccessInfoAggregate& cf_aggr = cf_aggregates_map_[access.cf_name];
  1421. SSTFileAccessInfoAggregate& file_aggr =
  1422. cf_aggr.fd_aggregates_map[access.sst_fd_number];
  1423. file_aggr.level = access.level;
  1424. BlockTypeAccessInfoAggregate& block_type_aggr =
  1425. file_aggr.block_type_aggregates_map[access.block_type];
  1426. if (block_type_aggr.block_access_info_map.find(access.block_key) ==
  1427. block_type_aggr.block_access_info_map.end()) {
  1428. block_type_aggr.block_access_info_map[access.block_key].block_id =
  1429. unique_block_id_;
  1430. unique_block_id_++;
  1431. }
  1432. BlockAccessInfo& block_access_info =
  1433. block_type_aggr.block_access_info_map[access.block_key];
  1434. if (compute_reuse_distance_) {
  1435. ComputeReuseDistance(&block_access_info);
  1436. }
  1437. block_access_info.AddAccess(access, access_sequence_number_);
  1438. block_info_map_[access.block_key] = &block_access_info;
  1439. uint64_t get_key_id = 0;
  1440. if (access.caller == TableReaderCaller::kUserGet &&
  1441. access.get_id != BlockCacheTraceHelper::kReservedGetId) {
  1442. std::string user_key = ExtractUserKey(access.referenced_key).ToString();
  1443. if (get_key_info_map_.find(user_key) == get_key_info_map_.end()) {
  1444. get_key_info_map_[user_key].key_id = unique_get_key_id_;
  1445. unique_get_key_id_++;
  1446. }
  1447. get_key_id = get_key_info_map_[user_key].key_id;
  1448. get_key_info_map_[user_key].AddAccess(access, access_sequence_number_);
  1449. }
  1450. if (compute_reuse_distance_) {
1451. // Record this block's key in every existing block's set of blocks seen since its last access.
  1452. for (auto& cf_aggregates : cf_aggregates_map_) {
  1453. for (auto& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
  1454. for (auto& block_type_aggregates :
  1455. file_aggregates.second.block_type_aggregates_map) {
  1456. for (auto& existing_block :
  1457. block_type_aggregates.second.block_access_info_map) {
  1458. existing_block.second.unique_blocks_since_last_access.insert(
  1459. access.block_key);
  1460. }
  1461. }
  1462. }
  1463. }
  1464. }
  1465. return human_readable_trace_writer_.WriteHumanReadableTraceRecord(
  1466. access, block_access_info.block_id, get_key_id);
  1467. }
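// Main analysis loop: opens the (binary or human-readable) trace, replays
// every access through RecordAccess (skipped in MRC-only mode) and through
// the cache simulator if one is configured, tracks the observed miss ratio,
// and prints periodic progress to stdout.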
  1468. Status BlockCacheTraceAnalyzer::Analyze() {
  1469. std::unique_ptr<BlockCacheTraceReader> reader;
  1470. Status s = Status::OK();
  1471. if (is_human_readable_trace_file_) {
  1472. reader.reset(new BlockCacheHumanReadableTraceReader(trace_file_path_));
  1473. } else {
  1474. std::unique_ptr<TraceReader> trace_reader;
  1475. s = NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
  1476. if (!s.ok()) {
  1477. return s;
  1478. }
  1479. reader.reset(new BlockCacheTraceReader(std::move(trace_reader)));
  1480. s = reader->ReadHeader(&header_);
  1481. if (!s.ok()) {
  1482. return s;
  1483. }
  1484. }
  1485. if (!human_readable_trace_file_path_.empty()) {
  1486. s = human_readable_trace_writer_.NewWritableFile(
  1487. human_readable_trace_file_path_, env_);
  1488. if (!s.ok()) {
  1489. return s;
  1490. }
  1491. }
  1492. uint64_t start = env_->NowMicros();
  1493. uint64_t time_interval = 0;
  1494. while (s.ok()) {
  1495. BlockCacheTraceRecord access;
  1496. s = reader->ReadAccess(&access);
  1497. if (!s.ok()) {
  1498. break;
  1499. }
  1500. if (!mrc_only_) {
  1501. s = RecordAccess(access);
  1502. if (!s.ok()) {
  1503. break;
  1504. }
  1505. }
  1506. if (trace_start_timestamp_in_seconds_ == 0) {
  1507. trace_start_timestamp_in_seconds_ =
  1508. access.access_timestamp / kMicrosInSecond;
  1509. }
  1510. trace_end_timestamp_in_seconds_ = access.access_timestamp / kMicrosInSecond;
  1511. miss_ratio_stats_.UpdateMetrics(access.access_timestamp,
  1512. is_user_access(access.caller),
  1513. access.is_cache_hit == Boolean::kFalse);
  1514. if (cache_simulator_) {
  1515. cache_simulator_->Access(access);
  1516. }
  1517. access_sequence_number_++;
  1518. uint64_t now = env_->NowMicros();
  1519. uint64_t duration = (now - start) / kMicrosInSecond;
  1520. if (duration > 10 * time_interval) {
  1521. uint64_t trace_duration =
  1522. trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
  1523. fprintf(stdout,
  1524. "Running for %" PRIu64 " seconds: Processed %" PRIu64
  1525. " records/second. Trace duration %" PRIu64
  1526. " seconds. Observed miss ratio %.2f\n",
  1527. duration, duration > 0 ? access_sequence_number_ / duration : 0,
  1528. trace_duration, miss_ratio_stats_.miss_ratio());
  1529. time_interval++;
  1530. }
  1531. }
  1532. uint64_t now = env_->NowMicros();
  1533. uint64_t duration = (now - start) / kMicrosInSecond;
  1534. uint64_t trace_duration =
  1535. trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
  1536. fprintf(stdout,
  1537. "Running for %" PRIu64 " seconds: Processed %" PRIu64
  1538. " records/second. Trace duration %" PRIu64
  1539. " seconds. Observed miss ratio %.2f\n",
  1540. duration, duration > 0 ? access_sequence_number_ / duration : 0,
  1541. trace_duration, miss_ratio_stats_.miss_ratio());
  1542. return s;
  1543. }
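// Prints block size histograms: overall, per block type, and per column
// family and block type. Blocks whose size is unknown (recorded as 0) are
// skipped.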
  1544. void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
  1545. HistogramStat bs_stats;
  1546. std::map<TraceType, HistogramStat> bt_stats_map;
  1547. std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
  1548. auto block_callback =
  1549. [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
  1550. TraceType type, const std::string& /*block_key*/,
  1551. uint64_t /*block_id*/, const BlockAccessInfo& block) {
  1552. if (block.block_size == 0) {
  1553. // Block size may be 0 when 1) compaction observes a cache miss and
  1554. // does not insert the missing block into the cache again. 2)
  1555. // fetching filter blocks in SST files at the last level.
  1556. return;
  1557. }
  1558. bs_stats.Add(block.block_size);
  1559. bt_stats_map[type].Add(block.block_size);
  1560. cf_bt_stats_map[cf_name][type].Add(block.block_size);
  1561. };
  1562. TraverseBlocks(block_callback);
  1563. fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str());
  1564. for (auto const& bt_stats : bt_stats_map) {
  1565. print_break_lines(/*num_break_lines=*/1);
  1566. fprintf(stdout, "Block size stats for block type %s: \n%s",
  1567. block_type_to_string(bt_stats.first).c_str(),
  1568. bt_stats.second.ToString().c_str());
  1569. }
  1570. for (auto const& cf_bt_stats : cf_bt_stats_map) {
  1571. const std::string& cf_name = cf_bt_stats.first;
  1572. for (auto const& bt_stats : cf_bt_stats.second) {
  1573. print_break_lines(/*num_break_lines=*/1);
  1574. fprintf(stdout,
  1575. "Block size stats for column family %s and block type %s: \n%s",
  1576. cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
  1577. bt_stats.second.ToString().c_str());
  1578. }
  1579. }
  1580. }
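// Prints the per-block access count histograms and then details the bottom-k
// least-accessed and top-k most-accessed data blocks, including a per-caller
// breakdown and, for the top blocks, referenced-key statistics.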
  1581. void BlockCacheTraceAnalyzer::PrintAccessCountStats(bool user_access_only,
  1582. uint32_t bottom_k,
  1583. uint32_t top_k) const {
  1584. HistogramStat access_stats;
  1585. std::map<TraceType, HistogramStat> bt_stats_map;
  1586. std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
  1587. std::map<uint64_t, std::vector<std::string>> access_count_blocks;
  1588. auto block_callback = [&](const std::string& cf_name, uint64_t /*fd*/,
  1589. uint32_t /*level*/, TraceType type,
  1590. const std::string& block_key, uint64_t /*block_id*/,
  1591. const BlockAccessInfo& block) {
  1592. uint64_t naccesses = 0;
  1593. for (auto const& caller_access : block.caller_num_access_map) {
  1594. if (!user_access_only || is_user_access(caller_access.first)) {
  1595. naccesses += caller_access.second;
  1596. }
  1597. }
  1598. if (naccesses == 0) {
  1599. return;
  1600. }
  1601. if (type == TraceType::kBlockTraceDataBlock) {
  1602. access_count_blocks[naccesses].push_back(block_key);
  1603. }
  1604. access_stats.Add(naccesses);
  1605. bt_stats_map[type].Add(naccesses);
  1606. cf_bt_stats_map[cf_name][type].Add(naccesses);
  1607. };
  1608. TraverseBlocks(block_callback);
  1609. fprintf(stdout,
  1610. "Block access count stats: The number of accesses per block. %s\n%s",
  1611. user_access_only ? "User accesses only" : "All accesses",
  1612. access_stats.ToString().c_str());
  1613. uint32_t bottom_k_index = 0;
  1614. for (auto naccess_it = access_count_blocks.begin();
  1615. naccess_it != access_count_blocks.end(); naccess_it++) {
  1616. bottom_k_index++;
  1617. if (bottom_k_index >= bottom_k) {
  1618. break;
  1619. }
  1620. std::map<TableReaderCaller, uint64_t> caller_naccesses;
  1621. uint64_t naccesses = 0;
  1622. for (auto const& block_id : naccess_it->second) {
  1623. BlockAccessInfo* block = block_info_map_.find(block_id)->second;
  1624. for (auto const& caller_access : block->caller_num_access_map) {
  1625. if (!user_access_only || is_user_access(caller_access.first)) {
  1626. caller_naccesses[caller_access.first] += caller_access.second;
  1627. naccesses += caller_access.second;
  1628. }
  1629. }
  1630. }
  1631. std::string statistics("Caller:");
  1632. for (auto const& caller_naccessess_it : caller_naccesses) {
  1633. statistics += caller_to_string(caller_naccessess_it.first);
  1634. statistics += ":";
  1635. statistics +=
  1636. std::to_string(percent(caller_naccessess_it.second, naccesses));
  1637. statistics += ",";
  1638. }
  1639. fprintf(stdout,
  1640. "Bottom %" PRIu32 " access count. Access count=%" PRIu64
  1641. " nblocks=%" ROCKSDB_PRIszt " %s\n",
  1642. bottom_k, naccess_it->first, naccess_it->second.size(),
  1643. statistics.c_str());
  1644. }
  1645. uint32_t top_k_index = 0;
  1646. for (auto naccess_it = access_count_blocks.rbegin();
  1647. naccess_it != access_count_blocks.rend(); naccess_it++) {
  1648. top_k_index++;
  1649. if (top_k_index >= top_k) {
  1650. break;
  1651. }
  1652. for (auto const& block_id : naccess_it->second) {
  1653. BlockAccessInfo* block = block_info_map_.find(block_id)->second;
  1654. std::string statistics("Caller:");
  1655. uint64_t naccesses = 0;
  1656. for (auto const& caller_access : block->caller_num_access_map) {
  1657. if (!user_access_only || is_user_access(caller_access.first)) {
  1658. naccesses += caller_access.second;
  1659. }
  1660. }
  1661. assert(naccesses > 0);
  1662. for (auto const& caller_access : block->caller_num_access_map) {
  1663. if (!user_access_only || is_user_access(caller_access.first)) {
  1664. statistics += ",";
  1665. statistics += caller_to_string(caller_access.first);
  1666. statistics += ":";
  1667. statistics +=
  1668. std::to_string(percent(caller_access.second, naccesses));
  1669. }
  1670. }
  1671. uint64_t ref_keys_accesses = 0;
  1672. uint64_t ref_keys_does_not_exist_accesses = 0;
  1673. for (auto const& ref_key_caller_access : block->key_num_access_map) {
  1674. for (auto const& caller_access : ref_key_caller_access.second) {
  1675. if (!user_access_only || is_user_access(caller_access.first)) {
  1676. ref_keys_accesses += caller_access.second;
  1677. }
  1678. }
  1679. }
  1680. for (auto const& ref_key_caller_access :
  1681. block->non_exist_key_num_access_map) {
  1682. for (auto const& caller_access : ref_key_caller_access.second) {
  1683. if (!user_access_only || is_user_access(caller_access.first)) {
  1684. ref_keys_does_not_exist_accesses += caller_access.second;
  1685. }
  1686. }
  1687. }
  1688. statistics += ",nkeys=";
  1689. statistics += std::to_string(block->num_keys);
  1690. statistics += ",block_size=";
  1691. statistics += std::to_string(block->block_size);
  1692. statistics += ",num_ref_keys=";
  1693. statistics += std::to_string(block->key_num_access_map.size());
  1694. statistics += ",percent_access_ref_keys=";
  1695. statistics += std::to_string(percent(ref_keys_accesses, naccesses));
  1696. statistics += ",num_ref_keys_does_not_exist=";
  1697. statistics += std::to_string(block->non_exist_key_num_access_map.size());
  1698. statistics += ",percent_access_ref_keys_does_not_exist=";
  1699. statistics +=
  1700. std::to_string(percent(ref_keys_does_not_exist_accesses, naccesses));
  1701. statistics += ",ref_data_size=";
  1702. statistics += std::to_string(block->referenced_data_size);
  1703. fprintf(stdout,
  1704. "Top %" PRIu32 " access count blocks access_count=%" PRIu64
  1705. " %s\n",
  1706. top_k, naccess_it->first, statistics.c_str());
  1707. }
  1708. }
  1709. for (auto const& bt_stats : bt_stats_map) {
  1710. print_break_lines(/*num_break_lines=*/1);
  1711. fprintf(stdout, "Break down by block type %s: \n%s",
  1712. block_type_to_string(bt_stats.first).c_str(),
  1713. bt_stats.second.ToString().c_str());
  1714. }
  1715. for (auto const& cf_bt_stats : cf_bt_stats_map) {
  1716. const std::string& cf_name = cf_bt_stats.first;
  1717. for (auto const& bt_stats : cf_bt_stats.second) {
  1718. print_break_lines(/*num_break_lines=*/1);
  1719. fprintf(stdout,
  1720. "Break down by column family %s and block type "
  1721. "%s: \n%s",
  1722. cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
  1723. bt_stats.second.ToString().c_str());
  1724. }
  1725. }
  1726. }
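// Prints data block locality histograms: the fraction of keys in a block that
// are referenced (existing and non-existing keys), the fraction of accesses
// that find their key in the block, and the average and standard deviation of
// accesses per key.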
  1727. void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
  1728. HistogramStat existing_keys_stats;
  1729. std::map<std::string, HistogramStat> cf_existing_keys_stats_map;
  1730. HistogramStat non_existing_keys_stats;
  1731. std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map;
  1732. HistogramStat block_access_stats;
  1733. std::map<std::string, HistogramStat> cf_block_access_info;
  1734. HistogramStat percent_referenced_bytes;
  1735. std::map<std::string, HistogramStat> cf_percent_referenced_bytes;
  1736. // Total number of accesses in a data block / number of keys in a data block.
  1737. HistogramStat avg_naccesses_per_key_in_a_data_block;
  1738. std::map<std::string, HistogramStat> cf_avg_naccesses_per_key_in_a_data_block;
  1739. // The standard deviation on the number of accesses of a key in a data block.
  1740. HistogramStat stdev_naccesses_per_key_in_a_data_block;
  1741. std::map<std::string, HistogramStat>
  1742. cf_stdev_naccesses_per_key_in_a_data_block;
  1743. auto block_callback =
  1744. [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
  1745. TraceType /*type*/, const std::string& /*block_key*/,
  1746. uint64_t /*block_id*/, const BlockAccessInfo& block) {
  1747. if (block.num_keys == 0) {
  1748. return;
  1749. }
1750. // Scale fractions by 10000 so the integer histograms keep four decimal places.
  1751. uint64_t percent_referenced_for_existing_keys = (uint64_t)(
  1752. ((double)block.key_num_access_map.size() / (double)block.num_keys) *
  1753. 10000.0);
  1754. uint64_t percent_referenced_for_non_existing_keys =
  1755. (uint64_t)(((double)block.non_exist_key_num_access_map.size() /
  1756. (double)block.num_keys) *
  1757. 10000.0);
  1758. uint64_t percent_accesses_for_existing_keys =
  1759. (uint64_t)(((double)block.num_referenced_key_exist_in_block /
  1760. (double)block.num_accesses) *
  1761. 10000.0);
  1762. HistogramStat hist_naccess_per_key;
  1763. for (auto const& key_access : block.key_num_access_map) {
  1764. for (auto const& caller_access : key_access.second) {
  1765. hist_naccess_per_key.Add(caller_access.second);
  1766. }
  1767. }
  1768. uint64_t avg_accesses =
  1769. static_cast<uint64_t>(hist_naccess_per_key.Average());
  1770. uint64_t stdev_accesses =
  1771. static_cast<uint64_t>(hist_naccess_per_key.StandardDeviation());
  1772. avg_naccesses_per_key_in_a_data_block.Add(avg_accesses);
  1773. cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses);
  1774. stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses);
  1775. cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add(stdev_accesses);
  1776. existing_keys_stats.Add(percent_referenced_for_existing_keys);
  1777. cf_existing_keys_stats_map[cf_name].Add(
  1778. percent_referenced_for_existing_keys);
  1779. non_existing_keys_stats.Add(percent_referenced_for_non_existing_keys);
  1780. cf_non_existing_keys_stats_map[cf_name].Add(
  1781. percent_referenced_for_non_existing_keys);
  1782. block_access_stats.Add(percent_accesses_for_existing_keys);
  1783. cf_block_access_info[cf_name].Add(percent_accesses_for_existing_keys);
  1784. };
  1785. TraverseBlocks(block_callback);
  1786. fprintf(stdout,
  1787. "Histogram on the number of referenced keys existing in a block over "
  1788. "the total number of keys in a block: \n%s",
  1789. existing_keys_stats.ToString().c_str());
  1790. for (auto const& cf_stats : cf_existing_keys_stats_map) {
  1791. print_break_lines(/*num_break_lines=*/1);
  1792. fprintf(stdout, "Break down by column family %s: \n%s",
  1793. cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
  1794. }
  1795. print_break_lines(/*num_break_lines=*/1);
  1796. fprintf(
  1797. stdout,
  1798. "Histogram on the number of referenced keys DO NOT exist in a block over "
  1799. "the total number of keys in a block: \n%s",
  1800. non_existing_keys_stats.ToString().c_str());
  1801. for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
  1802. print_break_lines(/*num_break_lines=*/1);
  1803. fprintf(stdout, "Break down by column family %s: \n%s",
  1804. cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
  1805. }
  1806. print_break_lines(/*num_break_lines=*/1);
  1807. fprintf(stdout,
  1808. "Histogram on the number of accesses on keys exist in a block over "
  1809. "the total number of accesses in a block: \n%s",
  1810. block_access_stats.ToString().c_str());
  1811. for (auto const& cf_stats : cf_block_access_info) {
  1812. print_break_lines(/*num_break_lines=*/1);
  1813. fprintf(stdout, "Break down by column family %s: \n%s",
  1814. cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
  1815. }
  1816. print_break_lines(/*num_break_lines=*/1);
  1817. fprintf(
  1818. stdout,
  1819. "Histogram on the average number of accesses per key in a block: \n%s",
  1820. avg_naccesses_per_key_in_a_data_block.ToString().c_str());
  1821. for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) {
  1822. fprintf(stdout, "Break down by column family %s: \n%s",
  1823. cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
  1824. }
  1825. print_break_lines(/*num_break_lines=*/1);
  1826. fprintf(stdout,
  1827. "Histogram on the standard deviation of the number of accesses per "
  1828. "key in a block: \n%s",
  1829. stdev_naccesses_per_key_in_a_data_block.ToString().c_str());
  1830. for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) {
  1831. fprintf(stdout, "Break down by column family %s: \n%s",
  1832. cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
  1833. }
  1834. }
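// Prints an overall summary of the trace: per column family and global counts
// of files, blocks, and accesses, with breakdowns by block type, caller,
// level, and (per column family) SST file.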
  1835. void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
  1836. uint64_t total_num_files = 0;
  1837. uint64_t total_num_blocks = 0;
  1838. uint64_t total_num_accesses = 0;
  1839. std::map<TraceType, uint64_t> bt_num_blocks_map;
  1840. std::map<TableReaderCaller, uint64_t> caller_num_access_map;
  1841. std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
  1842. caller_bt_num_access_map;
  1843. std::map<TableReaderCaller, std::map<uint32_t, uint64_t>>
  1844. caller_level_num_access_map;
  1845. for (auto const& cf_aggregates : cf_aggregates_map_) {
  1846. // Stats per column family.
  1847. const std::string& cf_name = cf_aggregates.first;
  1848. uint64_t cf_num_files = 0;
  1849. uint64_t cf_num_blocks = 0;
  1850. std::map<TraceType, uint64_t> cf_bt_blocks;
  1851. uint64_t cf_num_accesses = 0;
  1852. std::map<TableReaderCaller, uint64_t> cf_caller_num_accesses_map;
  1853. std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
  1854. cf_caller_level_num_accesses_map;
  1855. std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
  1856. cf_caller_file_num_accesses_map;
  1857. std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
  1858. cf_caller_bt_num_accesses_map;
  1859. total_num_files += cf_aggregates.second.fd_aggregates_map.size();
  1860. for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
  1861. // Stats per SST file.
  1862. const uint64_t fd = file_aggregates.first;
  1863. const uint32_t level = file_aggregates.second.level;
  1864. cf_num_files++;
  1865. for (auto const& block_type_aggregates :
  1866. file_aggregates.second.block_type_aggregates_map) {
  1867. // Stats per block type.
  1868. const TraceType type = block_type_aggregates.first;
  1869. cf_bt_blocks[type] +=
  1870. block_type_aggregates.second.block_access_info_map.size();
  1871. total_num_blocks +=
  1872. block_type_aggregates.second.block_access_info_map.size();
  1873. bt_num_blocks_map[type] +=
  1874. block_type_aggregates.second.block_access_info_map.size();
  1875. for (auto const& block_access_info :
  1876. block_type_aggregates.second.block_access_info_map) {
  1877. // Stats per block.
  1878. cf_num_blocks++;
  1879. for (auto const& stats :
  1880. block_access_info.second.caller_num_access_map) {
  1881. // Stats per caller.
  1882. const TableReaderCaller caller = stats.first;
  1883. const uint64_t num_accesses = stats.second;
  1884. // Overall stats.
  1885. total_num_accesses += num_accesses;
  1886. caller_num_access_map[caller] += num_accesses;
  1887. caller_bt_num_access_map[caller][type] += num_accesses;
  1888. caller_level_num_access_map[caller][level] += num_accesses;
  1889. // Column Family stats.
  1890. cf_num_accesses += num_accesses;
  1891. cf_caller_num_accesses_map[caller] += num_accesses;
  1892. cf_caller_level_num_accesses_map[caller][level] += num_accesses;
  1893. cf_caller_file_num_accesses_map[caller][fd] += num_accesses;
  1894. cf_caller_bt_num_accesses_map[caller][type] += num_accesses;
  1895. }
  1896. }
  1897. }
  1898. }
  1899. // Print stats.
  1900. print_break_lines(/*num_break_lines=*/3);
  1901. fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
  1902. fprintf(stdout,
  1903. " Number of files:%" PRIu64 " Number of blocks: %" PRIu64
  1904. " Number of accesses: %" PRIu64 "\n",
  1905. cf_num_files, cf_num_blocks, cf_num_accesses);
  1906. for (auto block_type : cf_bt_blocks) {
  1907. fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
  1908. block_type_to_string(block_type.first).c_str(), block_type.second,
  1909. percent(block_type.second, cf_num_blocks));
  1910. }
  1911. for (auto caller : cf_caller_num_accesses_map) {
  1912. const uint64_t naccesses = caller.second;
  1913. print_break_lines(/*num_break_lines=*/1);
  1914. fprintf(stdout,
  1915. "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
  1916. caller_to_string(caller.first).c_str(), naccesses,
  1917. percent(naccesses, cf_num_accesses));
  1918. fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
  1919. caller_to_string(caller.first).c_str());
  1920. for (auto naccess_level :
  1921. cf_caller_level_num_accesses_map[caller.first]) {
  1922. fprintf(stdout,
  1923. "\t Level %" PRIu64 ": Number of accesses: %" PRIu64
  1924. " Percent: %.2f\n",
  1925. naccess_level.first, naccess_level.second,
  1926. percent(naccess_level.second, naccesses));
  1927. }
  1928. fprintf(stdout, "Caller %s: Number of accesses per file break down\n",
  1929. caller_to_string(caller.first).c_str());
  1930. for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) {
  1931. fprintf(stdout,
  1932. "\t File %" PRIu64 ": Number of accesses: %" PRIu64
  1933. " Percent: %.2f\n",
  1934. naccess_file.first, naccess_file.second,
  1935. percent(naccess_file.second, naccesses));
  1936. }
  1937. fprintf(stdout,
  1938. "Caller %s: Number of accesses per block type break down\n",
  1939. caller_to_string(caller.first).c_str());
  1940. for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) {
  1941. fprintf(stdout,
  1942. "\t Block Type %s: Number of accesses: %" PRIu64
  1943. " Percent: %.2f\n",
  1944. block_type_to_string(naccess_type.first).c_str(),
  1945. naccess_type.second, percent(naccess_type.second, naccesses));
  1946. }
  1947. }
  1948. }
  1949. print_break_lines(/*num_break_lines=*/3);
  1950. fprintf(stdout, "Overall statistics:\n");
  1951. fprintf(stdout,
  1952. "Number of files: %" PRIu64 " Number of blocks: %" PRIu64
  1953. " Number of accesses: %" PRIu64 "\n",
  1954. total_num_files, total_num_blocks, total_num_accesses);
  1955. for (auto block_type : bt_num_blocks_map) {
  1956. fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
  1957. block_type_to_string(block_type.first).c_str(), block_type.second,
  1958. percent(block_type.second, total_num_blocks));
  1959. }
  1960. for (auto caller : caller_num_access_map) {
  1961. print_break_lines(/*num_break_lines=*/1);
  1962. uint64_t naccesses = caller.second;
  1963. fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
  1964. caller_to_string(caller.first).c_str(), naccesses,
  1965. percent(naccesses, total_num_accesses));
  1966. fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
  1967. caller_to_string(caller.first).c_str());
  1968. for (auto naccess_level : caller_level_num_access_map[caller.first]) {
  1969. fprintf(stdout,
  1970. "\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n",
  1971. naccess_level.first, naccess_level.second,
  1972. percent(naccess_level.second, naccesses));
  1973. }
  1974. fprintf(stdout, "Caller %s: Number of accesses per block type break down\n",
  1975. caller_to_string(caller.first).c_str());
  1976. for (auto naccess_type : caller_bt_num_access_map[caller.first]) {
  1977. fprintf(stdout,
  1978. "\t Block Type %s: Number of accesses: %" PRIu64
  1979. " Percent: %.2f\n",
  1980. block_type_to_string(naccess_type.first).c_str(),
  1981. naccess_type.second, percent(naccess_type.second, naccesses));
  1982. }
  1983. }
  1984. }
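// Parses the cache simulator configuration file. Each line is expected to be
// "cache_name,num_shard_bits,ghost_cache_capacity,capacity_1[,capacity_2...]";
// malformed lines terminate the tool.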
  1985. std::vector<CacheConfiguration> parse_cache_config_file(
  1986. const std::string& config_path) {
  1987. std::ifstream file(config_path);
  1988. if (!file.is_open()) {
  1989. return {};
  1990. }
  1991. std::vector<CacheConfiguration> configs;
  1992. std::string line;
  1993. while (getline(file, line)) {
  1994. CacheConfiguration cache_config;
  1995. std::stringstream ss(line);
  1996. std::vector<std::string> config_strs;
  1997. while (ss.good()) {
  1998. std::string substr;
  1999. getline(ss, substr, ',');
  2000. config_strs.push_back(substr);
  2001. }
  2002. // Sanity checks.
  2003. if (config_strs.size() < 4) {
  2004. fprintf(stderr, "Invalid cache simulator configuration %s\n",
  2005. line.c_str());
  2006. exit(1);
  2007. }
  2008. if (kSupportedCacheNames.find(" " + config_strs[0] + " ") ==
  2009. std::string::npos) {
  2010. fprintf(stderr, "Invalid cache name %s. Supported cache names are %s\n",
  2011. line.c_str(), kSupportedCacheNames.c_str());
  2012. exit(1);
  2013. }
  2014. cache_config.cache_name = config_strs[0];
  2015. cache_config.num_shard_bits = ParseUint32(config_strs[1]);
  2016. cache_config.ghost_cache_capacity = ParseUint64(config_strs[2]);
  2017. for (uint32_t i = 3; i < config_strs.size(); i++) {
  2018. uint64_t capacity = ParseUint64(config_strs[i]);
  2019. if (capacity == 0) {
  2020. fprintf(stderr, "Invalid cache capacity %s, %s\n",
  2021. config_strs[i].c_str(), line.c_str());
  2022. exit(1);
  2023. }
  2024. cache_config.cache_capacities.push_back(capacity);
  2025. }
  2026. configs.push_back(cache_config);
  2027. }
  2028. file.close();
  2029. return configs;
  2030. }
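// Parses a comma-separated list of bucket upper bounds and appends a final
// catch-all bucket at the maximum uint64 value.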
  2031. std::vector<uint64_t> parse_buckets(const std::string& bucket_str) {
  2032. std::vector<uint64_t> buckets;
  2033. std::stringstream ss(bucket_str);
  2034. while (ss.good()) {
  2035. std::string bucket;
  2036. getline(ss, bucket, ',');
  2037. buckets.push_back(ParseUint64(bucket));
  2038. }
  2039. buckets.push_back(port::kMaxUint64);
  2040. return buckets;
  2041. }
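// Entry point of the analyzer tool: parses command line flags, optionally
// constructs a cache simulator from the simulation config file, runs the
// analyzer over the trace, and then emits the reports selected by the flags
// (miss ratio curves and timelines always; the remaining statistics unless
// MRC-only mode is requested).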
  2042. int block_cache_trace_analyzer_tool(int argc, char** argv) {
  2043. ParseCommandLineFlags(&argc, &argv, true);
  2044. if (FLAGS_block_cache_trace_path.empty()) {
  2045. fprintf(stderr, "block cache trace path is empty\n");
  2046. exit(1);
  2047. }
  2048. uint64_t warmup_seconds =
  2049. FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0;
  2050. uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0
  2051. ? FLAGS_block_cache_trace_downsample_ratio
  2052. : 0;
  2053. std::vector<CacheConfiguration> cache_configs =
  2054. parse_cache_config_file(FLAGS_block_cache_sim_config_path);
  2055. std::unique_ptr<BlockCacheTraceSimulator> cache_simulator;
  2056. if (!cache_configs.empty()) {
  2057. cache_simulator.reset(new BlockCacheTraceSimulator(
  2058. warmup_seconds, downsample_ratio, cache_configs));
  2059. Status s = cache_simulator->InitializeCaches();
  2060. if (!s.ok()) {
  2061. fprintf(stderr, "Cannot initialize cache simulators %s\n",
  2062. s.ToString().c_str());
  2063. exit(1);
  2064. }
  2065. }
  2066. BlockCacheTraceAnalyzer analyzer(
  2067. FLAGS_block_cache_trace_path, FLAGS_block_cache_analysis_result_dir,
  2068. FLAGS_human_readable_trace_file_path,
  2069. !FLAGS_reuse_distance_labels.empty(), FLAGS_mrc_only,
  2070. FLAGS_is_block_cache_human_readable_trace, std::move(cache_simulator));
  2071. Status s = analyzer.Analyze();
2072. // Status::Incomplete means the reader reached the end of the trace.
2073. if (!s.IsIncomplete() && !s.ok()) {
  2074. fprintf(stderr, "Cannot process the trace %s\n", s.ToString().c_str());
  2075. exit(1);
  2076. }
  2077. fprintf(stdout, "Status: %s\n", s.ToString().c_str());
  2078. analyzer.WriteMissRatioCurves();
  2079. analyzer.WriteMissRatioTimeline(1);
  2080. analyzer.WriteMissRatioTimeline(kSecondInMinute);
  2081. analyzer.WriteMissRatioTimeline(kSecondInHour);
  2082. analyzer.WriteMissTimeline(1);
  2083. analyzer.WriteMissTimeline(kSecondInMinute);
  2084. analyzer.WriteMissTimeline(kSecondInHour);
  2085. if (FLAGS_mrc_only) {
  2086. fprintf(stdout,
  2087. "Skipping the analysis statistics since the user wants to compute "
  2088. "MRC only");
  2089. return 0;
  2090. }
  2091. analyzer.PrintStatsSummary();
  2092. if (FLAGS_print_access_count_stats) {
  2093. print_break_lines(/*num_break_lines=*/3);
  2094. analyzer.PrintAccessCountStats(
  2095. /*user_access_only=*/false, FLAGS_analyze_bottom_k_access_count_blocks,
  2096. FLAGS_analyze_top_k_access_count_blocks);
  2097. print_break_lines(/*num_break_lines=*/3);
  2098. analyzer.PrintAccessCountStats(
  2099. /*user_access_only=*/true, FLAGS_analyze_bottom_k_access_count_blocks,
  2100. FLAGS_analyze_top_k_access_count_blocks);
  2101. }
  2102. if (FLAGS_print_block_size_stats) {
  2103. print_break_lines(/*num_break_lines=*/3);
  2104. analyzer.PrintBlockSizeStats();
  2105. }
  2106. if (FLAGS_print_data_block_access_count_stats) {
  2107. print_break_lines(/*num_break_lines=*/3);
  2108. analyzer.PrintDataBlockAccessStats();
  2109. }
  2110. print_break_lines(/*num_break_lines=*/3);
  2111. if (!FLAGS_timeline_labels.empty()) {
  2112. std::stringstream ss(FLAGS_timeline_labels);
  2113. while (ss.good()) {
  2114. std::string label;
  2115. getline(ss, label, ',');
  2116. if (label.find("block") != std::string::npos) {
  2117. analyzer.WriteAccessTimeline(label, kSecondInMinute, true);
  2118. analyzer.WriteAccessTimeline(label, kSecondInMinute, false);
  2119. analyzer.WriteAccessTimeline(label, kSecondInHour, true);
  2120. analyzer.WriteAccessTimeline(label, kSecondInHour, false);
  2121. } else {
  2122. analyzer.WriteAccessTimeline(label, kSecondInMinute, false);
  2123. analyzer.WriteAccessTimeline(label, kSecondInHour, false);
  2124. }
  2125. }
  2126. }
  2127. if (!FLAGS_analyze_callers.empty()) {
  2128. analyzer.WritePercentAccessSummaryStats();
  2129. std::stringstream ss(FLAGS_analyze_callers);
  2130. while (ss.good()) {
  2131. std::string caller;
  2132. getline(ss, caller, ',');
  2133. analyzer.WriteDetailedPercentAccessSummaryStats(string_to_caller(caller));
  2134. }
  2135. }
  2136. if (!FLAGS_access_count_buckets.empty()) {
  2137. std::vector<uint64_t> buckets = parse_buckets(FLAGS_access_count_buckets);
  2138. analyzer.WriteAccessCountSummaryStats(buckets, /*user_access_only=*/true);
  2139. analyzer.WriteAccessCountSummaryStats(buckets, /*user_access_only=*/false);
  2140. }
  2141. if (!FLAGS_reuse_distance_labels.empty() &&
  2142. !FLAGS_reuse_distance_buckets.empty()) {
  2143. std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_distance_buckets);
  2144. std::stringstream ss(FLAGS_reuse_distance_labels);
  2145. while (ss.good()) {
  2146. std::string label;
  2147. getline(ss, label, ',');
  2148. analyzer.WriteReuseDistance(label, buckets);
  2149. }
  2150. }
  2151. if (!FLAGS_reuse_interval_labels.empty() &&
  2152. !FLAGS_reuse_interval_buckets.empty()) {
  2153. std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_interval_buckets);
  2154. std::stringstream ss(FLAGS_reuse_interval_labels);
  2155. while (ss.good()) {
  2156. std::string label;
  2157. getline(ss, label, ',');
  2158. analyzer.WriteReuseInterval(label, buckets);
  2159. }
  2160. }
  2161. if (!FLAGS_reuse_lifetime_labels.empty() &&
  2162. !FLAGS_reuse_lifetime_buckets.empty()) {
  2163. std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_lifetime_buckets);
  2164. std::stringstream ss(FLAGS_reuse_lifetime_labels);
  2165. while (ss.good()) {
  2166. std::string label;
  2167. getline(ss, label, ',');
  2168. analyzer.WriteReuseLifetime(label, buckets);
  2169. }
  2170. }
  2171. if (FLAGS_analyze_blocks_reuse_k_reuse_window != 0) {
  2172. std::vector<TraceType> block_types{TraceType::kBlockTraceIndexBlock,
  2173. TraceType::kBlockTraceDataBlock,
  2174. TraceType::kBlockTraceFilterBlock};
  2175. for (auto block_type : block_types) {
  2176. analyzer.WriteBlockReuseTimeline(
  2177. FLAGS_analyze_blocks_reuse_k_reuse_window,
  2178. /*user_access_only=*/true, block_type);
  2179. analyzer.WriteBlockReuseTimeline(
  2180. FLAGS_analyze_blocks_reuse_k_reuse_window,
  2181. /*user_access_only=*/false, block_type);
  2182. }
  2183. }
  2184. if (!FLAGS_analyze_get_spatial_locality_labels.empty() &&
  2185. !FLAGS_analyze_get_spatial_locality_buckets.empty()) {
  2186. std::vector<uint64_t> buckets =
  2187. parse_buckets(FLAGS_analyze_get_spatial_locality_buckets);
  2188. std::stringstream ss(FLAGS_analyze_get_spatial_locality_labels);
  2189. while (ss.good()) {
  2190. std::string label;
  2191. getline(ss, label, ',');
  2192. analyzer.WriteGetSpatialLocality(label, buckets);
  2193. }
  2194. }
  2195. if (!FLAGS_analyze_correlation_coefficients_labels.empty()) {
  2196. std::stringstream ss(FLAGS_analyze_correlation_coefficients_labels);
  2197. while (ss.good()) {
  2198. std::string label;
  2199. getline(ss, label, ',');
  2200. analyzer.WriteCorrelationFeatures(
  2201. label, FLAGS_analyze_correlation_coefficients_max_number_of_values);
  2202. }
  2203. analyzer.WriteCorrelationFeaturesForGet(
  2204. FLAGS_analyze_correlation_coefficients_max_number_of_values);
  2205. }
  2206. if (!FLAGS_skew_labels.empty() && !FLAGS_skew_buckets.empty()) {
  2207. std::vector<uint64_t> buckets = parse_buckets(FLAGS_skew_buckets);
  2208. std::stringstream ss(FLAGS_skew_labels);
  2209. while (ss.good()) {
  2210. std::string label;
  2211. getline(ss, label, ',');
  2212. if (label.find("block") != std::string::npos) {
  2213. analyzer.WriteSkewness(label, buckets,
  2214. TraceType::kBlockTraceIndexBlock);
  2215. analyzer.WriteSkewness(label, buckets,
  2216. TraceType::kBlockTraceFilterBlock);
  2217. analyzer.WriteSkewness(label, buckets, TraceType::kBlockTraceDataBlock);
  2218. analyzer.WriteSkewness(label, buckets, TraceType::kTraceMax);
  2219. } else {
  2220. analyzer.WriteSkewness(label, buckets, TraceType::kTraceMax);
  2221. }
  2222. }
  2223. }
  2224. return 0;
  2225. }
  2226. } // namespace ROCKSDB_NAMESPACE
  2227. #endif // GFLAGS
  2228. #endif // ROCKSDB_LITE