sst_dump_tool.cc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. #include "rocksdb/sst_dump_tool.h"
  7. #include <cinttypes>
  8. #include <iostream>
  9. #include "db_stress_tool/db_stress_compression_manager.h"
  10. #include "options/options_helper.h"
  11. #include "port/port.h"
  12. #include "rocksdb/convenience.h"
  13. #include "rocksdb/utilities/ldb_cmd.h"
  14. #include "table/block_based/block.h"
  15. #include "table/block_based/block_based_table_factory.h"
  16. #include "table/sst_file_dumper.h"
  17. namespace ROCKSDB_NAMESPACE {
  18. namespace {
  19. void print_help(bool to_stderr) {
  20. std::string supported_compressions;
  21. for (CompressionType ct : GetSupportedCompressions()) {
  22. if (!supported_compressions.empty()) {
  23. supported_compressions += ", ";
  24. }
  25. std::string str;
  26. Status s = GetStringFromCompressionType(&str, ct);
  27. assert(s.ok());
  28. supported_compressions += str;
  29. }
  30. fprintf(
  31. to_stderr ? stderr : stdout,
  32. R"(sst_dump <db_dirs_OR_sst_files...> [--command=check|scan|raw|recompress|identify]
  33. --file=<db_dir_OR_sst_file>
  34. Path to SST file or directory containing SST files (old option syntax)
  35. --env_uri=<uri of underlying Env>
  36. URI of underlying Env, mutually exclusive with fs_uri
  37. --fs_uri=<uri of underlying FileSystem>
  38. URI of underlying FileSystem, mutually exclusive with env_uri
  39. --command=check|scan|raw|verify|identify
  40. check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
  41. When read_num, from and to are not set, it compares the number of keys read with num_entries in table
  42. property and will report corruption if there is a mismatch.
  43. scan: Iterate over entries in files and print them to screen
  44. raw: Dump all the table contents to <file_name>_dump.txt
  45. verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
  46. recompress: reports the SST file size if recompressed with different
  47. compression types
  48. identify: Reports a file is a valid SST file or lists all valid SST files under a directory
  49. --output_hex
  50. Can be combined with scan command to print the keys and values in Hex
  51. --decode_blob_index
  52. Decode blob indexes and print them in a human-readable format during scans.
  53. --from=<user_key>
  54. Key to start reading from when executing check|scan
  55. --to=<user_key>
  56. Key to stop reading at when executing check|scan
  57. --prefix=<user_key>
  58. Returns all keys with this prefix when executing check|scan
  59. Cannot be used in conjunction with --from
  60. --read_num=<num>
  61. Maximum number of entries to read when executing check|scan
  62. --verify_checksum
  63. Verify file checksum when executing check|scan
  64. --input_key_hex
  65. Can be combined with --from and --to to indicate that these values are encoded in Hex
  66. --show_properties
  67. Print table properties after iterating over the file when executing
  68. check|scan|raw|identify
  69. --block_size=<block_size>
  70. Can be combined with --command=recompress to set the block size that will
  71. be used when trying different compression algorithms
  72. --compression_types=<comma-separated list of CompressionType members, e.g.,
  73. kSnappyCompression or kCustomCompressionC4>
  74. Can be combined with --command=recompress to run recompression for this
  75. list of compression types
  76. Supported built-in compression types: %s
  77. --compression_manager=<compression manager string>
  78. Used with --command=recompress to specify a compression manager to use
  79. instead of the built-in compression manager, which may support a
  80. different set of compression types.
  81. --parse_internal_key=<0xKEY>
  82. Convenience option to parse an internal key on the command line. Dumps the
  83. internal key in hex format {'key' @ SN: type}
  84. --compression_level=<compression_level>
  85. Sets both --compression_level_from= and --compression_level_to=
  86. --compression_level_from=<compression_level>
  87. Compression level to start compressing when executing recompress. One compression type
  88. and compression_level_to must also be specified
  89. --compression_level_to=<compression_level>
  90. Compression level to stop compressing when executing recompress. One compression type
  91. and compression_level_from must also be specified
  92. --compression_max_dict_buffer_bytes=<int64_t>
  93. Limit on buffer size from which we collect samples for dictionary generation.
  94. --compression_max_dict_bytes=<uint32_t>
  95. Maximum size of dictionary used to prime the compression library
  96. --compression_parallel_threads=<uint32_t>
  97. Number of parallel threads to use with --command=recompress
  98. --compression_use_zstd_finalize_dict
  99. Use zstd's finalizeDictionary() API instead of zstd's dictionary trainer to generate dictionary.
  100. --compression_zstd_max_train_bytes=<uint32_t>
  101. Maximum size of training data passed to zstd's dictionary trainer
  102. --list_meta_blocks
  103. Print the list of all meta blocks in the file
  104. )",
  105. supported_compressions.c_str());
  106. }
  // Parses a command-line argument of the form "<arg_name><integer>".
  //
  //   arg       the raw command-line argument to examine.
  //   arg_name  the complete option prefix to match, e.g. "--my_arg=".
  //   err_msg   message printed to stderr when the prefix matches but the value
  //             cannot be parsed.
  //   arg_val   receives the parsed value when the prefix matches.
  //
  // Returns true if `arg` starts with `arg_name` and its remainder was parsed
  // into *arg_val. Returns false, leaving *arg_val untouched, if the prefix
  // does not match.
  //
  // Prints err_msg and exits the process with status 1 if the prefix matches
  // but the value is empty, not an integer that fits in int64_t, or followed by
  // trailing characters (e.g. "4096k").
  bool ParseIntArg(const char* arg, const std::string& arg_name,
                   const std::string& err_msg, int64_t* arg_val) {
    if (strncmp(arg, arg_name.c_str(), arg_name.size()) != 0) {
      return false;
    }
    std::istringstream iss(std::string(arg + arg_name.size()));
    iss >> *arg_val;
    // A successful extraction that consumed the whole value leaves eofbit set;
    // anything left over means the value was only partially numeric.
    if (iss.fail() || !iss.eof()) {
      fprintf(stderr, "%s\n", err_msg.c_str());
      exit(1);
    }
    return true;
  }
  125. } // namespace
  126. int SSTDumpTool::Run(int argc, char const* const* argv, Options options) {
  127. std::string env_uri, fs_uri;
  128. enum DirVsFile {
  129. kUnknownDirVsFile,
  130. kDir,
  131. kFile,
  132. };
  133. std::vector<std::pair<const char*, DirVsFile>> dirs_or_files;
  134. uint64_t read_num = std::numeric_limits<uint64_t>::max();
  135. std::string command;
  136. char junk;
  137. uint64_t n;
  138. bool verify_checksum = false;
  139. bool output_hex = false;
  140. bool decode_blob_index = false;
  141. bool input_key_hex = false;
  142. bool has_from = false;
  143. bool has_to = false;
  144. bool use_from_as_prefix = false;
  145. bool show_properties = false;
  146. bool show_summary = false;
  147. bool list_meta_blocks = false;
  148. bool has_compression_level_from = false;
  149. bool has_compression_level_to = false;
  150. std::string from_key;
  151. std::string to_key;
  152. std::string block_size_str;
  153. std::string compression_level_from_str;
  154. std::string compression_level_to_str;
  155. size_t block_size = 16384; // A popular choice for default
  156. size_t readahead_size = 2 * 1024 * 1024;
  157. std::vector<CompressionType> compression_types;
  158. std::shared_ptr<CompressionManager> compression_manager;
  159. uint64_t total_num_files = 0;
  160. uint64_t total_num_data_blocks = 0;
  161. uint64_t total_data_block_size = 0;
  162. uint64_t total_index_block_size = 0;
  163. uint64_t total_filter_block_size = 0;
  164. int32_t compress_level_from = CompressionOptions::kDefaultCompressionLevel;
  165. int32_t compress_level_to = CompressionOptions::kDefaultCompressionLevel;
  166. uint32_t compression_max_dict_bytes =
  167. ROCKSDB_NAMESPACE::CompressionOptions().max_dict_bytes;
  168. uint32_t compression_zstd_max_train_bytes =
  169. ROCKSDB_NAMESPACE::CompressionOptions().zstd_max_train_bytes;
  170. uint64_t compression_max_dict_buffer_bytes =
  171. ROCKSDB_NAMESPACE::CompressionOptions().max_dict_buffer_bytes;
  172. bool compression_use_zstd_finalize_dict =
  173. !ROCKSDB_NAMESPACE::CompressionOptions().use_zstd_dict_trainer;
  174. uint32_t compression_parallel_threads = 1;
  175. int64_t tmp_val;
  176. TEST_AllowUnsupportedFormatVersion() = true;
  177. DbStressCustomCompressionManager::Register();
  178. for (int i = 1; i < argc; i++) {
  179. if (strncmp(argv[i], "--env_uri=", 10) == 0) {
  180. env_uri = argv[i] + 10;
  181. } else if (strncmp(argv[i], "--fs_uri=", 9) == 0) {
  182. fs_uri = argv[i] + 9;
  183. } else if (strncmp(argv[i], "--file=", 7) == 0) {
  184. dirs_or_files.emplace_back(argv[i] + 7, kUnknownDirVsFile);
  185. } else if (strcmp(argv[i], "--output_hex") == 0) {
  186. output_hex = true;
  187. } else if (strcmp(argv[i], "--decode_blob_index") == 0) {
  188. decode_blob_index = true;
  189. } else if (strcmp(argv[i], "--input_key_hex") == 0) {
  190. input_key_hex = true;
  191. } else if (sscanf(argv[i], "--read_num=%lu%c", (unsigned long*)&n, &junk) ==
  192. 1) {
  193. read_num = n;
  194. } else if (strcmp(argv[i], "--verify_checksum") == 0) {
  195. verify_checksum = true;
  196. } else if (strncmp(argv[i], "--command=", 10) == 0) {
  197. command = argv[i] + 10;
  198. } else if (strncmp(argv[i], "--from=", 7) == 0) {
  199. from_key = argv[i] + 7;
  200. has_from = true;
  201. } else if (strncmp(argv[i], "--to=", 5) == 0) {
  202. to_key = argv[i] + 5;
  203. has_to = true;
  204. } else if (strncmp(argv[i], "--prefix=", 9) == 0) {
  205. from_key = argv[i] + 9;
  206. use_from_as_prefix = true;
  207. } else if (strcmp(argv[i], "--show_properties") == 0) {
  208. show_properties = true;
  209. } else if (strcmp(argv[i], "--show_summary") == 0) {
  210. show_summary = true;
  211. } else if (ParseIntArg(argv[i], "--set_block_size=",
  212. "block size must be numeric", &tmp_val) ||
  213. ParseIntArg(argv[i], "--block_size=",
  214. "block size must be numeric", &tmp_val)) {
  215. block_size = static_cast<size_t>(tmp_val);
  216. } else if (ParseIntArg(argv[i], "--readahead_size=",
  217. "readahead_size must be numeric", &tmp_val)) {
  218. readahead_size = static_cast<size_t>(tmp_val);
  219. } else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
  220. std::string compression_types_csv = argv[i] + 20;
  221. std::istringstream iss(compression_types_csv);
  222. std::string compression_type;
  223. while (std::getline(iss, compression_type, ',')) {
  224. auto iter =
  225. OptionsHelper::compression_type_string_map.find(compression_type);
  226. if (iter == OptionsHelper::compression_type_string_map.end()) {
  227. fprintf(stderr, "%s is not a valid CompressionType\n",
  228. compression_type.c_str());
  229. exit(1);
  230. }
  231. compression_types.emplace_back(iter->second);
  232. }
  233. } else if (strncmp(argv[i], "--compression_manager=", 22) == 0) {
  234. std::string compression_manager_str = argv[i] + 22;
  235. ConfigOptions config_options;
  236. config_options.ignore_unsupported_options = false;
  237. Status s = CompressionManager::CreateFromString(
  238. config_options, compression_manager_str, &compression_manager);
  239. if (!s.ok()) {
  240. fprintf(stderr, "Failed to create compression manager: %s\n",
  241. s.ToString().c_str());
  242. exit(1);
  243. }
  244. if (compression_manager == nullptr) {
  245. fprintf(stderr, "No compression manager created: %s\n",
  246. compression_manager_str.c_str());
  247. exit(1);
  248. }
  249. options.compression_manager = compression_manager;
  250. printf("Using compression manager: %s\n",
  251. compression_manager->GetId().c_str());
  252. } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
  253. std::string in_key(argv[i] + 21);
  254. try {
  255. in_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(in_key);
  256. } catch (...) {
  257. std::cerr << "ERROR: Invalid key input '" << in_key
  258. << "' Use 0x{hex representation of internal rocksdb key}"
  259. << std::endl;
  260. return -1;
  261. }
  262. Slice sl_key = ROCKSDB_NAMESPACE::Slice(in_key);
  263. ParsedInternalKey ikey;
  264. int retc = 0;
  265. Status pik_status =
  266. ParseInternalKey(sl_key, &ikey, true /* log_err_key */);
  267. if (!pik_status.ok()) {
  268. std::cerr << pik_status.getState() << "\n";
  269. retc = -1;
  270. }
  271. fprintf(stdout, "key=%s\n", ikey.DebugString(true, true).c_str());
  272. return retc;
  273. } else if (ParseIntArg(argv[i], "--compression_level=",
  274. "compression_level must be numeric", &tmp_val)) {
  275. has_compression_level_from = true;
  276. has_compression_level_to = true;
  277. compress_level_from = static_cast<int>(tmp_val);
  278. compress_level_to = static_cast<int>(tmp_val);
  279. } else if (ParseIntArg(argv[i], "--compression_level_from=",
  280. "compression_level_from must be numeric",
  281. &tmp_val)) {
  282. has_compression_level_from = true;
  283. compress_level_from = static_cast<int>(tmp_val);
  284. } else if (ParseIntArg(argv[i], "--compression_level_to=",
  285. "compression_level_to must be numeric", &tmp_val)) {
  286. has_compression_level_to = true;
  287. compress_level_to = static_cast<int>(tmp_val);
  288. } else if (ParseIntArg(argv[i], "--compression_max_dict_bytes=",
  289. "compression_max_dict_bytes must be numeric",
  290. &tmp_val)) {
  291. if (tmp_val < 0 || tmp_val > std::numeric_limits<uint32_t>::max()) {
  292. fprintf(stderr, "compression_max_dict_bytes must be a uint32_t: '%s'\n",
  293. argv[i]);
  294. print_help(/*to_stderr*/ true);
  295. return 1;
  296. }
  297. compression_max_dict_bytes = static_cast<uint32_t>(tmp_val);
  298. } else if (ParseIntArg(argv[i], "--compression_parallel_threads=",
  299. "compression_parallel_threads must be numeric",
  300. &tmp_val)) {
  301. if (tmp_val < 0 || tmp_val > 100) {
  302. fprintf(stderr, "compression_parallel_threads out of range: '%s'\n",
  303. argv[i]);
  304. print_help(/*to_stderr*/ true);
  305. return 1;
  306. }
  307. compression_parallel_threads = static_cast<uint32_t>(tmp_val);
  308. } else if (ParseIntArg(argv[i], "--compression_zstd_max_train_bytes=",
  309. "compression_zstd_max_train_bytes must be numeric",
  310. &tmp_val)) {
  311. if (tmp_val < 0 || tmp_val > std::numeric_limits<uint32_t>::max()) {
  312. fprintf(stderr,
  313. "compression_zstd_max_train_bytes must be a uint32_t: '%s'\n",
  314. argv[i]);
  315. print_help(/*to_stderr*/ true);
  316. return 1;
  317. }
  318. compression_zstd_max_train_bytes = static_cast<uint32_t>(tmp_val);
  319. } else if (ParseIntArg(argv[i], "--compression_max_dict_buffer_bytes=",
  320. "compression_max_dict_buffer_bytes must be numeric",
  321. &tmp_val)) {
  322. if (tmp_val < 0) {
  323. fprintf(stderr,
  324. "compression_max_dict_buffer_bytes must be positive: '%s'\n",
  325. argv[i]);
  326. print_help(/*to_stderr*/ true);
  327. return 1;
  328. }
  329. compression_max_dict_buffer_bytes = static_cast<uint64_t>(tmp_val);
  330. } else if (strcmp(argv[i], "--compression_use_zstd_finalize_dict") == 0) {
  331. compression_use_zstd_finalize_dict = true;
  332. } else if (strcmp(argv[i], "--list_meta_blocks") == 0) {
  333. list_meta_blocks = true;
  334. } else if (strcmp(argv[i], "--help") == 0) {
  335. print_help(/*to_stderr*/ false);
  336. return 0;
  337. } else if (strcmp(argv[i], "--version") == 0) {
  338. printf("%s\n", GetRocksBuildInfoAsString("sst_dump").c_str());
  339. return 0;
  340. } else if (strcmp(argv[i], "--") == 0) {
  341. // Remaining args are dir-or-file
  342. for (++i; i < argc; ++i) {
  343. dirs_or_files.emplace_back(argv[i], kUnknownDirVsFile);
  344. }
  345. } else if (argv[i][0] == '-') {
  346. fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]);
  347. print_help(/*to_stderr*/ true);
  348. return 1;
  349. } else {
  350. // Dir-or-file arg
  351. dirs_or_files.emplace_back(argv[i], kUnknownDirVsFile);
  352. }
  353. }
  354. if (has_compression_level_from ^ has_compression_level_to) {
  355. fprintf(stderr,
  356. "Specify both --compression_level_from and "
  357. "--compression_level_to.\n\n");
  358. exit(1);
  359. }
  360. if (use_from_as_prefix && has_from) {
  361. fprintf(stderr, "Cannot specify --prefix and --from\n\n");
  362. exit(1);
  363. }
  364. if (input_key_hex) {
  365. if (has_from || use_from_as_prefix) {
  366. from_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(from_key);
  367. }
  368. if (has_to) {
  369. to_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(to_key);
  370. }
  371. }
  372. if (dirs_or_files.empty()) {
  373. fprintf(stderr, "file or directory must be specified.\n\n");
  374. print_help(/*to_stderr*/ true);
  375. exit(1);
  376. }
  377. std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
  378. // If caller of SSTDumpTool::Run(...) does not specify a different env other
  379. // than Env::Default(), then try to load custom env based on env_uri/fs_uri.
  380. // Otherwise, the caller is responsible for creating custom env.
  381. {
  382. ConfigOptions config_options;
  383. config_options.env = options.env;
  384. Status s = Env::CreateFromUri(config_options, env_uri, fs_uri, &options.env,
  385. &env_guard);
  386. if (!s.ok()) {
  387. fprintf(stderr, "CreateEnvFromUri: %s\n", s.ToString().c_str());
  388. exit(1);
  389. } else {
  390. fprintf(stdout, "options.env is %p\n", options.env);
  391. }
  392. }
  393. std::vector<std::string> filenames;
  394. ROCKSDB_NAMESPACE::Env* env = options.env;
  395. ROCKSDB_NAMESPACE::Status st;
  396. for (size_t i = 0; i < dirs_or_files.size(); ++i) {
  397. auto dir_or_file = dirs_or_files[i].first;
  398. std::vector<std::string> children;
  399. st = env->GetChildren(dirs_or_files[i].first, &children);
  400. if (!st.ok() || children.empty()) {
  401. // dir_or_file does not exist or does not contain children
  402. // Check its existence first
  403. Status s = env->FileExists(dir_or_file);
  404. // dir_or_file does not exist
  405. if (!s.ok()) {
  406. fprintf(stderr, "%s%s: No such file or directory\n",
  407. s.ToString().c_str(), dir_or_file);
  408. return 1;
  409. }
  410. // dir_or_file exists and is treated as a "file"
  411. // since it has no children
  412. // This is ok since later it will be checked
  413. // that whether it is a valid sst or not
  414. // (A directory "file" is not a valid sst)
  415. filenames.emplace_back(dir_or_file);
  416. dirs_or_files[i].second = kFile;
  417. } else {
  418. for (auto& child : children) {
  419. filenames.push_back(std::string{dir_or_file} + "/" + child);
  420. }
  421. dirs_or_files[i].second = kDir;
  422. }
  423. }
  424. uint64_t total_read = 0;
  425. // List of RocksDB SST file without corruption
  426. std::vector<std::string> valid_sst_files;
  427. for (size_t i = 0; i < filenames.size(); i++) {
  428. std::string filename = filenames.at(i);
  429. if (filename.length() <= 4 ||
  430. filename.rfind(".sst") != filename.length() - 4) {
  431. // ignore
  432. continue;
  433. }
  434. if (command == "verify") {
  435. verify_checksum = true;
  436. }
  437. // Update options for when simulating writing a table file
  438. {
  439. BlockBasedTableOptions bbto;
  440. if (options.table_factory->IsInstanceOf(
  441. TableFactory::kBlockBasedTableName()) &&
  442. options.table_factory->GetOptions<BlockBasedTableOptions>()) {
  443. bbto = *options.table_factory->GetOptions<BlockBasedTableOptions>();
  444. }
  445. bbto.block_size = block_size;
  446. // Maximize compression features available
  447. bbto.format_version = kLatestFormatVersion;
  448. options.table_factory = std::make_shared<BlockBasedTableFactory>(bbto);
  449. }
  450. options.compression_opts.max_dict_bytes = compression_max_dict_bytes;
  451. options.compression_opts.zstd_max_train_bytes =
  452. compression_zstd_max_train_bytes;
  453. options.compression_opts.max_dict_buffer_bytes =
  454. compression_max_dict_buffer_bytes;
  455. options.compression_opts.use_zstd_dict_trainer =
  456. !compression_use_zstd_finalize_dict;
  457. options.compression_opts.parallel_threads = compression_parallel_threads;
  458. ROCKSDB_NAMESPACE::SstFileDumper dumper(
  459. options, filename, Temperature::kUnknown, readahead_size,
  460. verify_checksum, output_hex, decode_blob_index);
  461. // Not a valid SST
  462. if (!dumper.getStatus().ok()) {
  463. fprintf(stderr, "%s: %s\n", filename.c_str(),
  464. dumper.getStatus().ToString().c_str());
  465. continue;
  466. } else {
  467. valid_sst_files.push_back(filename);
  468. // Print out from and to key information once
  469. // where there is at least one valid SST
  470. if (valid_sst_files.size() == 1) {
  471. // from_key and to_key are only used for "check", "scan", or ""
  472. if (command == "check" || command == "scan" || command == "") {
  473. fprintf(stdout, "from [%s] to [%s]\n",
  474. ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
  475. ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
  476. }
  477. }
  478. }
  479. if (command == "recompress") {
  480. if (compression_types.empty()) {
  481. if (options.compression_manager != nullptr) {
  482. for (int c = 0; c < kDisableCompressionOption; ++c) {
  483. if (options.compression_manager->SupportsCompressionType(
  484. static_cast<CompressionType>(c))) {
  485. compression_types.emplace_back(static_cast<CompressionType>(c));
  486. }
  487. }
  488. } else {
  489. compression_types = GetSupportedCompressions();
  490. }
  491. }
  492. st = dumper.ShowAllCompressionSizes(
  493. compression_types, compress_level_from, compress_level_to);
  494. if (!st.ok()) {
  495. fprintf(stderr, "Failed to recompress: %s\n", st.ToString().c_str());
  496. exit(1);
  497. }
  498. continue;
  499. }
  500. if (command == "raw") {
  501. std::string out_filename = filename.substr(0, filename.length() - 4);
  502. out_filename.append("_dump.txt");
  503. st = dumper.DumpTable(out_filename);
  504. if (!st.ok()) {
  505. fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
  506. exit(1);
  507. } else {
  508. fprintf(stdout, "raw dump written to file %s\n", out_filename.data());
  509. }
  510. continue;
  511. }
  512. // scan all files in give file path.
  513. if (command == "" || command == "scan" || command == "check") {
  514. st = dumper.ReadSequential(
  515. command == "scan", read_num > 0 ? (read_num - total_read) : read_num,
  516. has_from || use_from_as_prefix, from_key, has_to, to_key,
  517. use_from_as_prefix);
  518. if (!st.ok()) {
  519. fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
  520. }
  521. total_read += dumper.GetReadNumber();
  522. if (read_num > 0 && total_read > read_num) {
  523. break;
  524. }
  525. }
  526. if (command == "verify") {
  527. st = dumper.VerifyChecksum();
  528. if (!st.ok()) {
  529. fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(),
  530. st.ToString().c_str());
  531. } else {
  532. fprintf(stdout, "The file is ok\n");
  533. }
  534. continue;
  535. }
  536. if (show_properties || show_summary) {
  537. const ROCKSDB_NAMESPACE::TableProperties* table_properties;
  538. std::shared_ptr<const ROCKSDB_NAMESPACE::TableProperties>
  539. table_properties_from_reader;
  540. st = dumper.ReadTableProperties(&table_properties_from_reader);
  541. if (!st.ok()) {
  542. fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
  543. fprintf(stderr, "Try to use initial table properties\n");
  544. table_properties = dumper.GetInitTableProperties();
  545. } else {
  546. table_properties = table_properties_from_reader.get();
  547. }
  548. if (table_properties != nullptr) {
  549. if (show_properties) {
  550. fprintf(stdout,
  551. "Table Properties:\n"
  552. "------------------------------\n"
  553. " %s",
  554. table_properties->ToString("\n ", ": ").c_str());
  555. }
  556. total_num_files += 1;
  557. total_num_data_blocks += table_properties->num_data_blocks;
  558. total_data_block_size += table_properties->data_size;
  559. total_index_block_size += table_properties->index_size;
  560. total_filter_block_size += table_properties->filter_size;
  561. if (show_properties) {
  562. fprintf(stdout,
  563. "Raw user collected properties\n"
  564. "------------------------------\n");
  565. for (const auto& kv : table_properties->user_collected_properties) {
  566. std::string prop_name = kv.first;
  567. std::string prop_val = Slice(kv.second).ToString(true);
  568. fprintf(stdout, " # %s: 0x%s\n", prop_name.c_str(),
  569. prop_val.c_str());
  570. }
  571. }
  572. } else {
  573. fprintf(stderr, "Reader unexpectedly returned null properties\n");
  574. }
  575. }
  576. BlockContents& meta_index_contents = dumper.GetMetaIndexContents();
  577. if (list_meta_blocks && meta_index_contents.data.size() > 0) {
  578. Block meta_index_block(std::move(meta_index_contents));
  579. std::unique_ptr<MetaBlockIter> meta_index_iter;
  580. meta_index_iter.reset(meta_index_block.NewMetaIterator());
  581. meta_index_iter->SeekToFirst();
  582. fprintf(stdout,
  583. "Meta Blocks:\n"
  584. "------------------------------\n");
  585. while (meta_index_iter->status().ok() && meta_index_iter->Valid()) {
  586. Slice v = meta_index_iter->value();
  587. BlockHandle handle;
  588. st = handle.DecodeFrom(&v);
  589. if (!st.ok()) {
  590. fprintf(stderr, "%s: Could not decode block handle - %s\n",
  591. filename.c_str(), st.ToString().c_str());
  592. } else {
  593. fprintf(stdout, " %s: %" PRIu64 " %" PRIu64 "\n",
  594. meta_index_iter->key().ToString().c_str(), handle.offset(),
  595. handle.size());
  596. }
  597. meta_index_iter->Next();
  598. }
  599. } else if (list_meta_blocks) {
  600. fprintf(stderr, "Could not read the meta index block\n");
  601. }
  602. }
  603. if (show_summary) {
  604. fprintf(stdout, "total number of files: %" PRIu64 "\n", total_num_files);
  605. fprintf(stdout, "total number of data blocks: %" PRIu64 "\n",
  606. total_num_data_blocks);
  607. fprintf(stdout, "total data block size: %" PRIu64 "\n",
  608. total_data_block_size);
  609. fprintf(stdout, "total index block size: %" PRIu64 "\n",
  610. total_index_block_size);
  611. fprintf(stdout, "total filter block size: %" PRIu64 "\n",
  612. total_filter_block_size);
  613. }
  614. if (valid_sst_files.empty()) {
  615. // No valid SST files are found
  616. // Exit with an error state
  617. for (auto& e : dirs_or_files) {
  618. if (e.second == kDir) {
  619. fprintf(stdout, "------------------------------\n");
  620. fprintf(stderr, "No valid SST files found in %s\n", e.first);
  621. } else {
  622. assert(e.second == kFile);
  623. fprintf(stderr, "%s is not a valid SST file\n", e.first);
  624. }
  625. }
  626. return 1;
  627. } else {
  628. assert(!dirs_or_files.empty());
  629. if (command == "identify") {
  630. if (dirs_or_files.size() > 1 || dirs_or_files[0].second == kDir) {
  631. fprintf(stdout, "------------------------------\n");
  632. std::string single_dir_msg;
  633. if (dirs_or_files.size() == 1) {
  634. single_dir_msg += " found in ";
  635. single_dir_msg += dirs_or_files[0].first;
  636. }
  637. fprintf(stdout, "List of valid SST files%s:\n", single_dir_msg.c_str());
  638. for (const auto& f : valid_sst_files) {
  639. fprintf(stdout, "%s\n", f.c_str());
  640. }
  641. fprintf(stdout, "Number of valid SST files: %zu\n",
  642. valid_sst_files.size());
  643. } else {
  644. fprintf(stdout, "%s is a valid SST file\n", dirs_or_files[0].first);
  645. }
  646. }
  647. // At least one valid SST
  648. // exit with a success state
  649. return 0;
  650. }
  651. }
  652. } // namespace ROCKSDB_NAMESPACE