// sst_dump_tool.cc
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. #ifndef ROCKSDB_LITE
  7. #include "tools/sst_dump_tool_imp.h"
  8. #include <cinttypes>
  9. #include <iostream>
  10. #include <map>
  11. #include <memory>
  12. #include <sstream>
  13. #include <vector>
  14. #include "db/blob_index.h"
  15. #include "db/memtable.h"
  16. #include "db/write_batch_internal.h"
  17. #include "env/composite_env_wrapper.h"
  18. #include "options/cf_options.h"
  19. #include "rocksdb/db.h"
  20. #include "rocksdb/env.h"
  21. #include "rocksdb/iterator.h"
  22. #include "rocksdb/slice_transform.h"
  23. #include "rocksdb/status.h"
  24. #include "rocksdb/table_properties.h"
  25. #include "rocksdb/utilities/ldb_cmd.h"
  26. #include "table/block_based/block.h"
  27. #include "table/block_based/block_based_table_builder.h"
  28. #include "table/block_based/block_based_table_factory.h"
  29. #include "table/block_based/block_builder.h"
  30. #include "table/format.h"
  31. #include "table/meta_blocks.h"
  32. #include "table/plain/plain_table_factory.h"
  33. #include "table/table_reader.h"
  34. #include "util/compression.h"
  35. #include "util/random.h"
  36. #include "port/port.h"
  37. namespace ROCKSDB_NAMESPACE {
  38. SstFileDumper::SstFileDumper(const Options& options,
  39. const std::string& file_path, bool verify_checksum,
  40. bool output_hex, bool decode_blob_index)
  41. : file_name_(file_path),
  42. read_num_(0),
  43. verify_checksum_(verify_checksum),
  44. output_hex_(output_hex),
  45. decode_blob_index_(decode_blob_index),
  46. options_(options),
  47. ioptions_(options_),
  48. moptions_(ColumnFamilyOptions(options_)),
  49. internal_comparator_(BytewiseComparator()) {
  50. fprintf(stdout, "Process %s\n", file_path.c_str());
  51. init_result_ = GetTableReader(file_name_);
  52. }
  53. extern const uint64_t kBlockBasedTableMagicNumber;
  54. extern const uint64_t kLegacyBlockBasedTableMagicNumber;
  55. extern const uint64_t kPlainTableMagicNumber;
  56. extern const uint64_t kLegacyPlainTableMagicNumber;
  57. const char* testFileName = "test_file_name";
  58. static const std::vector<std::pair<CompressionType, const char*>>
  59. kCompressions = {
  60. {CompressionType::kNoCompression, "kNoCompression"},
  61. {CompressionType::kSnappyCompression, "kSnappyCompression"},
  62. {CompressionType::kZlibCompression, "kZlibCompression"},
  63. {CompressionType::kBZip2Compression, "kBZip2Compression"},
  64. {CompressionType::kLZ4Compression, "kLZ4Compression"},
  65. {CompressionType::kLZ4HCCompression, "kLZ4HCCompression"},
  66. {CompressionType::kXpressCompression, "kXpressCompression"},
  67. {CompressionType::kZSTD, "kZSTD"}};
  68. Status SstFileDumper::GetTableReader(const std::string& file_path) {
  69. // Warning about 'magic_number' being uninitialized shows up only in UBsan
  70. // builds. Though access is guarded by 's.ok()' checks, fix the issue to
  71. // avoid any warnings.
  72. uint64_t magic_number = Footer::kInvalidTableMagicNumber;
  73. // read table magic number
  74. Footer footer;
  75. std::unique_ptr<RandomAccessFile> file;
  76. uint64_t file_size = 0;
  77. Status s = options_.env->NewRandomAccessFile(file_path, &file, soptions_);
  78. if (s.ok()) {
  79. s = options_.env->GetFileSize(file_path, &file_size);
  80. }
  81. file_.reset(new RandomAccessFileReader(NewLegacyRandomAccessFileWrapper(file),
  82. file_path));
  83. if (s.ok()) {
  84. s = ReadFooterFromFile(file_.get(), nullptr /* prefetch_buffer */,
  85. file_size, &footer);
  86. }
  87. if (s.ok()) {
  88. magic_number = footer.table_magic_number();
  89. }
  90. if (s.ok()) {
  91. if (magic_number == kPlainTableMagicNumber ||
  92. magic_number == kLegacyPlainTableMagicNumber) {
  93. soptions_.use_mmap_reads = true;
  94. options_.env->NewRandomAccessFile(file_path, &file, soptions_);
  95. file_.reset(new RandomAccessFileReader(
  96. NewLegacyRandomAccessFileWrapper(file), file_path));
  97. }
  98. options_.comparator = &internal_comparator_;
  99. // For old sst format, ReadTableProperties might fail but file can be read
  100. if (ReadTableProperties(magic_number, file_.get(), file_size).ok()) {
  101. SetTableOptionsByMagicNumber(magic_number);
  102. } else {
  103. SetOldTableOptions();
  104. }
  105. }
  106. if (s.ok()) {
  107. s = NewTableReader(ioptions_, soptions_, internal_comparator_, file_size,
  108. &table_reader_);
  109. }
  110. return s;
  111. }
  112. Status SstFileDumper::NewTableReader(
  113. const ImmutableCFOptions& /*ioptions*/, const EnvOptions& /*soptions*/,
  114. const InternalKeyComparator& /*internal_comparator*/, uint64_t file_size,
  115. std::unique_ptr<TableReader>* /*table_reader*/) {
  116. // We need to turn off pre-fetching of index and filter nodes for
  117. // BlockBasedTable
  118. if (BlockBasedTableFactory::kName == options_.table_factory->Name()) {
  119. return options_.table_factory->NewTableReader(
  120. TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(),
  121. soptions_, internal_comparator_),
  122. std::move(file_), file_size, &table_reader_, /*enable_prefetch=*/false);
  123. }
  124. // For all other factory implementation
  125. return options_.table_factory->NewTableReader(
  126. TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(), soptions_,
  127. internal_comparator_),
  128. std::move(file_), file_size, &table_reader_);
  129. }
  130. Status SstFileDumper::VerifyChecksum() {
  131. // We could pass specific readahead setting into read options if needed.
  132. return table_reader_->VerifyChecksum(ReadOptions(),
  133. TableReaderCaller::kSSTDumpTool);
  134. }
  135. Status SstFileDumper::DumpTable(const std::string& out_filename) {
  136. std::unique_ptr<WritableFile> out_file;
  137. Env* env = options_.env;
  138. env->NewWritableFile(out_filename, &out_file, soptions_);
  139. Status s = table_reader_->DumpTable(out_file.get());
  140. out_file->Close();
  141. return s;
  142. }
  143. uint64_t SstFileDumper::CalculateCompressedTableSize(
  144. const TableBuilderOptions& tb_options, size_t block_size,
  145. uint64_t* num_data_blocks) {
  146. std::unique_ptr<WritableFile> out_file;
  147. std::unique_ptr<Env> env(NewMemEnv(options_.env));
  148. env->NewWritableFile(testFileName, &out_file, soptions_);
  149. std::unique_ptr<WritableFileWriter> dest_writer;
  150. dest_writer.reset(
  151. new WritableFileWriter(NewLegacyWritableFileWrapper(std::move(out_file)),
  152. testFileName, soptions_));
  153. BlockBasedTableOptions table_options;
  154. table_options.block_size = block_size;
  155. BlockBasedTableFactory block_based_tf(table_options);
  156. std::unique_ptr<TableBuilder> table_builder;
  157. table_builder.reset(block_based_tf.NewTableBuilder(
  158. tb_options,
  159. TablePropertiesCollectorFactory::Context::kUnknownColumnFamily,
  160. dest_writer.get()));
  161. std::unique_ptr<InternalIterator> iter(table_reader_->NewIterator(
  162. ReadOptions(), moptions_.prefix_extractor.get(), /*arena=*/nullptr,
  163. /*skip_filters=*/false, TableReaderCaller::kSSTDumpTool));
  164. for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
  165. if (!iter->status().ok()) {
  166. fputs(iter->status().ToString().c_str(), stderr);
  167. exit(1);
  168. }
  169. table_builder->Add(iter->key(), iter->value());
  170. }
  171. Status s = table_builder->Finish();
  172. if (!s.ok()) {
  173. fputs(s.ToString().c_str(), stderr);
  174. exit(1);
  175. }
  176. uint64_t size = table_builder->FileSize();
  177. assert(num_data_blocks != nullptr);
  178. *num_data_blocks = table_builder->GetTableProperties().num_data_blocks;
  179. env->DeleteFile(testFileName);
  180. return size;
  181. }
  182. int SstFileDumper::ShowAllCompressionSizes(
  183. size_t block_size,
  184. const std::vector<std::pair<CompressionType, const char*>>&
  185. compression_types) {
  186. ReadOptions read_options;
  187. Options opts;
  188. opts.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  189. opts.statistics->set_stats_level(StatsLevel::kAll);
  190. const ImmutableCFOptions imoptions(opts);
  191. const ColumnFamilyOptions cfo(opts);
  192. const MutableCFOptions moptions(cfo);
  193. ROCKSDB_NAMESPACE::InternalKeyComparator ikc(opts.comparator);
  194. std::vector<std::unique_ptr<IntTblPropCollectorFactory> >
  195. block_based_table_factories;
  196. fprintf(stdout, "Block Size: %" ROCKSDB_PRIszt "\n", block_size);
  197. for (auto& i : compression_types) {
  198. if (CompressionTypeSupported(i.first)) {
  199. CompressionOptions compress_opt;
  200. std::string column_family_name;
  201. int unknown_level = -1;
  202. TableBuilderOptions tb_opts(
  203. imoptions, moptions, ikc, &block_based_table_factories, i.first,
  204. 0 /* sample_for_compression */, compress_opt,
  205. false /* skip_filters */, column_family_name, unknown_level);
  206. uint64_t num_data_blocks = 0;
  207. uint64_t file_size =
  208. CalculateCompressedTableSize(tb_opts, block_size, &num_data_blocks);
  209. fprintf(stdout, "Compression: %-24s", i.second);
  210. fprintf(stdout, " Size: %10" PRIu64, file_size);
  211. fprintf(stdout, " Blocks: %6" PRIu64, num_data_blocks);
  212. const uint64_t compressed_blocks =
  213. opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_COMPRESSED);
  214. const uint64_t not_compressed_blocks =
  215. opts.statistics->getAndResetTickerCount(NUMBER_BLOCK_NOT_COMPRESSED);
  216. // When the option enable_index_compression is true,
  217. // NUMBER_BLOCK_COMPRESSED is incremented for index block(s).
  218. if ((compressed_blocks + not_compressed_blocks) > num_data_blocks) {
  219. num_data_blocks = compressed_blocks + not_compressed_blocks;
  220. }
  221. const uint64_t ratio_not_compressed_blocks =
  222. (num_data_blocks - compressed_blocks) - not_compressed_blocks;
  223. const double compressed_pcnt =
  224. (0 == num_data_blocks) ? 0.0
  225. : ((static_cast<double>(compressed_blocks) /
  226. static_cast<double>(num_data_blocks)) *
  227. 100.0);
  228. const double ratio_not_compressed_pcnt =
  229. (0 == num_data_blocks)
  230. ? 0.0
  231. : ((static_cast<double>(ratio_not_compressed_blocks) /
  232. static_cast<double>(num_data_blocks)) *
  233. 100.0);
  234. const double not_compressed_pcnt =
  235. (0 == num_data_blocks)
  236. ? 0.0
  237. : ((static_cast<double>(not_compressed_blocks) /
  238. static_cast<double>(num_data_blocks)) *
  239. 100.0);
  240. fprintf(stdout, " Compressed: %6" PRIu64 " (%5.1f%%)", compressed_blocks,
  241. compressed_pcnt);
  242. fprintf(stdout, " Not compressed (ratio): %6" PRIu64 " (%5.1f%%)",
  243. ratio_not_compressed_blocks, ratio_not_compressed_pcnt);
  244. fprintf(stdout, " Not compressed (abort): %6" PRIu64 " (%5.1f%%)\n",
  245. not_compressed_blocks, not_compressed_pcnt);
  246. } else {
  247. fprintf(stdout, "Unsupported compression type: %s.\n", i.second);
  248. }
  249. }
  250. return 0;
  251. }
  252. Status SstFileDumper::ReadTableProperties(uint64_t table_magic_number,
  253. RandomAccessFileReader* file,
  254. uint64_t file_size) {
  255. TableProperties* table_properties = nullptr;
  256. Status s = ROCKSDB_NAMESPACE::ReadTableProperties(
  257. file, file_size, table_magic_number, ioptions_, &table_properties);
  258. if (s.ok()) {
  259. table_properties_.reset(table_properties);
  260. } else {
  261. fprintf(stdout, "Not able to read table properties\n");
  262. }
  263. return s;
  264. }
  265. Status SstFileDumper::SetTableOptionsByMagicNumber(
  266. uint64_t table_magic_number) {
  267. assert(table_properties_);
  268. if (table_magic_number == kBlockBasedTableMagicNumber ||
  269. table_magic_number == kLegacyBlockBasedTableMagicNumber) {
  270. options_.table_factory = std::make_shared<BlockBasedTableFactory>();
  271. fprintf(stdout, "Sst file format: block-based\n");
  272. auto& props = table_properties_->user_collected_properties;
  273. auto pos = props.find(BlockBasedTablePropertyNames::kIndexType);
  274. if (pos != props.end()) {
  275. auto index_type_on_file = static_cast<BlockBasedTableOptions::IndexType>(
  276. DecodeFixed32(pos->second.c_str()));
  277. if (index_type_on_file ==
  278. BlockBasedTableOptions::IndexType::kHashSearch) {
  279. options_.prefix_extractor.reset(NewNoopTransform());
  280. }
  281. }
  282. } else if (table_magic_number == kPlainTableMagicNumber ||
  283. table_magic_number == kLegacyPlainTableMagicNumber) {
  284. options_.allow_mmap_reads = true;
  285. PlainTableOptions plain_table_options;
  286. plain_table_options.user_key_len = kPlainTableVariableLength;
  287. plain_table_options.bloom_bits_per_key = 0;
  288. plain_table_options.hash_table_ratio = 0;
  289. plain_table_options.index_sparseness = 1;
  290. plain_table_options.huge_page_tlb_size = 0;
  291. plain_table_options.encoding_type = kPlain;
  292. plain_table_options.full_scan_mode = true;
  293. options_.table_factory.reset(NewPlainTableFactory(plain_table_options));
  294. fprintf(stdout, "Sst file format: plain table\n");
  295. } else {
  296. char error_msg_buffer[80];
  297. snprintf(error_msg_buffer, sizeof(error_msg_buffer) - 1,
  298. "Unsupported table magic number --- %lx",
  299. (long)table_magic_number);
  300. return Status::InvalidArgument(error_msg_buffer);
  301. }
  302. return Status::OK();
  303. }
  304. Status SstFileDumper::SetOldTableOptions() {
  305. assert(table_properties_ == nullptr);
  306. options_.table_factory = std::make_shared<BlockBasedTableFactory>();
  307. fprintf(stdout, "Sst file format: block-based(old version)\n");
  308. return Status::OK();
  309. }
  310. Status SstFileDumper::ReadSequential(bool print_kv, uint64_t read_num,
  311. bool has_from, const std::string& from_key,
  312. bool has_to, const std::string& to_key,
  313. bool use_from_as_prefix) {
  314. if (!table_reader_) {
  315. return init_result_;
  316. }
  317. InternalIterator* iter = table_reader_->NewIterator(
  318. ReadOptions(verify_checksum_, false), moptions_.prefix_extractor.get(),
  319. /*arena=*/nullptr, /*skip_filters=*/false,
  320. TableReaderCaller::kSSTDumpTool);
  321. uint64_t i = 0;
  322. if (has_from) {
  323. InternalKey ikey;
  324. ikey.SetMinPossibleForUserKey(from_key);
  325. iter->Seek(ikey.Encode());
  326. } else {
  327. iter->SeekToFirst();
  328. }
  329. for (; iter->Valid(); iter->Next()) {
  330. Slice key = iter->key();
  331. Slice value = iter->value();
  332. ++i;
  333. if (read_num > 0 && i > read_num)
  334. break;
  335. ParsedInternalKey ikey;
  336. if (!ParseInternalKey(key, &ikey)) {
  337. std::cerr << "Internal Key ["
  338. << key.ToString(true /* in hex*/)
  339. << "] parse error!\n";
  340. continue;
  341. }
  342. // the key returned is not prefixed with out 'from' key
  343. if (use_from_as_prefix && !ikey.user_key.starts_with(from_key)) {
  344. break;
  345. }
  346. // If end marker was specified, we stop before it
  347. if (has_to && BytewiseComparator()->Compare(ikey.user_key, to_key) >= 0) {
  348. break;
  349. }
  350. if (print_kv) {
  351. if (!decode_blob_index_ || ikey.type != kTypeBlobIndex) {
  352. fprintf(stdout, "%s => %s\n", ikey.DebugString(output_hex_).c_str(),
  353. value.ToString(output_hex_).c_str());
  354. } else {
  355. BlobIndex blob_index;
  356. const Status s = blob_index.DecodeFrom(value);
  357. if (!s.ok()) {
  358. fprintf(stderr, "%s => error decoding blob index\n",
  359. ikey.DebugString(output_hex_).c_str());
  360. continue;
  361. }
  362. fprintf(stdout, "%s => %s\n", ikey.DebugString(output_hex_).c_str(),
  363. blob_index.DebugString(output_hex_).c_str());
  364. }
  365. }
  366. }
  367. read_num_ += i;
  368. Status ret = iter->status();
  369. delete iter;
  370. return ret;
  371. }
  372. Status SstFileDumper::ReadTableProperties(
  373. std::shared_ptr<const TableProperties>* table_properties) {
  374. if (!table_reader_) {
  375. return init_result_;
  376. }
  377. *table_properties = table_reader_->GetTableProperties();
  378. return init_result_;
  379. }
  namespace {

  // Prints the sst_dump usage text to stderr.
  //
  // The command lists name every command SSTDumpTool::Run() handles (check,
  // scan, raw, verify, recompress), and --show_summary, which Run() parses,
  // is documented alongside --show_properties.
  void print_help() {
    fprintf(
        stderr,
        R"(sst_dump --file=<data_dir_OR_sst_file> [--command=check|scan|raw|verify|recompress]
--file=<data_dir_OR_sst_file>
Path to SST file or directory containing SST files
--env_uri=<uri of underlying Env>
URI of underlying Env
--command=check|scan|raw|verify|recompress
check: Iterate over entries in files but don't print anything except if an error is encountered (default command)
scan: Iterate over entries in files and print them to screen
raw: Dump all the table contents to <file_name>_dump.txt
verify: Iterate all the blocks in files verifying checksum to detect possible corruption but don't print anything except if a corruption is encountered
recompress: reports the SST file size if recompressed with different
compression types
--output_hex
Can be combined with scan command to print the keys and values in Hex
--decode_blob_index
Decode blob indexes and print them in a human-readable format during scans.
--from=<user_key>
Key to start reading from when executing check|scan
--to=<user_key>
Key to stop reading at when executing check|scan
--prefix=<user_key>
Returns all keys with this prefix when executing check|scan
Cannot be used in conjunction with --from
--read_num=<num>
Maximum number of entries to read when executing check|scan
--verify_checksum
Verify file checksum when executing check|scan
--input_key_hex
Can be combined with --from and --to to indicate that these values are encoded in Hex
--show_properties
Print table properties after iterating over the file when executing
check|scan|raw
--show_summary
Print the total number of files and data blocks and the total data, index
and filter block sizes after iterating over the files when executing
check|scan
--set_block_size=<block_size>
Can be combined with --command=recompress to set the block size that will
be used when trying different compression algorithms
--compression_types=<comma-separated list of CompressionType members, e.g.,
kSnappyCompression>
Can be combined with --command=recompress to run recompression for this
list of compression types
--parse_internal_key=<0xKEY>
Convenience option to parse an internal key on the command line. Dumps the
internal key in hex format {'key' @ SN: type}
)");
  }

  }  // namespace
  429. int SSTDumpTool::Run(int argc, char** argv, Options options) {
  430. const char* env_uri = nullptr;
  431. const char* dir_or_file = nullptr;
  432. uint64_t read_num = std::numeric_limits<uint64_t>::max();
  433. std::string command;
  434. char junk;
  435. uint64_t n;
  436. bool verify_checksum = false;
  437. bool output_hex = false;
  438. bool decode_blob_index = false;
  439. bool input_key_hex = false;
  440. bool has_from = false;
  441. bool has_to = false;
  442. bool use_from_as_prefix = false;
  443. bool show_properties = false;
  444. bool show_summary = false;
  445. bool set_block_size = false;
  446. std::string from_key;
  447. std::string to_key;
  448. std::string block_size_str;
  449. size_t block_size = 0;
  450. std::vector<std::pair<CompressionType, const char*>> compression_types;
  451. uint64_t total_num_files = 0;
  452. uint64_t total_num_data_blocks = 0;
  453. uint64_t total_data_block_size = 0;
  454. uint64_t total_index_block_size = 0;
  455. uint64_t total_filter_block_size = 0;
  456. for (int i = 1; i < argc; i++) {
  457. if (strncmp(argv[i], "--env_uri=", 10) == 0) {
  458. env_uri = argv[i] + 10;
  459. } else if (strncmp(argv[i], "--file=", 7) == 0) {
  460. dir_or_file = argv[i] + 7;
  461. } else if (strcmp(argv[i], "--output_hex") == 0) {
  462. output_hex = true;
  463. } else if (strcmp(argv[i], "--decode_blob_index") == 0) {
  464. decode_blob_index = true;
  465. } else if (strcmp(argv[i], "--input_key_hex") == 0) {
  466. input_key_hex = true;
  467. } else if (sscanf(argv[i], "--read_num=%lu%c", (unsigned long*)&n, &junk) ==
  468. 1) {
  469. read_num = n;
  470. } else if (strcmp(argv[i], "--verify_checksum") == 0) {
  471. verify_checksum = true;
  472. } else if (strncmp(argv[i], "--command=", 10) == 0) {
  473. command = argv[i] + 10;
  474. } else if (strncmp(argv[i], "--from=", 7) == 0) {
  475. from_key = argv[i] + 7;
  476. has_from = true;
  477. } else if (strncmp(argv[i], "--to=", 5) == 0) {
  478. to_key = argv[i] + 5;
  479. has_to = true;
  480. } else if (strncmp(argv[i], "--prefix=", 9) == 0) {
  481. from_key = argv[i] + 9;
  482. use_from_as_prefix = true;
  483. } else if (strcmp(argv[i], "--show_properties") == 0) {
  484. show_properties = true;
  485. } else if (strcmp(argv[i], "--show_summary") == 0) {
  486. show_summary = true;
  487. } else if (strncmp(argv[i], "--set_block_size=", 17) == 0) {
  488. set_block_size = true;
  489. block_size_str = argv[i] + 17;
  490. std::istringstream iss(block_size_str);
  491. iss >> block_size;
  492. if (iss.fail()) {
  493. fprintf(stderr, "block size must be numeric\n");
  494. exit(1);
  495. }
  496. } else if (strncmp(argv[i], "--compression_types=", 20) == 0) {
  497. std::string compression_types_csv = argv[i] + 20;
  498. std::istringstream iss(compression_types_csv);
  499. std::string compression_type;
  500. while (std::getline(iss, compression_type, ',')) {
  501. auto iter = std::find_if(
  502. kCompressions.begin(), kCompressions.end(),
  503. [&compression_type](std::pair<CompressionType, const char*> curr) {
  504. return curr.second == compression_type;
  505. });
  506. if (iter == kCompressions.end()) {
  507. fprintf(stderr, "%s is not a valid CompressionType\n",
  508. compression_type.c_str());
  509. exit(1);
  510. }
  511. compression_types.emplace_back(*iter);
  512. }
  513. } else if (strncmp(argv[i], "--parse_internal_key=", 21) == 0) {
  514. std::string in_key(argv[i] + 21);
  515. try {
  516. in_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(in_key);
  517. } catch (...) {
  518. std::cerr << "ERROR: Invalid key input '"
  519. << in_key
  520. << "' Use 0x{hex representation of internal rocksdb key}" << std::endl;
  521. return -1;
  522. }
  523. Slice sl_key = ROCKSDB_NAMESPACE::Slice(in_key);
  524. ParsedInternalKey ikey;
  525. int retc = 0;
  526. if (!ParseInternalKey(sl_key, &ikey)) {
  527. std::cerr << "Internal Key [" << sl_key.ToString(true /* in hex*/)
  528. << "] parse error!\n";
  529. retc = -1;
  530. }
  531. fprintf(stdout, "key=%s\n", ikey.DebugString(true).c_str());
  532. return retc;
  533. } else {
  534. fprintf(stderr, "Unrecognized argument '%s'\n\n", argv[i]);
  535. print_help();
  536. exit(1);
  537. }
  538. }
  539. if (use_from_as_prefix && has_from) {
  540. fprintf(stderr, "Cannot specify --prefix and --from\n\n");
  541. exit(1);
  542. }
  543. if (input_key_hex) {
  544. if (has_from || use_from_as_prefix) {
  545. from_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(from_key);
  546. }
  547. if (has_to) {
  548. to_key = ROCKSDB_NAMESPACE::LDBCommand::HexToString(to_key);
  549. }
  550. }
  551. if (dir_or_file == nullptr) {
  552. fprintf(stderr, "file or directory must be specified.\n\n");
  553. print_help();
  554. exit(1);
  555. }
  556. std::shared_ptr<ROCKSDB_NAMESPACE::Env> env_guard;
  557. // If caller of SSTDumpTool::Run(...) does not specify a different env other
  558. // than Env::Default(), then try to load custom env based on dir_or_file.
  559. // Otherwise, the caller is responsible for creating custom env.
  560. if (!options.env || options.env == ROCKSDB_NAMESPACE::Env::Default()) {
  561. Env* env = Env::Default();
  562. Status s = Env::LoadEnv(env_uri ? env_uri : "", &env, &env_guard);
  563. if (!s.ok() && !s.IsNotFound()) {
  564. fprintf(stderr, "LoadEnv: %s\n", s.ToString().c_str());
  565. exit(1);
  566. }
  567. options.env = env;
  568. } else {
  569. fprintf(stdout, "options.env is %p\n", options.env);
  570. }
  571. std::vector<std::string> filenames;
  572. ROCKSDB_NAMESPACE::Env* env = options.env;
  573. ROCKSDB_NAMESPACE::Status st = env->GetChildren(dir_or_file, &filenames);
  574. bool dir = true;
  575. if (!st.ok()) {
  576. filenames.clear();
  577. filenames.push_back(dir_or_file);
  578. dir = false;
  579. }
  580. fprintf(stdout, "from [%s] to [%s]\n",
  581. ROCKSDB_NAMESPACE::Slice(from_key).ToString(true).c_str(),
  582. ROCKSDB_NAMESPACE::Slice(to_key).ToString(true).c_str());
  583. uint64_t total_read = 0;
  584. for (size_t i = 0; i < filenames.size(); i++) {
  585. std::string filename = filenames.at(i);
  586. if (filename.length() <= 4 ||
  587. filename.rfind(".sst") != filename.length() - 4) {
  588. // ignore
  589. continue;
  590. }
  591. if (dir) {
  592. filename = std::string(dir_or_file) + "/" + filename;
  593. }
  594. ROCKSDB_NAMESPACE::SstFileDumper dumper(options, filename, verify_checksum,
  595. output_hex, decode_blob_index);
  596. if (!dumper.getStatus().ok()) {
  597. fprintf(stderr, "%s: %s\n", filename.c_str(),
  598. dumper.getStatus().ToString().c_str());
  599. continue;
  600. }
  601. if (command == "recompress") {
  602. dumper.ShowAllCompressionSizes(
  603. set_block_size ? block_size : 16384,
  604. compression_types.empty() ? kCompressions : compression_types);
  605. return 0;
  606. }
  607. if (command == "raw") {
  608. std::string out_filename = filename.substr(0, filename.length() - 4);
  609. out_filename.append("_dump.txt");
  610. st = dumper.DumpTable(out_filename);
  611. if (!st.ok()) {
  612. fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
  613. exit(1);
  614. } else {
  615. fprintf(stdout, "raw dump written to file %s\n", &out_filename[0]);
  616. }
  617. continue;
  618. }
  619. // scan all files in give file path.
  620. if (command == "" || command == "scan" || command == "check") {
  621. st = dumper.ReadSequential(
  622. command == "scan", read_num > 0 ? (read_num - total_read) : read_num,
  623. has_from || use_from_as_prefix, from_key, has_to, to_key,
  624. use_from_as_prefix);
  625. if (!st.ok()) {
  626. fprintf(stderr, "%s: %s\n", filename.c_str(),
  627. st.ToString().c_str());
  628. }
  629. total_read += dumper.GetReadNumber();
  630. if (read_num > 0 && total_read > read_num) {
  631. break;
  632. }
  633. }
  634. if (command == "verify") {
  635. st = dumper.VerifyChecksum();
  636. if (!st.ok()) {
  637. fprintf(stderr, "%s is corrupted: %s\n", filename.c_str(),
  638. st.ToString().c_str());
  639. } else {
  640. fprintf(stdout, "The file is ok\n");
  641. }
  642. continue;
  643. }
  644. if (show_properties || show_summary) {
  645. const ROCKSDB_NAMESPACE::TableProperties* table_properties;
  646. std::shared_ptr<const ROCKSDB_NAMESPACE::TableProperties>
  647. table_properties_from_reader;
  648. st = dumper.ReadTableProperties(&table_properties_from_reader);
  649. if (!st.ok()) {
  650. fprintf(stderr, "%s: %s\n", filename.c_str(), st.ToString().c_str());
  651. fprintf(stderr, "Try to use initial table properties\n");
  652. table_properties = dumper.GetInitTableProperties();
  653. } else {
  654. table_properties = table_properties_from_reader.get();
  655. }
  656. if (table_properties != nullptr) {
  657. if (show_properties) {
  658. fprintf(stdout,
  659. "Table Properties:\n"
  660. "------------------------------\n"
  661. " %s",
  662. table_properties->ToString("\n ", ": ").c_str());
  663. }
  664. total_num_files += 1;
  665. total_num_data_blocks += table_properties->num_data_blocks;
  666. total_data_block_size += table_properties->data_size;
  667. total_index_block_size += table_properties->index_size;
  668. total_filter_block_size += table_properties->filter_size;
  669. if (show_properties) {
  670. fprintf(stdout,
  671. "Raw user collected properties\n"
  672. "------------------------------\n");
  673. for (const auto& kv : table_properties->user_collected_properties) {
  674. std::string prop_name = kv.first;
  675. std::string prop_val = Slice(kv.second).ToString(true);
  676. fprintf(stdout, " # %s: 0x%s\n", prop_name.c_str(),
  677. prop_val.c_str());
  678. }
  679. }
  680. } else {
  681. fprintf(stderr, "Reader unexpectedly returned null properties\n");
  682. }
  683. }
  684. }
  685. if (show_summary) {
  686. fprintf(stdout, "total number of files: %" PRIu64 "\n", total_num_files);
  687. fprintf(stdout, "total number of data blocks: %" PRIu64 "\n",
  688. total_num_data_blocks);
  689. fprintf(stdout, "total data block size: %" PRIu64 "\n",
  690. total_data_block_size);
  691. fprintf(stdout, "total index block size: %" PRIu64 "\n",
  692. total_index_block_size);
  693. fprintf(stdout, "total filter block size: %" PRIu64 "\n",
  694. total_filter_block_size);
  695. }
  696. return 0;
  697. }
  698. } // namespace ROCKSDB_NAMESPACE
  699. #endif // ROCKSDB_LITE