compaction_job_stats_test.cc

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include <algorithm>
#include <cinttypes>
#include <iostream>
#include <mutex>
#include <queue>
#include <set>
#include <thread>
#include <unordered_set>
#include <utility>

#include "db/db_impl/db_impl.h"
#include "db/dbformat.h"
#include "db/job_context.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "env/mock_env.h"
#include "file/filename.h"
#include "logging/logging.h"
#include "memtable/hash_linklist_rep.h"
#include "monitoring/statistics.h"
#include "monitoring/thread_status_util.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/compaction_filter.h"
#include "rocksdb/convenience.h"
#include "rocksdb/db.h"
#include "rocksdb/env.h"
#include "rocksdb/experimental.h"
#include "rocksdb/filter_policy.h"
#include "rocksdb/options.h"
#include "rocksdb/perf_context.h"
#include "rocksdb/slice.h"
#include "rocksdb/slice_transform.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "rocksdb/thread_status.h"
#include "rocksdb/utilities/checkpoint.h"
#include "rocksdb/utilities/write_batch_with_index.h"
#include "table/block_based/block_based_table_factory.h"
#include "table/mock_table.h"
#include "table/plain/plain_table_factory.h"
#include "table/scoped_arena_iterator.h"
#include "test_util/sync_point.h"
#include "test_util/testharness.h"
#include "test_util/testutil.h"
#include "util/compression.h"
#include "util/hash.h"
#include "util/mutexlock.h"
#include "util/rate_limiter.h"
#include "util/string_util.h"
#include "utilities/merge_operators.h"

#if !defined(IOS_CROSS_COMPILE)
#ifndef ROCKSDB_LITE

namespace ROCKSDB_NAMESPACE {
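
// Generates a random string of length `len` whose compressibility is
// controlled by `ratio` (see test::CompressibleString).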
static std::string RandomString(Random* rnd, int len, double ratio) {
  std::string r;
  test::CompressibleString(rnd, ratio, len, &r);
  return r;
}
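
// Formats `key` as a decimal string zero-padded to `length` characters,
// with `length` capped at the internal buffer size.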
std::string Key(uint64_t key, int length) {
  const int kBufSize = 1000;
  char buf[kBufSize];
  if (length > kBufSize) {
    length = kBufSize;
  }
  snprintf(buf, kBufSize, "%0*" PRIu64, length, key);
  return std::string(buf);
}

class CompactionJobStatsTest : public testing::Test,
                               public testing::WithParamInterface<uint32_t> {
 public:
  std::string dbname_;
  std::string alternative_wal_dir_;
  Env* env_;
  DB* db_;
  std::vector<ColumnFamilyHandle*> handles_;
  uint32_t max_subcompactions_;
  Options last_options_;

  CompactionJobStatsTest() : env_(Env::Default()) {
    env_->SetBackgroundThreads(1, Env::LOW);
    env_->SetBackgroundThreads(1, Env::HIGH);
    dbname_ = test::PerThreadDBPath("compaction_job_stats_test");
    alternative_wal_dir_ = dbname_ + "/wal";
    Options options;
    options.create_if_missing = true;
    max_subcompactions_ = GetParam();
    options.max_subcompactions = max_subcompactions_;
    auto delete_options = options;
    delete_options.wal_dir = alternative_wal_dir_;
    EXPECT_OK(DestroyDB(dbname_, delete_options));
    // Destroy again in case the alternative WAL dir was not used.
    EXPECT_OK(DestroyDB(dbname_, options));
    db_ = nullptr;
    Reopen(options);
  }

  ~CompactionJobStatsTest() override {
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency({});
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearAllCallBacks();
    Close();
    Options options;
    options.db_paths.emplace_back(dbname_, 0);
    options.db_paths.emplace_back(dbname_ + "_2", 0);
    options.db_paths.emplace_back(dbname_ + "_3", 0);
    options.db_paths.emplace_back(dbname_ + "_4", 0);
    EXPECT_OK(DestroyDB(dbname_, options));
  }

  // Required if inheriting from testing::WithParamInterface<>
  static void SetUpTestCase() {}
  static void TearDownTestCase() {}

  DBImpl* dbfull() { return reinterpret_cast<DBImpl*>(db_); }

  void CreateColumnFamilies(const std::vector<std::string>& cfs,
                            const Options& options) {
    ColumnFamilyOptions cf_opts(options);
    size_t cfi = handles_.size();
    handles_.resize(cfi + cfs.size());
    for (auto cf : cfs) {
      ASSERT_OK(db_->CreateColumnFamily(cf_opts, cf, &handles_[cfi++]));
    }
  }

  void CreateAndReopenWithCF(const std::vector<std::string>& cfs,
                             const Options& options) {
    CreateColumnFamilies(cfs, options);
    std::vector<std::string> cfs_plus_default = cfs;
    cfs_plus_default.insert(cfs_plus_default.begin(), kDefaultColumnFamilyName);
    ReopenWithColumnFamilies(cfs_plus_default, options);
  }

  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
                                const std::vector<Options>& options) {
    ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
  }

  void ReopenWithColumnFamilies(const std::vector<std::string>& cfs,
                                const Options& options) {
    ASSERT_OK(TryReopenWithColumnFamilies(cfs, options));
  }

  Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
                                     const std::vector<Options>& options) {
    Close();
    EXPECT_EQ(cfs.size(), options.size());
    std::vector<ColumnFamilyDescriptor> column_families;
    for (size_t i = 0; i < cfs.size(); ++i) {
      column_families.push_back(ColumnFamilyDescriptor(cfs[i], options[i]));
    }
    DBOptions db_opts = DBOptions(options[0]);
    return DB::Open(db_opts, dbname_, column_families, &handles_, &db_);
  }

  Status TryReopenWithColumnFamilies(const std::vector<std::string>& cfs,
                                     const Options& options) {
    Close();
    std::vector<Options> v_opts(cfs.size(), options);
    return TryReopenWithColumnFamilies(cfs, v_opts);
  }

  void Reopen(const Options& options) { ASSERT_OK(TryReopen(options)); }

  void Close() {
    for (auto h : handles_) {
      delete h;
    }
    handles_.clear();
    delete db_;
    db_ = nullptr;
  }

  void DestroyAndReopen(const Options& options) {
    // Destroy using last options
    Destroy(last_options_);
    ASSERT_OK(TryReopen(options));
  }

  void Destroy(const Options& options) {
    Close();
    ASSERT_OK(DestroyDB(dbname_, options));
  }

  Status ReadOnlyReopen(const Options& options) {
    return DB::OpenForReadOnly(options, dbname_, &db_);
  }

  Status TryReopen(const Options& options) {
    Close();
    last_options_ = options;
    return DB::Open(options, dbname_, &db_);
  }

  Status Flush(int cf = 0) {
    if (cf == 0) {
      return db_->Flush(FlushOptions());
    } else {
      return db_->Flush(FlushOptions(), handles_[cf]);
    }
  }

  Status Put(const Slice& k, const Slice& v,
             WriteOptions wo = WriteOptions()) {
    return db_->Put(wo, k, v);
  }

  Status Put(int cf, const Slice& k, const Slice& v,
             WriteOptions wo = WriteOptions()) {
    return db_->Put(wo, handles_[cf], k, v);
  }

  Status Delete(const std::string& k) {
    return db_->Delete(WriteOptions(), k);
  }

  Status Delete(int cf, const std::string& k) {
    return db_->Delete(WriteOptions(), handles_[cf], k);
  }

  std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
    ReadOptions options;
    options.verify_checksums = true;
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

  std::string Get(int cf, const std::string& k,
                  const Snapshot* snapshot = nullptr) {
    ReadOptions options;
    options.verify_checksums = true;
    options.snapshot = snapshot;
    std::string result;
    Status s = db_->Get(options, handles_[cf], k, &result);
    if (s.IsNotFound()) {
      result = "NOT_FOUND";
    } else if (!s.ok()) {
      result = s.ToString();
    }
    return result;
  }

  int NumTableFilesAtLevel(int level, int cf = 0) {
    std::string property;
    if (cf == 0) {
      // default cfd
      EXPECT_TRUE(db_->GetProperty(
          "rocksdb.num-files-at-level" + NumberToString(level), &property));
    } else {
      EXPECT_TRUE(db_->GetProperty(
          handles_[cf], "rocksdb.num-files-at-level" + NumberToString(level),
          &property));
    }
    return atoi(property.c_str());
  }

  // Return spread of files per level
  std::string FilesPerLevel(int cf = 0) {
    int num_levels =
        (cf == 0) ? db_->NumberLevels() : db_->NumberLevels(handles_[1]);
    std::string result;
    size_t last_non_zero_offset = 0;
    for (int level = 0; level < num_levels; level++) {
      int f = NumTableFilesAtLevel(level, cf);
      char buf[100];
      snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
      result += buf;
      if (f > 0) {
        last_non_zero_offset = result.size();
      }
    }
    result.resize(last_non_zero_offset);
    return result;
  }

  uint64_t Size(const Slice& start, const Slice& limit, int cf = 0) {
    Range r(start, limit);
    uint64_t size;
    if (cf == 0) {
      db_->GetApproximateSizes(&r, 1, &size);
    } else {
      db_->GetApproximateSizes(handles_[1], &r, 1, &size);
    }
    return size;
  }

  void Compact(int cf, const Slice& start, const Slice& limit,
               uint32_t target_path_id) {
    CompactRangeOptions compact_options;
    compact_options.target_path_id = target_path_id;
    ASSERT_OK(db_->CompactRange(compact_options, handles_[cf], &start, &limit));
  }

  void Compact(int cf, const Slice& start, const Slice& limit) {
    ASSERT_OK(
        db_->CompactRange(CompactRangeOptions(), handles_[cf], &start, &limit));
  }

  void Compact(const Slice& start, const Slice& limit) {
    ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &start, &limit));
  }

  void TEST_Compact(int level, int cf, const Slice& start,
                    const Slice& limit) {
    ASSERT_OK(dbfull()->TEST_CompactRange(level, &start, &limit, handles_[cf],
                                          true /* disallow trivial move */));
  }

  // Do n memtable compactions, each of which produces an sstable
  // covering the range [small,large].
  void MakeTables(int n, const std::string& small, const std::string& large,
                  int cf = 0) {
    for (int i = 0; i < n; i++) {
      ASSERT_OK(Put(cf, small, "begin"));
      ASSERT_OK(Put(cf, large, "end"));
      ASSERT_OK(Flush(cf));
    }
  }
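
  // Fills in the deletion-related fields of a CompactionJobStats so that
  // it can serve as the expected value checked by
  // CompactionJobDeletionStatsChecker::Verify().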
  static void SetDeletionCompactionStats(CompactionJobStats* stats,
                                         uint64_t input_deletions,
                                         uint64_t expired_deletions,
                                         uint64_t records_replaced) {
    stats->num_input_deletion_records = input_deletions;
    stats->num_expired_deletion_records = expired_deletions;
    stats->num_records_replaced = records_replaced;
  }
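
  // Puts keys in [smallest, largest) at the given interval with random
  // values of `value_size` bytes, then flushes to produce a single L0 file.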
  void MakeTableWithKeyValues(Random* rnd, uint64_t smallest, uint64_t largest,
                              int key_size, int value_size, uint64_t interval,
                              double ratio, int cf = 0) {
    for (auto key = smallest; key < largest; key += interval) {
      ASSERT_OK(Put(cf, Slice(Key(key, key_size)),
                    Slice(RandomString(rnd, value_size, ratio))));
    }
    ASSERT_OK(Flush(cf));
  }

  // This function assumes that two rounds of keys have already been
  // inserted into the database, as done in DeletionStatsTest.
  void SelectivelyDeleteKeys(uint64_t smallest, uint64_t largest,
                             uint64_t interval, int deletion_interval,
                             int key_size, uint64_t cutoff_key_num,
                             CompactionJobStats* stats, int cf = 0) {
    // interval needs to be >= 2 so that deletion entries can be inserted
    // that are intended to not result in an actual key deletion by using
    // an offset of 1 from another existing key
    ASSERT_GE(interval, 2);

    uint64_t ctr = 1;
    uint32_t deletions_made = 0;
    uint32_t num_deleted = 0;
    uint32_t num_expired = 0;
    for (auto key = smallest; key <= largest; key += interval, ctr++) {
      if (ctr % deletion_interval == 0) {
        ASSERT_OK(Delete(cf, Key(key, key_size)));
        deletions_made++;
        num_deleted++;
        if (key > cutoff_key_num) {
          num_expired++;
        }
      }
    }

    // Insert some deletions for keys that don't exist, both inside and
    // outside the key range.
    ASSERT_OK(Delete(cf, Key(smallest + 1, key_size)));
    deletions_made++;

    ASSERT_OK(Delete(cf, Key(smallest - 1, key_size)));
    deletions_made++;
    num_expired++;

    ASSERT_OK(Delete(cf, Key(smallest - 9, key_size)));
    deletions_made++;
    num_expired++;

    ASSERT_OK(Flush(cf));
    SetDeletionCompactionStats(stats, deletions_made, num_expired, num_deleted);
  }
};

// An EventListener which helps verify the compaction results in
// test CompactionJobStatsTest.
class CompactionJobStatsChecker : public EventListener {
 public:
  CompactionJobStatsChecker()
      : compression_enabled_(false), verify_next_comp_io_stats_(false) {}

  size_t NumberOfUnverifiedStats() { return expected_stats_.size(); }

  void set_verify_next_comp_io_stats(bool v) { verify_next_comp_io_stats_ = v; }

  // Once a compaction completes, this function verifies the returned
  // CompactionJobInfo against the oldest CompactionJobStats added earlier
  // to "expected_stats_" that has not yet been used for verification.
  void OnCompactionCompleted(DB* /*db*/, const CompactionJobInfo& ci) override {
    if (verify_next_comp_io_stats_) {
      ASSERT_GT(ci.stats.file_write_nanos, 0);
      ASSERT_GT(ci.stats.file_range_sync_nanos, 0);
      ASSERT_GT(ci.stats.file_fsync_nanos, 0);
      ASSERT_GT(ci.stats.file_prepare_write_nanos, 0);
      verify_next_comp_io_stats_ = false;
    }

    std::lock_guard<std::mutex> lock(mutex_);
    if (expected_stats_.size()) {
      Verify(ci.stats, expected_stats_.front());
      expected_stats_.pop();
    }
  }

  // A helper function which verifies whether two CompactionJobStats
  // match. All compaction stats are verified with ASSERT_EQ except for
  // the total input / output bytes, which are checked with ASSERT_GE and
  // ASSERT_LE under a reasonable bias --- 10% in the uncompressed case
  // and 20% when compression is used.
  virtual void Verify(const CompactionJobStats& current_stats,
                      const CompactionJobStats& stats) {
    // time
    ASSERT_GT(current_stats.elapsed_micros, 0U);

    ASSERT_EQ(current_stats.num_input_records, stats.num_input_records);
    ASSERT_EQ(current_stats.num_input_files, stats.num_input_files);
    ASSERT_EQ(current_stats.num_input_files_at_output_level,
              stats.num_input_files_at_output_level);

    ASSERT_EQ(current_stats.num_output_records, stats.num_output_records);
    ASSERT_EQ(current_stats.num_output_files, stats.num_output_files);

    ASSERT_EQ(current_stats.is_manual_compaction, stats.is_manual_compaction);

    // file size
    double kFileSizeBias = compression_enabled_ ? 0.20 : 0.10;
    ASSERT_GE(current_stats.total_input_bytes * (1.00 + kFileSizeBias),
              stats.total_input_bytes);
    ASSERT_LE(current_stats.total_input_bytes,
              stats.total_input_bytes * (1.00 + kFileSizeBias));
    ASSERT_GE(current_stats.total_output_bytes * (1.00 + kFileSizeBias),
              stats.total_output_bytes);
    ASSERT_LE(current_stats.total_output_bytes,
              stats.total_output_bytes * (1.00 + kFileSizeBias));
    ASSERT_EQ(current_stats.total_input_raw_key_bytes,
              stats.total_input_raw_key_bytes);
    ASSERT_EQ(current_stats.total_input_raw_value_bytes,
              stats.total_input_raw_value_bytes);

    ASSERT_EQ(current_stats.num_records_replaced, stats.num_records_replaced);

    ASSERT_EQ(current_stats.num_corrupt_keys, stats.num_corrupt_keys);

    ASSERT_EQ(std::string(current_stats.smallest_output_key_prefix),
              std::string(stats.smallest_output_key_prefix));
    ASSERT_EQ(std::string(current_stats.largest_output_key_prefix),
              std::string(stats.largest_output_key_prefix));
  }

  // Add an expected compaction stats, which will be used to
  // verify the CompactionJobStats returned by the OnCompactionCompleted()
  // callback.
  void AddExpectedStats(const CompactionJobStats& stats) {
    std::lock_guard<std::mutex> lock(mutex_);
    expected_stats_.push(stats);
  }

  void EnableCompression(bool flag) { compression_enabled_ = flag; }

  bool verify_next_comp_io_stats() const { return verify_next_comp_io_stats_; }

 private:
  std::mutex mutex_;
  std::queue<CompactionJobStats> expected_stats_;
  bool compression_enabled_;
  bool verify_next_comp_io_stats_;
};

// An EventListener which helps verify the compaction statistics in
// the test DeletionStatsTest.
class CompactionJobDeletionStatsChecker : public CompactionJobStatsChecker {
 public:
  // Verifies whether two CompactionJobStats match.
  void Verify(const CompactionJobStats& current_stats,
              const CompactionJobStats& stats) override {
    ASSERT_EQ(current_stats.num_input_deletion_records,
              stats.num_input_deletion_records);
    ASSERT_EQ(current_stats.num_expired_deletion_records,
              stats.num_expired_deletion_records);
    ASSERT_EQ(current_stats.num_records_replaced, stats.num_records_replaced);
    ASSERT_EQ(current_stats.num_corrupt_keys, stats.num_corrupt_keys);
  }
};

namespace {
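
// Rough model of an SST file holding `num_records` entries: data blocks
// (key bytes + compressed value bytes + a fixed per-key overhead), plus a
// footer, a bloom filter block (bloom_bits_per_key / 8 bytes per key), and
// an approximated index block. For example, with the defaults below, 100
// records of 10-byte keys and 1000-byte values come to
//   100 * (10 + 1000 + 8) + 512 + 100 * 10 / 8 + 101800 * 18 / 4096
//   = 102884 bytes, roughly 100 KB.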
uint64_t EstimatedFileSize(uint64_t num_records, size_t key_size,
                           size_t value_size, double compression_ratio = 1.0,
                           size_t block_size = 4096,
                           int bloom_bits_per_key = 10) {
  const size_t kPerKeyOverhead = 8;
  const size_t kFooterSize = 512;
  uint64_t data_size = static_cast<uint64_t>(
      num_records *
      (key_size + value_size * compression_ratio + kPerKeyOverhead));

  return data_size + kFooterSize +
         num_records * bloom_bits_per_key / 8        // filter block
         + data_size * (key_size + 8) / block_size;  // index block
}

namespace {

void CopyPrefix(const Slice& src, size_t prefix_length, std::string* dst) {
  assert(prefix_length > 0);
  size_t length = src.size() > prefix_length ? prefix_length : src.size();
  dst->assign(src.data(), length);
}

}  // namespace
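
// Builds the expected CompactionJobStats of a manual compaction over
// [smallest_key, largest_key] with the given input / output shape. The
// byte totals come from EstimatedFileSize(), so they are only accurate to
// within the bias tolerated by CompactionJobStatsChecker::Verify().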
CompactionJobStats NewManualCompactionJobStats(
    const std::string& smallest_key, const std::string& largest_key,
    size_t num_input_files, size_t num_input_files_at_output_level,
    uint64_t num_input_records, size_t key_size, size_t value_size,
    size_t num_output_files, uint64_t num_output_records,
    double compression_ratio, uint64_t num_records_replaced,
    bool is_manual = true) {
  CompactionJobStats stats;
  stats.Reset();

  stats.num_input_records = num_input_records;
  stats.num_input_files = num_input_files;
  stats.num_input_files_at_output_level = num_input_files_at_output_level;

  stats.num_output_records = num_output_records;
  stats.num_output_files = num_output_files;

  stats.total_input_bytes =
      EstimatedFileSize(num_input_records / num_input_files, key_size,
                        value_size, compression_ratio) *
      num_input_files;
  stats.total_output_bytes =
      EstimatedFileSize(num_output_records / num_output_files, key_size,
                        value_size, compression_ratio) *
      num_output_files;
  stats.total_input_raw_key_bytes = num_input_records * (key_size + 8);
  stats.total_input_raw_value_bytes = num_input_records * value_size;

  stats.is_manual_compaction = is_manual;

  stats.num_records_replaced = num_records_replaced;

  CopyPrefix(smallest_key, CompactionJobStats::kMaxPrefixLength,
             &stats.smallest_output_key_prefix);
  CopyPrefix(largest_key, CompactionJobStats::kMaxPrefixLength,
             &stats.largest_output_key_prefix);

  return stats;
}
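
// Returns the first compression type supported by the current build, or
// kNoCompression if none is available.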
CompressionType GetAnyCompression() {
  if (Snappy_Supported()) {
    return kSnappyCompression;
  } else if (Zlib_Supported()) {
    return kZlibCompression;
  } else if (BZip2_Supported()) {
    return kBZip2Compression;
  } else if (LZ4_Supported()) {
    return kLZ4Compression;
  } else if (XPRESS_Supported()) {
    return kXpressCompression;
  }
  return kNoCompression;
}

}  // namespace
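
// Walks through several rounds of manual compactions and verifies the
// CompactionJobStats reported to the listener at each step: single-file
// L0 -> L1 compactions, a multi-file compaction, overlapping compactions
// with updated keys, a full compaction, and (when a compression library
// is available) a compressed pass that also checks the I/O timing stats.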
TEST_P(CompactionJobStatsTest, CompactionJobStatsTest) {
  Random rnd(301);
  const int kBufSize = 100;
  char buf[kBufSize];
  uint64_t key_base = 100000000l;
  // Note: key_base must be a multiple of num_keys_per_L0_file
  int num_keys_per_L0_file = 100;
  const int kTestScale = 8;
  const int kKeySize = 10;
  const int kValueSize = 1000;
  const double kCompressionRatio = 0.5;
  double compression_ratio = 1.0;
  uint64_t key_interval = key_base / num_keys_per_L0_file;

  // Whenever a compaction completes, this listener will try to
  // verify whether the returned CompactionJobStats matches
  // what we expect. The expected CompactionJobStats is added
  // via AddExpectedStats().
  auto* stats_checker = new CompactionJobStatsChecker();
  Options options;
  options.listeners.emplace_back(stats_checker);
  options.create_if_missing = true;
  // just enough settings to hold off auto-compaction.
  options.level0_file_num_compaction_trigger = kTestScale + 1;
  options.num_levels = 3;
  options.compression = kNoCompression;
  options.max_subcompactions = max_subcompactions_;
  options.bytes_per_sync = 512 * 1024;

  options.report_bg_io_stats = true;
  for (int test = 0; test < 2; ++test) {
    DestroyAndReopen(options);
    CreateAndReopenWithCF({"pikachu"}, options);

    // 1st Phase: generate "num_L0_files" L0 files.
    int num_L0_files = 0;
    for (uint64_t start_key = key_base; start_key <= key_base * kTestScale;
         start_key += key_base) {
      MakeTableWithKeyValues(&rnd, start_key, start_key + key_base - 1,
                             kKeySize, kValueSize, key_interval,
                             compression_ratio, 1);
      snprintf(buf, kBufSize, "%d", ++num_L0_files);
      ASSERT_EQ(std::string(buf), FilesPerLevel(1));
    }
    ASSERT_EQ(ToString(num_L0_files), FilesPerLevel(1));

    // 2nd Phase: perform L0 -> L1 compaction.
    int L0_compaction_count = 6;
    int count = 1;
    std::string smallest_key;
    std::string largest_key;
    for (uint64_t start_key = key_base;
         start_key <= key_base * L0_compaction_count;
         start_key += key_base, count++) {
      smallest_key = Key(start_key, 10);
      largest_key = Key(start_key + key_base - key_interval, 10);
      stats_checker->AddExpectedStats(NewManualCompactionJobStats(
          smallest_key, largest_key, 1, 0, num_keys_per_L0_file, kKeySize,
          kValueSize, 1, num_keys_per_L0_file, compression_ratio, 0));
      ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U);
      TEST_Compact(0, 1, smallest_key, largest_key);
      snprintf(buf, kBufSize, "%d,%d", num_L0_files - count, count);
      ASSERT_EQ(std::string(buf), FilesPerLevel(1));
    }

    // compact two files into one in the last L0 -> L1 compaction
    int num_remaining_L0 = num_L0_files - L0_compaction_count;
    smallest_key = Key(key_base * (L0_compaction_count + 1), 10);
    largest_key = Key(key_base * (kTestScale + 1) - key_interval, 10);
    stats_checker->AddExpectedStats(NewManualCompactionJobStats(
        smallest_key, largest_key, num_remaining_L0, 0,
        num_keys_per_L0_file * num_remaining_L0, kKeySize, kValueSize, 1,
        num_keys_per_L0_file * num_remaining_L0, compression_ratio, 0));
    ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U);
    TEST_Compact(0, 1, smallest_key, largest_key);

    int num_L1_files = num_L0_files - num_remaining_L0 + 1;
    num_L0_files = 0;
    snprintf(buf, kBufSize, "%d,%d", num_L0_files, num_L1_files);
    ASSERT_EQ(std::string(buf), FilesPerLevel(1));

    // 3rd Phase: generate sparse L0 files (wider key-range, same num of keys)
    int sparseness = 2;
    for (uint64_t start_key = key_base; start_key <= key_base * kTestScale;
         start_key += key_base * sparseness) {
      MakeTableWithKeyValues(&rnd, start_key,
                             start_key + key_base * sparseness - 1, kKeySize,
                             kValueSize,
                             key_base * sparseness / num_keys_per_L0_file,
                             compression_ratio, 1);
      snprintf(buf, kBufSize, "%d,%d", ++num_L0_files, num_L1_files);
      ASSERT_EQ(std::string(buf), FilesPerLevel(1));
    }

    // 4th Phase: perform L0 -> L1 compaction again, expect higher write amp.
    // When subcompactions are enabled, the number of output files increases
    // by 1 because multiple threads consume the input and generate output
    // files without coordinating to pack the output into as few files as a
    // sequential compaction would.
    int num_output_files = options.max_subcompactions > 1 ? 2 : 1;
    for (uint64_t start_key = key_base; num_L0_files > 1;
         start_key += key_base * sparseness) {
      smallest_key = Key(start_key, 10);
      largest_key = Key(start_key + key_base * sparseness - key_interval, 10);
      stats_checker->AddExpectedStats(NewManualCompactionJobStats(
          smallest_key, largest_key, 3, 2, num_keys_per_L0_file * 3, kKeySize,
          kValueSize, num_output_files,
          num_keys_per_L0_file * 2,  // 1/3 of the data will be updated.
          compression_ratio, num_keys_per_L0_file));
      ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U);
      Compact(1, smallest_key, largest_key);
      if (options.max_subcompactions == 1) {
        --num_L1_files;
      }
      snprintf(buf, kBufSize, "%d,%d", --num_L0_files, num_L1_files);
      ASSERT_EQ(std::string(buf), FilesPerLevel(1));
    }

    // 5th Phase: Do a full compaction, which involves two sub-compactions.
    // Here we expect to have 1 L0 file and 4 L1 files.
    // In the first sub-compaction, we expect L0 compaction.
    smallest_key = Key(key_base, 10);
    largest_key = Key(key_base * (kTestScale + 1) - key_interval, 10);
    stats_checker->AddExpectedStats(NewManualCompactionJobStats(
        Key(key_base * (kTestScale + 1 - sparseness), 10), largest_key, 2, 1,
        num_keys_per_L0_file * 3, kKeySize, kValueSize, 1,
        num_keys_per_L0_file * 2, compression_ratio, num_keys_per_L0_file));
    ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 1U);
    Compact(1, smallest_key, largest_key);

    num_L1_files = options.max_subcompactions > 1 ? 7 : 4;
    char L1_buf[4];
    snprintf(L1_buf, sizeof(L1_buf), "0,%d", num_L1_files);
    std::string L1_files(L1_buf);
    ASSERT_EQ(L1_files, FilesPerLevel(1));
    options.compression = GetAnyCompression();
    if (options.compression == kNoCompression) {
      break;
    }
    stats_checker->EnableCompression(true);
    compression_ratio = kCompressionRatio;

    for (int i = 0; i < 5; i++) {
      ASSERT_OK(Put(1, Slice(Key(key_base + i, 10)),
                    Slice(RandomString(&rnd, 512 * 1024, 1))));
    }

    ASSERT_OK(Flush(1));
    reinterpret_cast<DBImpl*>(db_)->TEST_WaitForCompact();

    stats_checker->set_verify_next_comp_io_stats(true);
    std::atomic<bool> first_prepare_write(true);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "WritableFileWriter::Append:BeforePrepareWrite", [&](void* /*arg*/) {
          if (first_prepare_write.load()) {
            options.env->SleepForMicroseconds(3);
            first_prepare_write.store(false);
          }
        });

    std::atomic<bool> first_flush(true);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "WritableFileWriter::Flush:BeforeAppend", [&](void* /*arg*/) {
          if (first_flush.load()) {
            options.env->SleepForMicroseconds(3);
            first_flush.store(false);
          }
        });

    std::atomic<bool> first_sync(true);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "WritableFileWriter::SyncInternal:0", [&](void* /*arg*/) {
          if (first_sync.load()) {
            options.env->SleepForMicroseconds(3);
            first_sync.store(false);
          }
        });

    std::atomic<bool> first_range_sync(true);
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
        "WritableFileWriter::RangeSync:0", [&](void* /*arg*/) {
          if (first_range_sync.load()) {
            options.env->SleepForMicroseconds(3);
            first_range_sync.store(false);
          }
        });
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

    Compact(1, smallest_key, largest_key);

    ASSERT_TRUE(!stats_checker->verify_next_comp_io_stats());
    ASSERT_TRUE(!first_prepare_write.load());
    ASSERT_TRUE(!first_flush.load());
    ASSERT_TRUE(!first_sync.load());
    ASSERT_TRUE(!first_range_sync.load());
    ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  }
  ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 0U);
}
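
// Verifies the deletion-related CompactionJobStats (input deletion
// records, expired deletions, and replaced records) using files that
// cover the same key range in L0, L1, and L2.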
TEST_P(CompactionJobStatsTest, DeletionStatsTest) {
  Random rnd(301);
  uint64_t key_base = 100000l;
  // Note: key_base must be a multiple of num_keys_per_L0_file
  int num_keys_per_L0_file = 20;
  const int kTestScale = 8;  // make sure this is even
  const int kKeySize = 10;
  const int kValueSize = 100;
  double compression_ratio = 1.0;
  uint64_t key_interval = key_base / num_keys_per_L0_file;
  uint64_t largest_key_num = key_base * (kTestScale + 1) - key_interval;
  uint64_t cutoff_key_num = key_base * (kTestScale / 2 + 1) - key_interval;
  const std::string smallest_key = Key(key_base - 10, kKeySize);
  const std::string largest_key = Key(largest_key_num + 10, kKeySize);

  // Whenever a compaction completes, this listener will try to
  // verify whether the returned CompactionJobStats matches
  // what we expect.
  auto* stats_checker = new CompactionJobDeletionStatsChecker();
  Options options;
  options.listeners.emplace_back(stats_checker);
  options.create_if_missing = true;
  options.level0_file_num_compaction_trigger = kTestScale + 1;
  options.num_levels = 3;
  options.compression = kNoCompression;
  options.max_bytes_for_level_multiplier = 2;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Stage 1: Generate several L0 files and then send them to L2 by
  // using CompactRangeOptions and CompactRange(). These files will
  // have a strict subset of the keys from the full key-range.
  for (uint64_t start_key = key_base; start_key <= key_base * kTestScale / 2;
       start_key += key_base) {
    MakeTableWithKeyValues(&rnd, start_key, start_key + key_base - 1, kKeySize,
                           kValueSize, key_interval, compression_ratio, 1);
  }

  CompactRangeOptions cr_options;
  cr_options.change_level = true;
  cr_options.target_level = 2;
  ASSERT_OK(db_->CompactRange(cr_options, handles_[1], nullptr, nullptr));
  ASSERT_GT(NumTableFilesAtLevel(2, 1), 0);

  // Stage 2: Generate files including keys from the entire key range
  for (uint64_t start_key = key_base; start_key <= key_base * kTestScale;
       start_key += key_base) {
    MakeTableWithKeyValues(&rnd, start_key, start_key + key_base - 1, kKeySize,
                           kValueSize, key_interval, compression_ratio, 1);
  }

  // Send these L0 files to L1
  TEST_Compact(0, 1, smallest_key, largest_key);
  ASSERT_GT(NumTableFilesAtLevel(1, 1), 0);

  // Add a new record and flush so that there is now an L0 file
  // with a value too (not just the deletions from the next step).
  ASSERT_OK(Put(1, Key(key_base - 6, kKeySize), "test"));
  ASSERT_OK(Flush(1));

  // Stage 3: Generate L0 files with some deletions so that now
  // there are files with the same key range in L0, L1, and L2.
  int deletion_interval = 3;
  CompactionJobStats first_compaction_stats;
  SelectivelyDeleteKeys(key_base, largest_key_num, key_interval,
                        deletion_interval, kKeySize, cutoff_key_num,
                        &first_compaction_stats, 1);

  stats_checker->AddExpectedStats(first_compaction_stats);

  // Stage 4: Trigger compaction and verify the stats
  TEST_Compact(0, 1, smallest_key, largest_key);
}

namespace {
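
// Returns the expected number of input units of the compaction triggered
// by the num_flushes-th flush, where one unit is one newly flushed file.
// With size_ratio = 1, the sorted runs accumulate like a binary counter,
// so this is the lowest set bit of num_flushes; 0 means no multi-file
// compaction is expected.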
int GetUniversalCompactionInputUnits(uint32_t num_flushes) {
  uint32_t compaction_input_units;
  for (compaction_input_units = 1; num_flushes >= compaction_input_units;
       compaction_input_units *= 2) {
    if ((num_flushes & compaction_input_units) != 0) {
      return compaction_input_units > 1 ? compaction_input_units : 0;
    }
  }
  return 0;
}

}  // namespace
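
// Verifies CompactionJobStats under universal compaction, where the
// expected input size of each compaction follows the flush pattern
// modeled by GetUniversalCompactionInputUnits().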
TEST_P(CompactionJobStatsTest, UniversalCompactionTest) {
  Random rnd(301);
  uint64_t key_base = 100000000l;
  // Note: key_base must be a multiple of num_keys_per_table
  int num_keys_per_table = 100;
  const uint32_t kTestScale = 6;
  const int kKeySize = 10;
  const int kValueSize = 900;
  double compression_ratio = 1.0;
  uint64_t key_interval = key_base / num_keys_per_table;

  auto* stats_checker = new CompactionJobStatsChecker();
  Options options;
  options.listeners.emplace_back(stats_checker);
  options.create_if_missing = true;
  options.num_levels = 3;
  options.compression = kNoCompression;
  options.level0_file_num_compaction_trigger = 2;
  options.target_file_size_base = num_keys_per_table * 1000;
  options.compaction_style = kCompactionStyleUniversal;
  options.compaction_options_universal.size_ratio = 1;
  options.compaction_options_universal.max_size_amplification_percent = 1000;
  options.max_subcompactions = max_subcompactions_;

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  // Generates the expected CompactionJobStats for each compaction
  for (uint32_t num_flushes = 2; num_flushes <= kTestScale; num_flushes++) {
    // Here we treat one newly flushed file as a unit.
    //
    // For example, if a newly flushed file is 100k, and a compaction has
    // 4 input units, then this compaction inputs 400k.
    uint32_t num_input_units = GetUniversalCompactionInputUnits(num_flushes);
    if (num_input_units == 0) {
      continue;
    }
    // The following statement determines the expected smallest key
    // based on whether it is a full compaction. A full compaction only
    // happens when the number of flushes equals the number of compaction
    // input runs.
    uint64_t smallest_key = (num_flushes == num_input_units)
                                ? key_base
                                : key_base * (num_flushes - 1);

    stats_checker->AddExpectedStats(NewManualCompactionJobStats(
        Key(smallest_key, 10),
        Key(smallest_key + key_base * num_input_units - key_interval, 10),
        num_input_units, num_input_units > 2 ? num_input_units / 2 : 0,
        num_keys_per_table * num_input_units, kKeySize, kValueSize,
        num_input_units, num_keys_per_table * num_input_units, 1.0, 0, false));
    dbfull()->TEST_WaitForCompact();
  }
  ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 3U);

  for (uint64_t start_key = key_base; start_key <= key_base * kTestScale;
       start_key += key_base) {
    MakeTableWithKeyValues(&rnd, start_key, start_key + key_base - 1, kKeySize,
                           kValueSize, key_interval, compression_ratio, 1);
    reinterpret_cast<DBImpl*>(db_)->TEST_WaitForCompact();
  }
  ASSERT_EQ(stats_checker->NumberOfUnverifiedStats(), 0U);
}

INSTANTIATE_TEST_CASE_P(CompactionJobStatsTest, CompactionJobStatsTest,
                        ::testing::Values(1, 4));

}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}

#else
#include <stdio.h>

int main(int /*argc*/, char** /*argv*/) {
  fprintf(stderr, "SKIPPED, not supported in ROCKSDB_LITE\n");
  return 0;
}

#endif  // !ROCKSDB_LITE

#else

int main(int /*argc*/, char** /*argv*/) { return 0; }

#endif  // !defined(IOS_CROSS_COMPILE)