db_iter_stress_test.cc 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include "db/db_iter.h"
  6. #include "db/dbformat.h"
  7. #include "rocksdb/comparator.h"
  8. #include "rocksdb/options.h"
  9. #include "rocksdb/slice.h"
  10. #include "test_util/testharness.h"
  11. #include "util/random.h"
  12. #include "util/string_util.h"
  13. #include "utilities/merge_operators.h"
  14. #ifdef GFLAGS
  15. #include "util/gflags_compat.h"
  16. using GFLAGS_NAMESPACE::ParseCommandLineFlags;
  17. DEFINE_bool(verbose, false,
  18. "Print huge, detailed trace. Intended for debugging failures.");
  19. #else
  20. void ParseCommandLineFlags(int*, char***, bool) {}
  21. bool FLAGS_verbose = false;
  22. #endif
  23. namespace ROCKSDB_NAMESPACE {
  24. class DBIteratorStressTest : public testing::Test {
  25. public:
  26. Env* env_;
  27. DBIteratorStressTest() : env_(Env::Default()) {}
  28. };
  29. namespace {
  30. struct Entry {
  31. std::string key;
  32. ValueType type; // kTypeValue, kTypeDeletion, kTypeMerge
  33. uint64_t sequence;
  34. std::string ikey; // internal key, made from `key`, `sequence` and `type`
  35. std::string value;
  36. // If false, we'll pretend that this entry doesn't exist.
  37. bool visible = true;
  38. bool operator<(const Entry& e) const {
  39. if (key != e.key) {
  40. return key < e.key;
  41. }
  42. return std::tie(sequence, type) > std::tie(e.sequence, e.type);
  43. }
  44. };
  45. struct Data {
  46. std::vector<Entry> entries;
  47. // Indices in `entries` with `visible` = false.
  48. std::vector<size_t> hidden;
  49. // Keys of entries whose `visible` changed since the last seek of iterators.
  50. std::set<std::string> recently_touched_keys;
  51. };
  52. struct StressTestIterator : public InternalIterator {
  53. Data* data;
  54. Random64* rnd;
  55. InternalKeyComparator cmp;
  56. // Each operation will return error with this probability...
  57. double error_probability = 0;
  58. // ... and add/remove entries with this probability.
  59. double mutation_probability = 0;
  60. // The probability of adding vs removing entries will be chosen so that the
  61. // amount of removed entries stays somewhat close to this number.
  62. double target_hidden_fraction = 0;
  63. // If true, print all mutations to stdout for debugging.
  64. bool trace = false;
  65. int iter = -1;
  66. Status status_;
  67. StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp)
  68. : data(_data), rnd(_rnd), cmp(_cmp) {}
  69. bool Valid() const override {
  70. if (iter >= 0 && iter < (int)data->entries.size()) {
  71. assert(status_.ok());
  72. return true;
  73. }
  74. return false;
  75. }
  76. Status status() const override { return status_; }
  77. bool MaybeFail() {
  78. if (rnd->Next() >=
  79. static_cast<double>(std::numeric_limits<uint64_t>::max()) *
  80. error_probability) {
  81. return false;
  82. }
  83. if (rnd->Next() % 2) {
  84. status_ = Status::Incomplete("test");
  85. } else {
  86. status_ = Status::IOError("test");
  87. }
  88. if (trace) {
  89. std::cout << "injecting " << status_.ToString() << std::endl;
  90. }
  91. iter = -1;
  92. return true;
  93. }
  94. void MaybeMutate() {
  95. if (rnd->Next() >=
  96. static_cast<double>(std::numeric_limits<uint64_t>::max()) *
  97. mutation_probability) {
  98. return;
  99. }
  100. do {
  101. // If too many entries are hidden, hide less, otherwise hide more.
  102. double hide_probability =
  103. data->hidden.size() > data->entries.size() * target_hidden_fraction
  104. ? 1. / 3
  105. : 2. / 3;
  106. if (data->hidden.empty()) {
  107. hide_probability = 1;
  108. }
  109. bool do_hide = rnd->Next() <
  110. static_cast<double>(std::numeric_limits<uint64_t>::max()) *
  111. hide_probability;
  112. if (do_hide) {
  113. // Hide a random entry.
  114. size_t idx = rnd->Next() % data->entries.size();
  115. Entry& e = data->entries[idx];
  116. if (e.visible) {
  117. if (trace) {
  118. std::cout << "hiding idx " << idx << std::endl;
  119. }
  120. e.visible = false;
  121. data->hidden.push_back(idx);
  122. data->recently_touched_keys.insert(e.key);
  123. } else {
  124. // Already hidden. Let's go unhide something instead, just because
  125. // it's easy and it doesn't really matter what we do.
  126. do_hide = false;
  127. }
  128. }
  129. if (!do_hide) {
  130. // Unhide a random entry.
  131. size_t hi = rnd->Next() % data->hidden.size();
  132. size_t idx = data->hidden[hi];
  133. if (trace) {
  134. std::cout << "unhiding idx " << idx << std::endl;
  135. }
  136. Entry& e = data->entries[idx];
  137. assert(!e.visible);
  138. e.visible = true;
  139. data->hidden[hi] = data->hidden.back();
  140. data->hidden.pop_back();
  141. data->recently_touched_keys.insert(e.key);
  142. }
  143. } while (rnd->Next() % 3 != 0); // do 3 mutations on average
  144. }
  145. void SkipForward() {
  146. while (iter < (int)data->entries.size() && !data->entries[iter].visible) {
  147. ++iter;
  148. }
  149. }
  150. void SkipBackward() {
  151. while (iter >= 0 && !data->entries[iter].visible) {
  152. --iter;
  153. }
  154. }
  155. void SeekToFirst() override {
  156. if (MaybeFail()) {
  157. return;
  158. }
  159. MaybeMutate();
  160. status_ = Status::OK();
  161. iter = 0;
  162. SkipForward();
  163. }
  164. void SeekToLast() override {
  165. if (MaybeFail()) {
  166. return;
  167. }
  168. MaybeMutate();
  169. status_ = Status::OK();
  170. iter = (int)data->entries.size() - 1;
  171. SkipBackward();
  172. }
  173. void Seek(const Slice& target) override {
  174. if (MaybeFail()) {
  175. return;
  176. }
  177. MaybeMutate();
  178. status_ = Status::OK();
  179. // Binary search.
  180. auto it = std::partition_point(
  181. data->entries.begin(), data->entries.end(),
  182. [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; });
  183. iter = (int)(it - data->entries.begin());
  184. SkipForward();
  185. }
  186. void SeekForPrev(const Slice& target) override {
  187. if (MaybeFail()) {
  188. return;
  189. }
  190. MaybeMutate();
  191. status_ = Status::OK();
  192. // Binary search.
  193. auto it = std::partition_point(
  194. data->entries.begin(), data->entries.end(),
  195. [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; });
  196. iter = (int)(it - data->entries.begin());
  197. --iter;
  198. SkipBackward();
  199. }
  200. void Next() override {
  201. assert(Valid());
  202. if (MaybeFail()) {
  203. return;
  204. }
  205. MaybeMutate();
  206. ++iter;
  207. SkipForward();
  208. }
  209. void Prev() override {
  210. assert(Valid());
  211. if (MaybeFail()) {
  212. return;
  213. }
  214. MaybeMutate();
  215. --iter;
  216. SkipBackward();
  217. }
  218. Slice key() const override {
  219. assert(Valid());
  220. return data->entries[iter].ikey;
  221. }
  222. Slice value() const override {
  223. assert(Valid());
  224. return data->entries[iter].value;
  225. }
  226. bool IsKeyPinned() const override { return true; }
  227. bool IsValuePinned() const override { return true; }
  228. };
  229. // A small reimplementation of DBIter, supporting only some of the features,
  230. // and doing everything in O(log n).
  231. // Skips all keys that are in recently_touched_keys.
  232. struct ReferenceIterator {
  233. Data* data;
  234. uint64_t sequence; // ignore entries with sequence number below this
  235. bool valid = false;
  236. std::string key;
  237. std::string value;
  238. ReferenceIterator(Data* _data, uint64_t _sequence)
  239. : data(_data), sequence(_sequence) {}
  240. bool Valid() const { return valid; }
  241. // Finds the first entry with key
  242. // greater/less/greater-or-equal/less-or-equal than `key`, depending on
  243. // arguments: if `skip`, inequality is strict; if `forward`, it's
  244. // greater/greater-or-equal, otherwise less/less-or-equal.
  245. // Sets `key` to the result.
  246. // If no such key exists, returns false. Doesn't check `visible`.
  247. bool FindNextKey(bool skip, bool forward) {
  248. valid = false;
  249. auto it = std::partition_point(data->entries.begin(), data->entries.end(),
  250. [&](const Entry& e) {
  251. if (forward != skip) {
  252. return e.key < key;
  253. } else {
  254. return e.key <= key;
  255. }
  256. });
  257. if (forward) {
  258. if (it != data->entries.end()) {
  259. key = it->key;
  260. return true;
  261. }
  262. } else {
  263. if (it != data->entries.begin()) {
  264. --it;
  265. key = it->key;
  266. return true;
  267. }
  268. }
  269. return false;
  270. }
  271. bool FindValueForCurrentKey() {
  272. if (data->recently_touched_keys.count(key)) {
  273. return false;
  274. }
  275. // Find the first entry for the key. The caller promises that it exists.
  276. auto it = std::partition_point(data->entries.begin(), data->entries.end(),
  277. [&](const Entry& e) {
  278. if (e.key != key) {
  279. return e.key < key;
  280. }
  281. return e.sequence > sequence;
  282. });
  283. // Find the first visible entry.
  284. for (;; ++it) {
  285. if (it == data->entries.end()) {
  286. return false;
  287. }
  288. Entry& e = *it;
  289. if (e.key != key) {
  290. return false;
  291. }
  292. assert(e.sequence <= sequence);
  293. if (!e.visible) {
  294. continue;
  295. }
  296. if (e.type == kTypeDeletion) {
  297. return false;
  298. }
  299. if (e.type == kTypeValue) {
  300. value = e.value;
  301. valid = true;
  302. return true;
  303. }
  304. assert(e.type == kTypeMerge);
  305. break;
  306. }
  307. // Collect merge operands.
  308. std::vector<Slice> operands;
  309. for (; it != data->entries.end(); ++it) {
  310. Entry& e = *it;
  311. if (e.key != key) {
  312. break;
  313. }
  314. assert(e.sequence <= sequence);
  315. if (!e.visible) {
  316. continue;
  317. }
  318. if (e.type == kTypeDeletion) {
  319. break;
  320. }
  321. operands.emplace_back(e.value);
  322. if (e.type == kTypeValue) {
  323. break;
  324. }
  325. }
  326. // Do a merge.
  327. value = operands.back().ToString();
  328. for (int i = (int)operands.size() - 2; i >= 0; --i) {
  329. value.append(",");
  330. value.append(operands[i].data(), operands[i].size());
  331. }
  332. valid = true;
  333. return true;
  334. }
  335. // Start at `key` and move until we encounter a valid value.
  336. // `forward` defines the direction of movement.
  337. // If `skip` is true, we're looking for key not equal to `key`.
  338. void DoTheThing(bool skip, bool forward) {
  339. while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) {
  340. skip = true;
  341. }
  342. }
  343. void Seek(const Slice& target) {
  344. key = target.ToString();
  345. DoTheThing(false, true);
  346. }
  347. void SeekForPrev(const Slice& target) {
  348. key = target.ToString();
  349. DoTheThing(false, false);
  350. }
  351. void SeekToFirst() { Seek(""); }
  352. void SeekToLast() {
  353. key = data->entries.back().key;
  354. DoTheThing(false, false);
  355. }
  356. void Next() {
  357. assert(Valid());
  358. DoTheThing(true, true);
  359. }
  360. void Prev() {
  361. assert(Valid());
  362. DoTheThing(true, false);
  363. }
  364. };
  365. } // anonymous namespace
  366. // Use an internal iterator that sometimes returns errors and sometimes
  367. // adds/removes entries on the fly. Do random operations on a DBIter and
  368. // check results.
  369. // TODO: can be improved for more coverage:
  370. // * Override IsKeyPinned() and IsValuePinned() to actually use
  371. // PinnedIteratorManager and check that there's no use-after free.
  372. // * Try different combinations of prefix_extractor, total_order_seek,
  373. // prefix_same_as_start, iterate_lower_bound, iterate_upper_bound.
  374. TEST_F(DBIteratorStressTest, StressTest) {
  375. // We use a deterministic RNG, and everything happens in a single thread.
  376. Random64 rnd(826909345792864532ll);
  377. auto gen_key = [&](int max_key) {
  378. assert(max_key > 0);
  379. int len = 0;
  380. int a = max_key;
  381. while (a) {
  382. a /= 10;
  383. ++len;
  384. }
  385. std::string s = std::to_string(rnd.Next() % static_cast<uint64_t>(max_key));
  386. s.insert(0, len - (int)s.size(), '0');
  387. return s;
  388. };
  389. Options options;
  390. options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
  391. ReadOptions ropt;
  392. size_t num_matching = 0;
  393. size_t num_at_end = 0;
  394. size_t num_not_ok = 0;
  395. size_t num_recently_removed = 0;
  396. // Number of iterations for each combination of parameters
  397. // (there are ~250 of those).
  398. // Tweak this to change the test run time.
  399. // As of the time of writing, the test takes ~4 seconds for value of 5000.
  400. const int num_iterations = 5000;
  401. // Enable this to print all the operations for debugging.
  402. bool trace = FLAGS_verbose;
  403. for (int num_entries : {5, 10, 100}) {
  404. for (double key_space : {0.1, 1.0, 3.0}) {
  405. for (ValueType prevalent_entry_type :
  406. {kTypeValue, kTypeDeletion, kTypeMerge}) {
  407. for (double error_probability : {0.01, 0.1}) {
  408. for (double mutation_probability : {0.01, 0.5}) {
  409. for (double target_hidden_fraction : {0.1, 0.5}) {
  410. std::string trace_str =
  411. "entries: " + std::to_string(num_entries) +
  412. ", key_space: " + std::to_string(key_space) +
  413. ", error_probability: " + std::to_string(error_probability) +
  414. ", mutation_probability: " +
  415. std::to_string(mutation_probability) +
  416. ", target_hidden_fraction: " +
  417. std::to_string(target_hidden_fraction);
  418. SCOPED_TRACE(trace_str);
  419. if (trace) {
  420. std::cout << trace_str << std::endl;
  421. }
  422. // Generate data.
  423. Data data;
  424. int max_key = (int)(num_entries * key_space) + 1;
  425. for (int i = 0; i < num_entries; ++i) {
  426. Entry e;
  427. e.key = gen_key(max_key);
  428. if (rnd.Next() % 10 != 0) {
  429. e.type = prevalent_entry_type;
  430. } else {
  431. const ValueType types[] = {kTypeValue, kTypeDeletion,
  432. kTypeMerge};
  433. e.type =
  434. types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
  435. }
  436. e.sequence = i;
  437. e.value = "v" + std::to_string(i);
  438. ParsedInternalKey internal_key(e.key, e.sequence, e.type);
  439. AppendInternalKey(&e.ikey, internal_key);
  440. data.entries.push_back(e);
  441. }
  442. std::sort(data.entries.begin(), data.entries.end());
  443. if (trace) {
  444. std::cout << "entries:";
  445. for (size_t i = 0; i < data.entries.size(); ++i) {
  446. Entry& e = data.entries[i];
  447. std::cout << "\n idx " << i << ": \"" << e.key << "\": \""
  448. << e.value << "\" seq: " << e.sequence << " type: "
  449. << (e.type == kTypeValue ? "val"
  450. : e.type == kTypeDeletion ? "del"
  451. : "merge");
  452. }
  453. std::cout << std::endl;
  454. }
  455. std::unique_ptr<Iterator> db_iter;
  456. std::unique_ptr<ReferenceIterator> ref_iter;
  457. for (int iteration = 0; iteration < num_iterations; ++iteration) {
  458. SCOPED_TRACE(iteration);
  459. // Create a new iterator every ~30 operations.
  460. if (db_iter == nullptr || rnd.Next() % 30 == 0) {
  461. uint64_t sequence = rnd.Next() % (data.entries.size() + 2);
  462. ref_iter.reset(new ReferenceIterator(&data, sequence));
  463. if (trace) {
  464. std::cout << "new iterator, seq: " << sequence << std::endl;
  465. }
  466. auto internal_iter =
  467. new StressTestIterator(&data, &rnd, BytewiseComparator());
  468. internal_iter->error_probability = error_probability;
  469. internal_iter->mutation_probability = mutation_probability;
  470. internal_iter->target_hidden_fraction =
  471. target_hidden_fraction;
  472. internal_iter->trace = trace;
  473. db_iter.reset(DBIter::NewIter(
  474. env_, ropt, ImmutableOptions(options),
  475. MutableCFOptions(options), BytewiseComparator(),
  476. internal_iter, /*version=*/nullptr, sequence,
  477. nullptr /*read_callback*/, /*active_mem=*/nullptr));
  478. }
  479. // Do a random operation. It's important to do it on ref_it
  480. // later than on db_iter to make sure ref_it sees the correct
  481. // recently_touched_keys.
  482. std::string old_key;
  483. bool forward = rnd.Next() % 2 > 0;
  484. // Do Next()/Prev() ~90% of the time.
  485. bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0;
  486. if (trace) {
  487. std::cout << iteration << ": ";
  488. }
  489. if (!seek) {
  490. assert(db_iter->Valid());
  491. old_key = ref_iter->key;
  492. if (trace) {
  493. std::cout << (forward ? "Next" : "Prev") << std::endl;
  494. }
  495. if (forward) {
  496. db_iter->Next();
  497. ref_iter->Next();
  498. } else {
  499. db_iter->Prev();
  500. ref_iter->Prev();
  501. }
  502. } else {
  503. data.recently_touched_keys.clear();
  504. // Do SeekToFirst less often than Seek.
  505. if (rnd.Next() % 4 == 0) {
  506. if (trace) {
  507. std::cout << (forward ? "SeekToFirst" : "SeekToLast")
  508. << std::endl;
  509. }
  510. if (forward) {
  511. old_key = "";
  512. db_iter->SeekToFirst();
  513. ref_iter->SeekToFirst();
  514. } else {
  515. old_key = data.entries.back().key;
  516. db_iter->SeekToLast();
  517. ref_iter->SeekToLast();
  518. }
  519. } else {
  520. old_key = gen_key(max_key);
  521. if (trace) {
  522. std::cout << (forward ? "Seek" : "SeekForPrev") << " \""
  523. << old_key << '"' << std::endl;
  524. }
  525. if (forward) {
  526. db_iter->Seek(old_key);
  527. ref_iter->Seek(old_key);
  528. } else {
  529. db_iter->SeekForPrev(old_key);
  530. ref_iter->SeekForPrev(old_key);
  531. }
  532. }
  533. }
  534. // Check the result.
  535. if (db_iter->Valid()) {
  536. ASSERT_TRUE(db_iter->status().ok());
  537. if (data.recently_touched_keys.count(
  538. db_iter->key().ToString())) {
  539. // Ended on a key that may have been mutated during the
  540. // operation. Reference iterator skips such keys, so we
  541. // can't check the exact result.
  542. // Check that the key moved in the right direction.
  543. if (forward) {
  544. if (seek) {
  545. ASSERT_GE(db_iter->key().ToString(), old_key);
  546. } else {
  547. ASSERT_GT(db_iter->key().ToString(), old_key);
  548. }
  549. } else {
  550. if (seek) {
  551. ASSERT_LE(db_iter->key().ToString(), old_key);
  552. } else {
  553. ASSERT_LT(db_iter->key().ToString(), old_key);
  554. }
  555. }
  556. if (ref_iter->Valid()) {
  557. // Check that DBIter didn't miss any non-mutated key.
  558. if (forward) {
  559. ASSERT_LT(db_iter->key().ToString(), ref_iter->key);
  560. } else {
  561. ASSERT_GT(db_iter->key().ToString(), ref_iter->key);
  562. }
  563. }
  564. // Tell the next iteration of the loop to reseek the
  565. // iterators.
  566. ref_iter->valid = false;
  567. ++num_recently_removed;
  568. } else {
  569. ASSERT_TRUE(ref_iter->Valid());
  570. ASSERT_EQ(ref_iter->key, db_iter->key().ToString());
  571. ASSERT_EQ(ref_iter->value, db_iter->value());
  572. ++num_matching;
  573. }
  574. } else if (db_iter->status().ok()) {
  575. ASSERT_FALSE(ref_iter->Valid());
  576. ++num_at_end;
  577. } else {
  578. // Non-ok status. Nothing to check here.
  579. // Tell the next iteration of the loop to reseek the
  580. // iterators.
  581. ref_iter->valid = false;
  582. ++num_not_ok;
  583. }
  584. }
  585. }
  586. }
  587. }
  588. }
  589. }
  590. }
  591. // Check that all cases were hit many times.
  592. EXPECT_GT(num_matching, 10000);
  593. EXPECT_GT(num_at_end, 10000);
  594. EXPECT_GT(num_not_ok, 10000);
  595. EXPECT_GT(num_recently_removed, 10000);
  596. std::cout << "stats:\n exact matches: " << num_matching
  597. << "\n end reached: " << num_at_end
  598. << "\n non-ok status: " << num_not_ok
  599. << "\n mutated on the fly: " << num_recently_removed << std::endl;
  600. }
  601. } // namespace ROCKSDB_NAMESPACE
  602. int main(int argc, char** argv) {
  603. ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  604. ::testing::InitGoogleTest(&argc, argv);
  605. ParseCommandLineFlags(&argc, &argv, true);
  606. return RUN_ALL_TESTS();
  607. }