merging_iterator.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "table/merging_iterator.h"
  10. #include <string>
  11. #include <vector>
  12. #include "db/dbformat.h"
  13. #include "db/pinned_iterators_manager.h"
  14. #include "memory/arena.h"
  15. #include "monitoring/perf_context_imp.h"
  16. #include "rocksdb/comparator.h"
  17. #include "rocksdb/iterator.h"
  18. #include "rocksdb/options.h"
  19. #include "table/internal_iterator.h"
  20. #include "table/iter_heap.h"
  21. #include "table/iterator_wrapper.h"
  22. #include "test_util/sync_point.h"
  23. #include "util/autovector.h"
  24. #include "util/heap.h"
  25. #include "util/stop_watch.h"
  26. namespace ROCKSDB_NAMESPACE {
  27. // Without anonymous namespace here, we fail the warning -Wmissing-prototypes
  28. namespace {
  29. typedef BinaryHeap<IteratorWrapper*, MaxIteratorComparator> MergerMaxIterHeap;
  30. typedef BinaryHeap<IteratorWrapper*, MinIteratorComparator> MergerMinIterHeap;
  31. } // namespace
  32. const size_t kNumIterReserve = 4;
  33. /*MergingIterator其实就是维护了多个子迭代器到一个最小堆里,
  34. 堆顶总是所有子迭代器中key最小的那个子迭代器
  35. 每次调用Next的时候,都需要在所有迭代器中查找;(其实只需要堆顶的子迭代器调Next(),
  36. 然后和其他子迭代器的当前key进行比较,哪个子迭代器最小谁就是堆顶的子迭代器
  37. 如多个子迭代器为:
  38. iter1 = 1, 3, 6
  39. iter2 = 2, 3, 8
  40. 那么此时堆顶就是iter1
  41. 测试流程见 TEST_F(DBIterWithMergeIterTest, InnerMergeIterator1)
  42. */
  43. class MergingIterator : public InternalIterator {
  44. public:
  45. MergingIterator(const InternalKeyComparator* comparator,
  46. InternalIterator** children, int n, bool is_arena_mode,
  47. bool prefix_seek_mode)
  48. : is_arena_mode_(is_arena_mode),
  49. comparator_(comparator),
  50. current_(nullptr),
  51. direction_(kForward),
  52. minHeap_(comparator_),
  53. prefix_seek_mode_(prefix_seek_mode),
  54. pinned_iters_mgr_(nullptr) {
  55. children_.resize(n);
  56. for (int i = 0; i < n; i++) {
  57. children_[i].Set(children[i]);
  58. }
  59. for (auto& child : children_) {
  60. AddToMinHeapOrCheckStatus(&child);
  61. }
  62. current_ = CurrentForward();
  63. }
  64. void considerStatus(Status s) {
  65. if (!s.ok() && status_.ok()) {
  66. status_ = s;
  67. }
  68. }
  69. virtual void AddIterator(InternalIterator* iter) {
  70. assert(direction_ == kForward);
  71. children_.emplace_back(iter);
  72. if (pinned_iters_mgr_) {
  73. iter->SetPinnedItersMgr(pinned_iters_mgr_);
  74. }
  75. auto new_wrapper = children_.back();
  76. AddToMinHeapOrCheckStatus(&new_wrapper);
  77. if (new_wrapper.Valid()) {
  78. current_ = CurrentForward();
  79. }
  80. }
  81. ~MergingIterator() override {
  82. for (auto& child : children_) {
  83. child.DeleteIter(is_arena_mode_);
  84. }
  85. }
  86. bool Valid() const override { return current_ != nullptr && status_.ok(); }
  87. Status status() const override { return status_; }
  88. void SeekToFirst() override {
  89. ClearHeaps();
  90. status_ = Status::OK();
  91. for (auto& child : children_) {
  92. child.SeekToFirst();
  93. AddToMinHeapOrCheckStatus(&child);
  94. }
  95. direction_ = kForward;
  96. current_ = CurrentForward();
  97. }
  98. void SeekToLast() override {
  99. ClearHeaps();
  100. InitMaxHeap();
  101. status_ = Status::OK();
  102. for (auto& child : children_) {
  103. child.SeekToLast();
  104. AddToMaxHeapOrCheckStatus(&child);
  105. }
  106. direction_ = kReverse;
  107. current_ = CurrentReverse();
  108. }
  109. void Seek(const Slice& target) override {
  110. ClearHeaps();
  111. status_ = Status::OK();
  112. for (auto& child : children_) {
  113. {
  114. PERF_TIMER_GUARD(seek_child_seek_time);
  115. child.Seek(target);
  116. }
  117. PERF_COUNTER_ADD(seek_child_seek_count, 1);
  118. {
  119. // Strictly, we timed slightly more than min heap operation,
  120. // but these operations are very cheap.
  121. PERF_TIMER_GUARD(seek_min_heap_time);
  122. AddToMinHeapOrCheckStatus(&child);
  123. }
  124. }
  125. direction_ = kForward;
  126. {
  127. PERF_TIMER_GUARD(seek_min_heap_time);
  128. current_ = CurrentForward();
  129. }
  130. }
  131. void SeekForPrev(const Slice& target) override {
  132. ClearHeaps();
  133. InitMaxHeap();
  134. status_ = Status::OK();
  135. for (auto& child : children_) {
  136. {
  137. PERF_TIMER_GUARD(seek_child_seek_time);
  138. child.SeekForPrev(target);
  139. }
  140. PERF_COUNTER_ADD(seek_child_seek_count, 1);
  141. {
  142. PERF_TIMER_GUARD(seek_max_heap_time);
  143. AddToMaxHeapOrCheckStatus(&child);
  144. }
  145. }
  146. direction_ = kReverse;
  147. {
  148. PERF_TIMER_GUARD(seek_max_heap_time);
  149. current_ = CurrentReverse();
  150. }
  151. }
  152. void Next() override {
  153. assert(Valid());
  154. // Ensure that all children are positioned after key().
  155. // If we are moving in the forward direction, it is already
  156. // true for all of the non-current children since current_ is
  157. // the smallest child and key() == current_->key().
  158. if (direction_ != kForward) {
  159. SwitchToForward();
  160. // The loop advanced all non-current children to be > key() so current_
  161. // should still be strictly the smallest key.
  162. }
  163. // For the heap modifications below to be correct, current_ must be the
  164. // current top of the heap.
  165. assert(current_ == CurrentForward());
  166. // as the current points to the current record. move the iterator forward.
  167. current_->Next();
  168. if (current_->Valid()) {
  169. // current is still valid after the Next() call above. Call
  170. // replace_top() to restore the heap property. When the same child
  171. // iterator yields a sequence of keys, this is cheap.
  172. assert(current_->status().ok());
  173. minHeap_.replace_top(current_);
  174. } else {
  175. // current stopped being valid, remove it from the heap.
  176. considerStatus(current_->status());
  177. minHeap_.pop();
  178. }
  179. current_ = CurrentForward();
  180. }
  181. bool NextAndGetResult(IterateResult* result) override {
  182. Next();
  183. bool is_valid = Valid();
  184. if (is_valid) {
  185. result->key = key();
  186. result->may_be_out_of_upper_bound = MayBeOutOfUpperBound();
  187. }
  188. return is_valid;
  189. }
  190. void Prev() override {
  191. assert(Valid());
  192. // Ensure that all children are positioned before key().
  193. // If we are moving in the reverse direction, it is already
  194. // true for all of the non-current children since current_ is
  195. // the largest child and key() == current_->key().
  196. if (direction_ != kReverse) {
  197. // Otherwise, retreat the non-current children. We retreat current_
  198. // just after the if-block.
  199. SwitchToBackward();
  200. }
  201. // For the heap modifications below to be correct, current_ must be the
  202. // current top of the heap.
  203. assert(current_ == CurrentReverse());
  204. current_->Prev();
  205. if (current_->Valid()) {
  206. // current is still valid after the Prev() call above. Call
  207. // replace_top() to restore the heap property. When the same child
  208. // iterator yields a sequence of keys, this is cheap.
  209. assert(current_->status().ok());
  210. maxHeap_->replace_top(current_);
  211. } else {
  212. // current stopped being valid, remove it from the heap.
  213. considerStatus(current_->status());
  214. maxHeap_->pop();
  215. }
  216. current_ = CurrentReverse();
  217. }
  218. Slice key() const override {
  219. assert(Valid());
  220. return current_->key();
  221. }
  222. Slice value() const override {
  223. assert(Valid());
  224. return current_->value();
  225. }
  226. // Here we simply relay MayBeOutOfLowerBound/MayBeOutOfUpperBound result
  227. // from current child iterator. Potentially as long as one of child iterator
  228. // report out of bound is not possible, we know current key is within bound.
  229. bool MayBeOutOfLowerBound() override {
  230. assert(Valid());
  231. return current_->MayBeOutOfLowerBound();
  232. }
  233. bool MayBeOutOfUpperBound() override {
  234. assert(Valid());
  235. return current_->MayBeOutOfUpperBound();
  236. }
  237. void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
  238. pinned_iters_mgr_ = pinned_iters_mgr;
  239. for (auto& child : children_) {
  240. child.SetPinnedItersMgr(pinned_iters_mgr);
  241. }
  242. }
  243. bool IsKeyPinned() const override {
  244. assert(Valid());
  245. return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
  246. current_->IsKeyPinned();
  247. }
  248. bool IsValuePinned() const override {
  249. assert(Valid());
  250. return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
  251. current_->IsValuePinned();
  252. }
  253. private:
  254. // Clears heaps for both directions, used when changing direction or seeking
  255. void ClearHeaps();
  256. // Ensures that maxHeap_ is initialized when starting to go in the reverse
  257. // direction
  258. void InitMaxHeap();
  259. bool is_arena_mode_;
  260. const InternalKeyComparator* comparator_;
  261. autovector<IteratorWrapper, kNumIterReserve> children_;
  262. // Cached pointer to child iterator with the current key, or nullptr if no
  263. // child iterators are valid. This is the top of minHeap_ or maxHeap_
  264. // depending on the direction.
  265. IteratorWrapper* current_;
  266. // If any of the children have non-ok status, this is one of them.
  267. Status status_;
  268. // Which direction is the iterator moving?
  269. enum Direction {
  270. kForward,
  271. kReverse
  272. };
  273. Direction direction_;
  274. MergerMinIterHeap minHeap_;
  275. bool prefix_seek_mode_;
  276. // Max heap is used for reverse iteration, which is way less common than
  277. // forward. Lazily initialize it to save memory.
  278. std::unique_ptr<MergerMaxIterHeap> maxHeap_;
  279. PinnedIteratorsManager* pinned_iters_mgr_;
  280. // In forward direction, process a child that is not in the min heap.
  281. // If valid, add to the min heap. Otherwise, check status.
  282. void AddToMinHeapOrCheckStatus(IteratorWrapper*);
  283. // In backward direction, process a child that is not in the max heap.
  284. // If valid, add to the min heap. Otherwise, check status.
  285. void AddToMaxHeapOrCheckStatus(IteratorWrapper*);
  286. void SwitchToForward();
  287. // Switch the direction from forward to backward without changing the
  288. // position. Iterator should still be valid.
  289. void SwitchToBackward();
  290. IteratorWrapper* CurrentForward() const {
  291. assert(direction_ == kForward);
  292. return !minHeap_.empty() ? minHeap_.top() : nullptr;
  293. }
  294. IteratorWrapper* CurrentReverse() const {
  295. assert(direction_ == kReverse);
  296. assert(maxHeap_);
  297. return !maxHeap_->empty() ? maxHeap_->top() : nullptr;
  298. }
  299. };
  300. void MergingIterator::AddToMinHeapOrCheckStatus(IteratorWrapper* child) {
  301. if (child->Valid()) {
  302. assert(child->status().ok());
  303. minHeap_.push(child);
  304. } else {
  305. considerStatus(child->status());
  306. }
  307. }
  308. void MergingIterator::AddToMaxHeapOrCheckStatus(IteratorWrapper* child) {
  309. if (child->Valid()) {
  310. assert(child->status().ok());
  311. maxHeap_->push(child);
  312. } else {
  313. considerStatus(child->status());
  314. }
  315. }
  316. void MergingIterator::SwitchToForward() {
  317. // Otherwise, advance the non-current children. We advance current_
  318. // just after the if-block.
  319. ClearHeaps();
  320. Slice target = key();
  321. for (auto& child : children_) {
  322. if (&child != current_) {
  323. child.Seek(target);
  324. if (child.Valid() && comparator_->Equal(target, child.key())) {
  325. assert(child.status().ok());
  326. child.Next();
  327. }
  328. }
  329. AddToMinHeapOrCheckStatus(&child);
  330. }
  331. direction_ = kForward;
  332. }
  333. void MergingIterator::SwitchToBackward() {
  334. ClearHeaps();
  335. InitMaxHeap();
  336. Slice target = key();
  337. for (auto& child : children_) {
  338. if (&child != current_) {
  339. child.SeekForPrev(target);
  340. TEST_SYNC_POINT_CALLBACK("MergeIterator::Prev:BeforePrev", &child);
  341. if (child.Valid() && comparator_->Equal(target, child.key())) {
  342. assert(child.status().ok());
  343. child.Prev();
  344. }
  345. }
  346. AddToMaxHeapOrCheckStatus(&child);
  347. }
  348. direction_ = kReverse;
  349. if (!prefix_seek_mode_) {
  350. // Note that we don't do assert(current_ == CurrentReverse()) here
  351. // because it is possible to have some keys larger than the seek-key
  352. // inserted between Seek() and SeekToLast(), which makes current_ not
  353. // equal to CurrentReverse().
  354. current_ = CurrentReverse();
  355. }
  356. assert(current_ == CurrentReverse());
  357. }
  358. void MergingIterator::ClearHeaps() {
  359. minHeap_.clear();
  360. if (maxHeap_) {
  361. maxHeap_->clear();
  362. }
  363. }
  364. void MergingIterator::InitMaxHeap() {
  365. if (!maxHeap_) {
  366. maxHeap_.reset(new MergerMaxIterHeap(comparator_));
  367. }
  368. }
  369. InternalIterator* NewMergingIterator(const InternalKeyComparator* cmp,
  370. InternalIterator** list, int n,
  371. Arena* arena, bool prefix_seek_mode) {
  372. assert(n >= 0);
  373. if (n == 0) {
  374. return NewEmptyInternalIterator<Slice>(arena);
  375. } else if (n == 1) {
  376. return list[0];
  377. } else {
  378. if (arena == nullptr) {
  379. return new MergingIterator(cmp, list, n, false, prefix_seek_mode);
  380. } else {
  381. auto mem = arena->AllocateAligned(sizeof(MergingIterator));
  382. return new (mem) MergingIterator(cmp, list, n, true, prefix_seek_mode);
  383. }
  384. }
  385. }
  386. MergeIteratorBuilder::MergeIteratorBuilder(
  387. const InternalKeyComparator* comparator, Arena* a, bool prefix_seek_mode)
  388. : first_iter(nullptr), use_merging_iter(false), arena(a) {
  389. auto mem = arena->AllocateAligned(sizeof(MergingIterator));
  390. merge_iter =
  391. new (mem) MergingIterator(comparator, nullptr, 0, true, prefix_seek_mode);
  392. }
  393. MergeIteratorBuilder::~MergeIteratorBuilder() {
  394. if (first_iter != nullptr) {
  395. first_iter->~InternalIterator();
  396. }
  397. if (merge_iter != nullptr) {
  398. merge_iter->~MergingIterator();
  399. }
  400. }
  401. void MergeIteratorBuilder::AddIterator(InternalIterator* iter) {
  402. if (!use_merging_iter && first_iter != nullptr) {
  403. merge_iter->AddIterator(first_iter);
  404. use_merging_iter = true;
  405. first_iter = nullptr;
  406. }
  407. if (use_merging_iter) {
  408. merge_iter->AddIterator(iter);
  409. } else {
  410. first_iter = iter;
  411. }
  412. }
  413. InternalIterator* MergeIteratorBuilder::Finish() {
  414. InternalIterator* ret = nullptr;
  415. if (!use_merging_iter) {
  416. ret = first_iter;
  417. first_iter = nullptr;
  418. } else {
  419. ret = merge_iter;
  420. merge_iter = nullptr;
  421. }
  422. return ret;
  423. }
  424. } // namespace ROCKSDB_NAMESPACE