// range_tombstone_fragmenter.h
//  Copyright (c) 2018-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
#pragma once

#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "db/dbformat.h"
#include "db/pinned_iterators_manager.h"
#include "rocksdb/status.h"
#include "table/internal_iterator.h"
namespace ROCKSDB_NAMESPACE {
// Owns the result of fragmenting a set of range tombstones: a sorted vector
// of non-overlapping fragment "stacks" (tombstones_) plus a flat vector of
// the sequence numbers belonging to each stack (tombstone_seqs_).
struct FragmentedRangeTombstoneList {
 public:
  // A compact representation of a "stack" of range tombstone fragments, which
  // start and end at the same user keys but have different sequence numbers.
  // The members seq_start_idx and seq_end_idx are intended to be parameters to
  // seq_iter().
  struct RangeTombstoneStack {
    RangeTombstoneStack(const Slice& start, const Slice& end, size_t start_idx,
                        size_t end_idx)
        : start_key(start),
          end_key(end),
          seq_start_idx(start_idx),
          seq_end_idx(end_idx) {}

    Slice start_key;
    Slice end_key;
    // Half-open index range [seq_start_idx, seq_end_idx) into the owning
    // list's sequence-number vector; seq_iter(seq_end_idx) acts as the end
    // sentinel for this stack (see FragmentedRangeTombstoneIterator).
    size_t seq_start_idx;
    size_t seq_end_idx;
  };

  // Takes ownership of unfragmented_tombstones and fragments them via
  // FragmentTombstones(). NOTE(review): the precise role of for_compaction
  // and snapshots (presumably which seqnums are retained per fragment) is
  // decided in the .cc implementation — confirm there.
  FragmentedRangeTombstoneList(
      std::unique_ptr<InternalIterator> unfragmented_tombstones,
      const InternalKeyComparator& icmp, bool for_compaction = false,
      const std::vector<SequenceNumber>& snapshots = {});

  // Iteration over the fragment stacks, in stored (sorted) order.
  std::vector<RangeTombstoneStack>::const_iterator begin() const {
    return tombstones_.begin();
  }

  std::vector<RangeTombstoneStack>::const_iterator end() const {
    return tombstones_.end();
  }

  // Returns an iterator positioned at index idx of the shared sequence-number
  // vector; pair with RangeTombstoneStack::seq_start_idx / seq_end_idx.
  std::vector<SequenceNumber>::const_iterator seq_iter(size_t idx) const {
    return std::next(tombstone_seqs_.begin(), idx);
  }

  std::vector<SequenceNumber>::const_iterator seq_begin() const {
    return tombstone_seqs_.begin();
  }

  std::vector<SequenceNumber>::const_iterator seq_end() const {
    return tombstone_seqs_.end();
  }

  // True when no fragment stacks are stored.
  bool empty() const { return tombstones_.empty(); }

  // Returns true if the stored tombstones contain one with a sequence
  // number in [lower, upper].
  bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const;

 private:
  // Given an ordered range tombstone iterator unfragmented_tombstones,
  // "fragment" the tombstones into non-overlapping pieces, and store them in
  // tombstones_ and tombstone_seqs_.
  void FragmentTombstones(
      std::unique_ptr<InternalIterator> unfragmented_tombstones,
      const InternalKeyComparator& icmp, bool for_compaction,
      const std::vector<SequenceNumber>& snapshots);

  std::vector<RangeTombstoneStack> tombstones_;
  // Flat storage for all stacks' sequence numbers; each stack references a
  // contiguous [seq_start_idx, seq_end_idx) slice of this vector.
  std::vector<SequenceNumber> tombstone_seqs_;
  std::set<SequenceNumber> seq_set_;
  // Backing storage for key Slices held in tombstones_ (the Slice members do
  // not own their bytes), kept alive for the lifetime of this list.
  std::list<std::string> pinned_slices_;
  PinnedIteratorsManager pinned_iters_mgr_;
};
// FragmentedRangeTombstoneIterator converts an InternalIterator of a range-del
// meta block into an iterator over non-overlapping tombstone fragments. The
// tombstone fragmentation process should be more efficient than the range
// tombstone collapsing algorithm in RangeDelAggregator because this leverages
// the internal key ordering already provided by the input iterator, if
// applicable (when the iterator is unsorted, a new sorted iterator is created
// before proceeding). If there are few overlaps, creating a
// FragmentedRangeTombstoneIterator should be O(n), while the RangeDelAggregator
// tombstone collapsing is always O(n log n).
class FragmentedRangeTombstoneIterator : public InternalIterator {
 public:
  // Non-owning constructor: the caller must keep *tombstones alive for this
  // iterator's lifetime. upper_bound_/lower_bound_ restrict which seqnums are
  // visible — NOTE(review): exact bound inclusivity is determined by the .cc
  // implementation; confirm there before relying on it.
  FragmentedRangeTombstoneIterator(
      const FragmentedRangeTombstoneList* tombstones,
      const InternalKeyComparator& icmp, SequenceNumber upper_bound,
      SequenceNumber lower_bound = 0);
  // Shared-ownership constructor: retains a refcount on the tombstone list
  // (stored in tombstones_ref_) so the list outlives this iterator.
  FragmentedRangeTombstoneIterator(
      const std::shared_ptr<const FragmentedRangeTombstoneList>& tombstones,
      const InternalKeyComparator& icmp, SequenceNumber upper_bound,
      SequenceNumber lower_bound = 0);

  void SeekToFirst() override;
  void SeekToLast() override;

  // Seek to the first/last fragment stack, presumably positioning seq_pos_ at
  // the top (first visible) seqnum of that stack — see .cc for specifics.
  void SeekToTopFirst();
  void SeekToTopLast();

  // NOTE: Seek and SeekForPrev do not behave in the way InternalIterator
  // seeking should behave. This is OK because they are not currently used, but
  // eventually FragmentedRangeTombstoneIterator should no longer implement
  // InternalIterator.
  //
  // Seeks to the range tombstone that covers target at a seqnum in the
  // snapshot. If no such tombstone exists, seek to the earliest tombstone in
  // the snapshot that ends after target.
  void Seek(const Slice& target) override;
  // Seeks to the range tombstone that covers target at a seqnum in the
  // snapshot. If no such tombstone exists, seek to the latest tombstone in the
  // snapshot that starts before target.
  void SeekForPrev(const Slice& target) override;

  void Next() override;
  void Prev() override;

  // Advance to the next/previous fragment stack (skipping the remaining
  // seqnums of the current stack).
  void TopNext();
  void TopPrev();

  bool Valid() const override;
  // Returns the current fragment's start key encoded as an internal key with
  // the current seqnum and kTypeRangeDeletion (see MaybePinKey()). The
  // returned Slice is only valid until the iterator is moved.
  Slice key() const override {
    MaybePinKey();
    return current_start_key_.Encode();
  }
  // The "value" of a range tombstone entry is its end key.
  Slice value() const override { return pos_->end_key; }
  // False: key() points into current_start_key_, which is re-encoded in place
  // whenever the position changes (see MaybePinKey()).
  bool IsKeyPinned() const override { return false; }
  // True: value() points into the underlying tombstone list, which outlives
  // this iterator.
  bool IsValuePinned() const override { return true; }
  // This iterator never fails; errors surface when building the list.
  Status status() const override { return Status::OK(); }

  bool empty() const { return tombstones_->empty(); }
  // Moves all position state to the end sentinels, making the iterator
  // invalid until the next seek.
  void Invalidate() {
    pos_ = tombstones_->end();
    seq_pos_ = tombstones_->seq_end();
    pinned_pos_ = tombstones_->end();
    pinned_seq_pos_ = tombstones_->seq_end();
  }

  // Materializes the current position as a RangeTombstone value.
  RangeTombstone Tombstone() const {
    return RangeTombstone(start_key(), end_key(), seq());
  }
  Slice start_key() const { return pos_->start_key; }
  Slice end_key() const { return pos_->end_key; }
  SequenceNumber seq() const { return *seq_pos_; }
  // Parsed keys use kMaxSequenceNumber so they sort before any point key with
  // the same user key.
  ParsedInternalKey parsed_start_key() const {
    return ParsedInternalKey(pos_->start_key, kMaxSequenceNumber,
                             kTypeRangeDeletion);
  }
  ParsedInternalKey parsed_end_key() const {
    return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber,
                             kTypeRangeDeletion);
  }

  SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key);

  // Splits the iterator into n+1 iterators (where n is the number of
  // snapshots), each providing a view over a "stripe" of sequence numbers. The
  // iterators are keyed by the upper bound of their ranges (the provided
  // snapshots + kMaxSequenceNumber).
  //
  // NOTE: the iterators in the returned map are no longer valid if their
  // parent iterator is deleted, since they do not modify the refcount of the
  // underlying tombstone list. Therefore, this map should be deleted before
  // the parent iterator.
  std::map<SequenceNumber, std::unique_ptr<FragmentedRangeTombstoneIterator>>
  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);

  SequenceNumber upper_bound() const { return upper_bound_; }
  SequenceNumber lower_bound() const { return lower_bound_; }

 private:
  using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack;

  // Orders fragment stacks by user-key start key; the heterogeneous Slice
  // overloads allow binary searching the stack vector by a bare user key.
  struct RangeTombstoneStackStartComparator {
    explicit RangeTombstoneStackStartComparator(const Comparator* c) : cmp(c) {}

    bool operator()(const RangeTombstoneStack& a,
                    const RangeTombstoneStack& b) const {
      return cmp->Compare(a.start_key, b.start_key) < 0;
    }

    bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
      return cmp->Compare(a.start_key, b) < 0;
    }

    bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
      return cmp->Compare(a, b.start_key) < 0;
    }

    const Comparator* cmp;
  };

  // Same as above, but ordered by user-key end key.
  struct RangeTombstoneStackEndComparator {
    explicit RangeTombstoneStackEndComparator(const Comparator* c) : cmp(c) {}

    bool operator()(const RangeTombstoneStack& a,
                    const RangeTombstoneStack& b) const {
      return cmp->Compare(a.end_key, b.end_key) < 0;
    }

    bool operator()(const RangeTombstoneStack& a, const Slice& b) const {
      return cmp->Compare(a.end_key, b) < 0;
    }

    bool operator()(const Slice& a, const RangeTombstoneStack& b) const {
      return cmp->Compare(a, b.end_key) < 0;
    }

    const Comparator* cmp;
  };

  // Lazily re-encodes current_start_key_ for key(). The pinned_* members
  // memoize the position that was last encoded, so repeated key() calls at
  // the same position do no work.
  void MaybePinKey() const {
    if (pos_ != tombstones_->end() && seq_pos_ != tombstones_->seq_end() &&
        (pinned_pos_ != pos_ || pinned_seq_pos_ != seq_pos_)) {
      current_start_key_.Set(pos_->start_key, *seq_pos_, kTypeRangeDeletion);
      pinned_pos_ = pos_;
      pinned_seq_pos_ = seq_pos_;
    }
  }

  void SeekToCoveringTombstone(const Slice& key);
  void SeekForPrevToCoveringTombstone(const Slice& key);
  void ScanForwardToVisibleTombstone();
  void ScanBackwardToVisibleTombstone();

  // True while seq_pos_ has not exhausted the current stack's seqnum range
  // (seq_end_idx is this stack's end sentinel in the shared seqnum vector).
  bool ValidPos() const {
    return Valid() && seq_pos_ != tombstones_->seq_iter(pos_->seq_end_idx);
  }

  const RangeTombstoneStackStartComparator tombstone_start_cmp_;
  const RangeTombstoneStackEndComparator tombstone_end_cmp_;
  const InternalKeyComparator* icmp_;
  const Comparator* ucmp_;
  // Non-null only when constructed from a shared_ptr; keeps the list alive.
  // tombstones_ is the raw pointer actually dereferenced in either case.
  std::shared_ptr<const FragmentedRangeTombstoneList> tombstones_ref_;
  const FragmentedRangeTombstoneList* tombstones_;
  SequenceNumber upper_bound_;
  SequenceNumber lower_bound_;
  // Current fragment stack and current seqnum within it.
  std::vector<RangeTombstoneStack>::const_iterator pos_;
  std::vector<SequenceNumber>::const_iterator seq_pos_;
  // Position last encoded into current_start_key_ (see MaybePinKey());
  // mutable because key() is const but memoizes.
  mutable std::vector<RangeTombstoneStack>::const_iterator pinned_pos_;
  mutable std::vector<SequenceNumber>::const_iterator pinned_seq_pos_;
  mutable InternalKey current_start_key_;
};
}  // namespace ROCKSDB_NAMESPACE