// db/range_del_aggregator.h
  1. // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <algorithm>
  7. #include <iterator>
  8. #include <list>
  9. #include <map>
  10. #include <set>
  11. #include <string>
  12. #include <vector>
  13. #include "db/compaction/compaction_iteration_stats.h"
  14. #include "db/dbformat.h"
  15. #include "db/pinned_iterators_manager.h"
  16. #include "db/range_del_aggregator.h"
  17. #include "db/range_tombstone_fragmenter.h"
  18. #include "db/version_edit.h"
  19. #include "rocksdb/comparator.h"
  20. #include "rocksdb/types.h"
  21. #include "table/internal_iterator.h"
  22. #include "table/table_builder.h"
  23. #include "util/heap.h"
  24. #include "util/kv_map.h"
  25. namespace ROCKSDB_NAMESPACE {
// Wraps a FragmentedRangeTombstoneIterator and clamps the tombstone
// boundaries it reports to the optional [*smallest, *largest] internal-key
// bounds supplied at construction: start_key() never sorts before the lower
// bound and end_key() never sorts after the upper bound. This keeps a file's
// tombstones from covering keys outside that file's key range.
class TruncatedRangeDelIterator {
 public:
  // `smallest` / `largest` may be nullptr, in which case the corresponding
  // bound is unrestricted.
  TruncatedRangeDelIterator(
      std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
      const InternalKeyComparator* icmp, const InternalKey* smallest,
      const InternalKey* largest);

  // Forwards the visibility sequence number to the underlying iterator.
  void SetRangeDelReadSeqno(SequenceNumber read_seqno) {
    iter_->SetRangeDelReadSeqno(read_seqno);
  }

  bool Valid() const;

  // Advance / retreat via the underlying iterator's TopNext()/TopPrev().
  void Next() { iter_->TopNext(); }
  void Prev() { iter_->TopPrev(); }

  // Advance via the underlying iterator's plain Next() — presumably steps
  // through every fragment rather than only "top" ones; confirm against
  // FragmentedRangeTombstoneIterator.
  void InternalNext() { iter_->Next(); }

  // Seeks to the tombstone with the highest visible sequence number that covers
  // target (a user key). If no such tombstone exists, the position will be at
  // the earliest tombstone that ends after target.
  // REQUIRES: target is a user key.
  void Seek(const Slice& target);

  // Seeks to the first range tombstone with end_key() > target.
  void SeekInternalKey(const Slice& target);

  // Seeks to the tombstone with the highest visible sequence number that covers
  // target (a user key). If no such tombstone exists, the position will be at
  // the latest tombstone that starts before target.
  void SeekForPrev(const Slice& target);

  void SeekToFirst();
  void SeekToLast();

  // Start key of the current tombstone, clamped so it does not sort before
  // *smallest_ when a lower bound was supplied.
  ParsedInternalKey start_key() const {
    return (smallest_ == nullptr ||
            icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
               ? iter_->parsed_start_key()
               : *smallest_;
  }

  // End key of the current tombstone, clamped so it does not sort after
  // *largest_ when an upper bound was supplied.
  ParsedInternalKey end_key() const {
    return (largest_ == nullptr ||
            icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
               ? iter_->parsed_end_key()
               : *largest_;
  }

  SequenceNumber seq() const { return iter_->seq(); }

  // Timestamp of the current tombstone; only meaningful when the comparator
  // has user-defined timestamps enabled (asserted).
  Slice timestamp() const {
    assert(icmp_->user_comparator()->timestamp_size());
    return iter_->timestamp();
  }

  void SetTimestampUpperBound(const Slice* ts_upper_bound) {
    iter_->SetTimestampUpperBound(ts_upper_bound);
  }

  // Splits this iterator into one TruncatedRangeDelIterator per snapshot
  // stripe, keyed by sequence number. Defined in the .cc file.
  std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
  SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);

  SequenceNumber upper_bound() const { return iter_->upper_bound(); }
  SequenceNumber lower_bound() const { return iter_->lower_bound(); }

 private:
  std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
  const InternalKeyComparator* icmp_;
  // Truncation bounds; nullptr means unbounded on that side. NOTE(review):
  // when non-null these appear to point into pinned_bounds_ — confirm the
  // ownership in the constructor's definition.
  const ParsedInternalKey* smallest_ = nullptr;
  const ParsedInternalKey* largest_ = nullptr;
  // Storage that keeps parsed bound keys alive for this iterator's lifetime.
  std::list<ParsedInternalKey> pinned_bounds_;
  const InternalKey* smallest_ikey_;
  const InternalKey* largest_ikey_;
};
  85. struct SeqMaxComparator {
  86. bool operator()(const TruncatedRangeDelIterator* a,
  87. const TruncatedRangeDelIterator* b) const {
  88. return a->seq() > b->seq();
  89. }
  90. };
  91. struct StartKeyMinComparator {
  92. explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
  93. bool operator()(const TruncatedRangeDelIterator* a,
  94. const TruncatedRangeDelIterator* b) const {
  95. return icmp->Compare(a->start_key(), b->start_key()) > 0;
  96. }
  97. const InternalKeyComparator* icmp;
  98. };
// Answers ShouldDelete() queries while the caller consumes keys in
// increasing order. Tombstone iterators are kept in two structures:
// "active" ones, whose current tombstone starts at or before the last query
// key, and "inactive" ones, whose tombstone starts after it.
class ForwardRangeDelIterator {
 public:
  explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);

  // Returns true if `parsed` is covered by a visible tombstone. Defined in
  // the .cc file.
  bool ShouldDelete(const ParsedInternalKey& parsed);

  // Resets positioning state. Defined in the .cc file.
  void Invalidate();

  // Registers a new tombstone iterator: seek it to the query key's user key,
  // then file it into the appropriate heap.
  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->Seek(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Index of the first not-yet-added iterator in the caller's list; lets the
  // caller add iterators lazily one at a time.
  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  // Orders active-set positions by end key, smallest first: the active
  // tombstone that expires earliest is on top of the heap.
  struct EndKeyMinComparator {
    explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
    }
    const InternalKeyComparator* icmp;
  };

  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
      return;
    }
    int cmp = icmp_->Compare(parsed, iter->start_key());
    if (cmp < 0) {
      // Query key sorts before the tombstone's start: not reached yet.
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  // Inserts into the by-seqnum multiset and pushes that position onto the
  // by-end-key heap; the two structures always track the same iterators
  // (see the assert in AddNewIter).
  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  // Removes and returns the active iterator with the smallest end key.
  // NOTE: pop before erase — the heap holds multiset iterators, so the set
  // entry must stay valid until the heap no longer references it.
  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  // Removes and returns the inactive iterator with the smallest start key.
  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  // Active iterators ordered by sequence number (for visibility checks)...
  ActiveSeqSet active_seqnums_;
  // ...and the same iterators ordered by end key (to detect expiry).
  BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
  // Not-yet-reached iterators ordered by start key.
  BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_;
};
// Counterpart of ForwardRangeDelIterator for keys consumed in decreasing
// order. "Active" iterators point at a tombstone whose end key sorts after
// the last query key; "inactive" ones end at or before it.
class ReverseRangeDelIterator {
 public:
  explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);

  // Returns true if `parsed` is covered by a visible tombstone. Defined in
  // the .cc file.
  bool ShouldDelete(const ParsedInternalKey& parsed);

  // Resets positioning state. Defined in the .cc file.
  void Invalidate();

  // Registers a new tombstone iterator: seek it backward to the query key's
  // user key, then file it into the appropriate heap.
  void AddNewIter(TruncatedRangeDelIterator* iter,
                  const ParsedInternalKey& parsed) {
    iter->SeekForPrev(parsed.user_key);
    PushIter(iter, parsed);
    assert(active_iters_.size() == active_seqnums_.size());
  }

  // Index of the first not-yet-added iterator in the caller's list.
  size_t UnusedIdx() const { return unused_idx_; }
  void IncUnusedIdx() { unused_idx_++; }

 private:
  using ActiveSeqSet =
      std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;

  // Orders iterators by end key, largest first: the inactive tombstone that
  // ends latest (first to be reached going backward) is on top.
  struct EndKeyMaxComparator {
    explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
    bool operator()(const TruncatedRangeDelIterator* a,
                    const TruncatedRangeDelIterator* b) const {
      return icmp->Compare(a->end_key(), b->end_key()) < 0;
    }
    const InternalKeyComparator* icmp;
  };

  // Orders active-set positions by start key, largest first: the active
  // tombstone that starts latest (first to expire going backward) is on top.
  struct StartKeyMaxComparator {
    explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
    bool operator()(const ActiveSeqSet::const_iterator& a,
                    const ActiveSeqSet::const_iterator& b) const {
      return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
    }
    const InternalKeyComparator* icmp;
  };

  void PushIter(TruncatedRangeDelIterator* iter,
                const ParsedInternalKey& parsed) {
    if (!iter->Valid()) {
      // The iterator has been fully consumed, so we don't need to add it to
      // either of the heaps.
    } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
      // Tombstone ends at or before the query key: not reached yet while
      // traversing backward.
      PushInactiveIter(iter);
    } else {
      PushActiveIter(iter);
    }
  }

  // Inserts into the by-seqnum multiset and pushes that position onto the
  // by-start-key heap; the two structures always track the same iterators
  // (see the assert in AddNewIter).
  void PushActiveIter(TruncatedRangeDelIterator* iter) {
    auto seq_pos = active_seqnums_.insert(iter);
    active_iters_.push(seq_pos);
  }

  // Removes and returns the active iterator with the largest start key.
  // NOTE: pop before erase — the heap holds multiset iterators, so the set
  // entry must stay valid until the heap no longer references it.
  TruncatedRangeDelIterator* PopActiveIter() {
    auto active_top = active_iters_.top();
    auto iter = *active_top;
    active_iters_.pop();
    active_seqnums_.erase(active_top);
    return iter;
  }

  void PushInactiveIter(TruncatedRangeDelIterator* iter) {
    inactive_iters_.push(iter);
  }

  // Removes and returns the inactive iterator with the largest end key.
  TruncatedRangeDelIterator* PopInactiveIter() {
    auto* iter = inactive_iters_.top();
    inactive_iters_.pop();
    return iter;
  }

  const InternalKeyComparator* icmp_;
  size_t unused_idx_;
  // Active iterators ordered by sequence number (for visibility checks)...
  ActiveSeqSet active_seqnums_;
  // ...and the same iterators ordered by start key (to detect expiry).
  BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_;
  // Not-yet-reached iterators ordered by end key.
  BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
};
// Direction in which ShouldDelete() queries will arrive; selects whether the
// aggregator positions tombstones with its forward or reverse helper.
enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
// Abstract base for aggregating range tombstones from multiple sources and
// answering "is this point key deleted?" queries.
class RangeDelAggregator {
 public:
  explicit RangeDelAggregator(const InternalKeyComparator* icmp)
      : icmp_(icmp) {}
  virtual ~RangeDelAggregator() {}

  // Adds the tombstones from `input_iter`. `smallest`/`largest`, when
  // non-null, give the internal-key bounds used to truncate them.
  virtual void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) = 0;

  // Convenience overload: parses the internal key and forwards to the
  // ParsedInternalKey overload. Returns false (no deletion) if parsing
  // fails; in debug builds the failure also trips the assert.
  bool ShouldDelete(const Slice& ikey, RangeDelPositioningMode mode) {
    ParsedInternalKey parsed;
    Status pik_status =
        ParseInternalKey(ikey, &parsed, false /* log_err_key */);  // TODO
    assert(pik_status.ok());
    if (!pik_status.ok()) {
      return false;
    }
    return ShouldDelete(parsed, mode);
  }

  virtual bool ShouldDelete(const ParsedInternalKey& parsed,
                            RangeDelPositioningMode mode) = 0;

  virtual void InvalidateRangeDelMapPositions() = 0;

  virtual bool IsEmpty() const = 0;

  // Returns true the first time `file_number` is seen; callers can use this
  // to avoid adding the same file's tombstones twice.
  bool AddFile(uint64_t file_number) {
    return files_seen_.insert(file_number).second;
  }

 protected:
  // Tombstones and positioning state for one snapshot stripe, i.e. the
  // inclusive sequence-number interval [lower_bound, upper_bound].
  class StripeRep {
   public:
    StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
              SequenceNumber lower_bound)
        : icmp_(icmp),
          forward_iter_(icmp),
          reverse_iter_(icmp),
          upper_bound_(upper_bound),
          lower_bound_(lower_bound) {}

    void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
      iters_.push_back(std::move(input_iter));
    }

    bool IsEmpty() const { return iters_.empty(); }

    bool ShouldDelete(const ParsedInternalKey& parsed,
                      RangeDelPositioningMode mode);

    // Resets both directional helpers' positioning state.
    void Invalidate() {
      if (!IsEmpty()) {
        InvalidateForwardIter();
        InvalidateReverseIter();
      }
    }

    // If user-defined timestamp is enabled, `start` and `end` are user keys
    // with timestamp.
    bool IsRangeOverlapped(const Slice& start, const Slice& end);

   private:
    // True when `seq` lies inside this stripe (both bounds inclusive).
    bool InStripe(SequenceNumber seq) const {
      return lower_bound_ <= seq && seq <= upper_bound_;
    }

    void InvalidateForwardIter() { forward_iter_.Invalidate(); }
    void InvalidateReverseIter() { reverse_iter_.Invalidate(); }

    const InternalKeyComparator* icmp_;
    // Owns the truncated iterators; the directional helpers hold raw
    // pointers into this vector.
    std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
    ForwardRangeDelIterator forward_iter_;
    ReverseRangeDelIterator reverse_iter_;
    SequenceNumber upper_bound_;
    SequenceNumber lower_bound_;
  };

  const InternalKeyComparator* icmp_;

 private:
  // File numbers whose tombstones were already added (see AddFile()).
  std::set<uint64_t> files_seen_;
};
  299. class ReadRangeDelAggregator final : public RangeDelAggregator {
  300. public:
  301. ReadRangeDelAggregator(const InternalKeyComparator* icmp,
  302. SequenceNumber upper_bound)
  303. : RangeDelAggregator(icmp),
  304. rep_(icmp, upper_bound, 0 /* lower_bound */) {}
  305. ~ReadRangeDelAggregator() override {}
  306. using RangeDelAggregator::ShouldDelete;
  307. void AddTombstones(
  308. std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
  309. const InternalKey* smallest = nullptr,
  310. const InternalKey* largest = nullptr) override;
  311. bool ShouldDelete(const ParsedInternalKey& parsed,
  312. RangeDelPositioningMode mode) final override {
  313. if (rep_.IsEmpty()) {
  314. return false;
  315. }
  316. return ShouldDeleteImpl(parsed, mode);
  317. }
  318. bool IsRangeOverlapped(const Slice& start, const Slice& end);
  319. void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }
  320. bool IsEmpty() const override { return rep_.IsEmpty(); }
  321. private:
  322. StripeRep rep_;
  323. bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
  324. RangeDelPositioningMode mode);
  325. };
// Aggregator used during compaction: tombstones are partitioned by snapshot
// stripe so that a key is only deleted by tombstones visible in its stripe.
class CompactionRangeDelAggregator : public RangeDelAggregator {
 public:
  // LIFETIME: only a pointer to `snapshots` is kept, and ts_upper_bound_ /
  // trim_ts_ are Slices referencing the storage of `full_history_ts_low` /
  // `trim_ts` — all three must outlive this aggregator.
  CompactionRangeDelAggregator(const InternalKeyComparator* icmp,
                               const std::vector<SequenceNumber>& snapshots,
                               const std::string* full_history_ts_low = nullptr,
                               const std::string* trim_ts = nullptr)
      : RangeDelAggregator(icmp), snapshots_(&snapshots) {
    if (full_history_ts_low) {
      ts_upper_bound_ = *full_history_ts_low;
    }
    if (trim_ts) {
      trim_ts_ = *trim_ts;
      // Range tombstone newer than `trim_ts` or `full_history_ts_low` should
      // not be considered in ShouldDelete().
      if (ts_upper_bound_.empty()) {
        ts_upper_bound_ = trim_ts_;
      } else if (!trim_ts_.empty() &&
                 icmp->user_comparator()->CompareTimestamp(
                     trim_ts_, ts_upper_bound_) < 0) {
        // trim_ts_ is the smaller (older) timestamp, so it becomes the bound.
        ts_upper_bound_ = trim_ts_;
      }
    }
  }
  ~CompactionRangeDelAggregator() override {}

  void AddTombstones(
      std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
      const InternalKey* smallest = nullptr,
      const InternalKey* largest = nullptr) override;

  using RangeDelAggregator::ShouldDelete;

  bool ShouldDelete(const ParsedInternalKey& parsed,
                    RangeDelPositioningMode mode) override;

  bool IsRangeOverlapped(const Slice& start, const Slice& end);

  // Resets the positioning state of every stripe.
  void InvalidateRangeDelMapPositions() override {
    for (auto& rep : reps_) {
      rep.second.Invalidate();
    }
  }

  // True only if every stripe is empty.
  bool IsEmpty() const override {
    for (const auto& rep : reps_) {
      if (!rep.second.IsEmpty()) {
        return false;
      }
    }
    return true;
  }

  // Creates an iterator over all the range tombstones in the aggregator, for
  // use in compaction.
  //
  // NOTE: the internal key boundaries are used for optimization purposes to
  // reduce the number of tombstones that are passed to the fragmenter; they do
  // not guarantee that the resulting iterator only contains range tombstones
  // that cover keys in the provided range. If required, these bounds must be
  // enforced during iteration.
  std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
      const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr);

 private:
  // Owns the per-source truncated iterators; the stripes reference them.
  std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
  // One StripeRep per snapshot stripe, keyed by sequence number.
  std::map<SequenceNumber, StripeRep> reps_;

  const std::vector<SequenceNumber>* snapshots_;
  // min over full_history_ts_low and trim_ts_
  Slice ts_upper_bound_{};
  Slice trim_ts_{};
};
  388. } // namespace ROCKSDB_NAMESPACE