| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441 |
- // Copyright (c) 2018-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- #pragma once
- #include <algorithm>
- #include <iterator>
- #include <list>
- #include <map>
- #include <set>
- #include <string>
- #include <vector>
- #include "db/compaction/compaction_iteration_stats.h"
- #include "db/dbformat.h"
- #include "db/pinned_iterators_manager.h"
- #include "db/range_del_aggregator.h"
- #include "db/range_tombstone_fragmenter.h"
- #include "db/version_edit.h"
- #include "rocksdb/comparator.h"
- #include "rocksdb/types.h"
- #include "table/internal_iterator.h"
- #include "table/scoped_arena_iterator.h"
- #include "table/table_builder.h"
- #include "util/heap.h"
- #include "util/kv_map.h"
- namespace ROCKSDB_NAMESPACE {
- class TruncatedRangeDelIterator {
- public:
- TruncatedRangeDelIterator(
- std::unique_ptr<FragmentedRangeTombstoneIterator> iter,
- const InternalKeyComparator* icmp, const InternalKey* smallest,
- const InternalKey* largest);
- bool Valid() const;
- void Next();
- void Prev();
- void InternalNext();
- // Seeks to the tombstone with the highest viisble sequence number that covers
- // target (a user key). If no such tombstone exists, the position will be at
- // the earliest tombstone that ends after target.
- void Seek(const Slice& target);
- // Seeks to the tombstone with the highest viisble sequence number that covers
- // target (a user key). If no such tombstone exists, the position will be at
- // the latest tombstone that starts before target.
- void SeekForPrev(const Slice& target);
- void SeekToFirst();
- void SeekToLast();
- ParsedInternalKey start_key() const {
- return (smallest_ == nullptr ||
- icmp_->Compare(*smallest_, iter_->parsed_start_key()) <= 0)
- ? iter_->parsed_start_key()
- : *smallest_;
- }
- ParsedInternalKey end_key() const {
- return (largest_ == nullptr ||
- icmp_->Compare(iter_->parsed_end_key(), *largest_) <= 0)
- ? iter_->parsed_end_key()
- : *largest_;
- }
- SequenceNumber seq() const { return iter_->seq(); }
- std::map<SequenceNumber, std::unique_ptr<TruncatedRangeDelIterator>>
- SplitBySnapshot(const std::vector<SequenceNumber>& snapshots);
- SequenceNumber upper_bound() const { return iter_->upper_bound(); }
- SequenceNumber lower_bound() const { return iter_->lower_bound(); }
- private:
- std::unique_ptr<FragmentedRangeTombstoneIterator> iter_;
- const InternalKeyComparator* icmp_;
- const ParsedInternalKey* smallest_ = nullptr;
- const ParsedInternalKey* largest_ = nullptr;
- std::list<ParsedInternalKey> pinned_bounds_;
- const InternalKey* smallest_ikey_;
- const InternalKey* largest_ikey_;
- };
- struct SeqMaxComparator {
- bool operator()(const TruncatedRangeDelIterator* a,
- const TruncatedRangeDelIterator* b) const {
- return a->seq() > b->seq();
- }
- };
- struct StartKeyMinComparator {
- explicit StartKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
- bool operator()(const TruncatedRangeDelIterator* a,
- const TruncatedRangeDelIterator* b) const {
- return icmp->Compare(a->start_key(), b->start_key()) > 0;
- }
- const InternalKeyComparator* icmp;
- };
- class ForwardRangeDelIterator {
- public:
- explicit ForwardRangeDelIterator(const InternalKeyComparator* icmp);
- bool ShouldDelete(const ParsedInternalKey& parsed);
- void Invalidate();
- void AddNewIter(TruncatedRangeDelIterator* iter,
- const ParsedInternalKey& parsed) {
- iter->Seek(parsed.user_key);
- PushIter(iter, parsed);
- assert(active_iters_.size() == active_seqnums_.size());
- }
- size_t UnusedIdx() const { return unused_idx_; }
- void IncUnusedIdx() { unused_idx_++; }
- private:
- using ActiveSeqSet =
- std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
- struct EndKeyMinComparator {
- explicit EndKeyMinComparator(const InternalKeyComparator* c) : icmp(c) {}
- bool operator()(const ActiveSeqSet::const_iterator& a,
- const ActiveSeqSet::const_iterator& b) const {
- return icmp->Compare((*a)->end_key(), (*b)->end_key()) > 0;
- }
- const InternalKeyComparator* icmp;
- };
- void PushIter(TruncatedRangeDelIterator* iter,
- const ParsedInternalKey& parsed) {
- if (!iter->Valid()) {
- // The iterator has been fully consumed, so we don't need to add it to
- // either of the heaps.
- return;
- }
- int cmp = icmp_->Compare(parsed, iter->start_key());
- if (cmp < 0) {
- PushInactiveIter(iter);
- } else {
- PushActiveIter(iter);
- }
- }
- void PushActiveIter(TruncatedRangeDelIterator* iter) {
- auto seq_pos = active_seqnums_.insert(iter);
- active_iters_.push(seq_pos);
- }
- TruncatedRangeDelIterator* PopActiveIter() {
- auto active_top = active_iters_.top();
- auto iter = *active_top;
- active_iters_.pop();
- active_seqnums_.erase(active_top);
- return iter;
- }
- void PushInactiveIter(TruncatedRangeDelIterator* iter) {
- inactive_iters_.push(iter);
- }
- TruncatedRangeDelIterator* PopInactiveIter() {
- auto* iter = inactive_iters_.top();
- inactive_iters_.pop();
- return iter;
- }
- const InternalKeyComparator* icmp_;
- size_t unused_idx_;
- ActiveSeqSet active_seqnums_;
- BinaryHeap<ActiveSeqSet::const_iterator, EndKeyMinComparator> active_iters_;
- BinaryHeap<TruncatedRangeDelIterator*, StartKeyMinComparator> inactive_iters_;
- };
- class ReverseRangeDelIterator {
- public:
- explicit ReverseRangeDelIterator(const InternalKeyComparator* icmp);
- bool ShouldDelete(const ParsedInternalKey& parsed);
- void Invalidate();
- void AddNewIter(TruncatedRangeDelIterator* iter,
- const ParsedInternalKey& parsed) {
- iter->SeekForPrev(parsed.user_key);
- PushIter(iter, parsed);
- assert(active_iters_.size() == active_seqnums_.size());
- }
- size_t UnusedIdx() const { return unused_idx_; }
- void IncUnusedIdx() { unused_idx_++; }
- private:
- using ActiveSeqSet =
- std::multiset<TruncatedRangeDelIterator*, SeqMaxComparator>;
- struct EndKeyMaxComparator {
- explicit EndKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
- bool operator()(const TruncatedRangeDelIterator* a,
- const TruncatedRangeDelIterator* b) const {
- return icmp->Compare(a->end_key(), b->end_key()) < 0;
- }
- const InternalKeyComparator* icmp;
- };
- struct StartKeyMaxComparator {
- explicit StartKeyMaxComparator(const InternalKeyComparator* c) : icmp(c) {}
- bool operator()(const ActiveSeqSet::const_iterator& a,
- const ActiveSeqSet::const_iterator& b) const {
- return icmp->Compare((*a)->start_key(), (*b)->start_key()) < 0;
- }
- const InternalKeyComparator* icmp;
- };
- void PushIter(TruncatedRangeDelIterator* iter,
- const ParsedInternalKey& parsed) {
- if (!iter->Valid()) {
- // The iterator has been fully consumed, so we don't need to add it to
- // either of the heaps.
- } else if (icmp_->Compare(iter->end_key(), parsed) <= 0) {
- PushInactiveIter(iter);
- } else {
- PushActiveIter(iter);
- }
- }
- void PushActiveIter(TruncatedRangeDelIterator* iter) {
- auto seq_pos = active_seqnums_.insert(iter);
- active_iters_.push(seq_pos);
- }
- TruncatedRangeDelIterator* PopActiveIter() {
- auto active_top = active_iters_.top();
- auto iter = *active_top;
- active_iters_.pop();
- active_seqnums_.erase(active_top);
- return iter;
- }
- void PushInactiveIter(TruncatedRangeDelIterator* iter) {
- inactive_iters_.push(iter);
- }
- TruncatedRangeDelIterator* PopInactiveIter() {
- auto* iter = inactive_iters_.top();
- inactive_iters_.pop();
- return iter;
- }
- const InternalKeyComparator* icmp_;
- size_t unused_idx_;
- ActiveSeqSet active_seqnums_;
- BinaryHeap<ActiveSeqSet::const_iterator, StartKeyMaxComparator> active_iters_;
- BinaryHeap<TruncatedRangeDelIterator*, EndKeyMaxComparator> inactive_iters_;
- };
- enum class RangeDelPositioningMode { kForwardTraversal, kBackwardTraversal };
- class RangeDelAggregator {
- public:
- explicit RangeDelAggregator(const InternalKeyComparator* icmp)
- : icmp_(icmp) {}
- virtual ~RangeDelAggregator() {}
- virtual void AddTombstones(
- std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
- const InternalKey* smallest = nullptr,
- const InternalKey* largest = nullptr) = 0;
- bool ShouldDelete(const Slice& key, RangeDelPositioningMode mode) {
- ParsedInternalKey parsed;
- if (!ParseInternalKey(key, &parsed)) {
- return false;
- }
- return ShouldDelete(parsed, mode);
- }
- virtual bool ShouldDelete(const ParsedInternalKey& parsed,
- RangeDelPositioningMode mode) = 0;
- virtual void InvalidateRangeDelMapPositions() = 0;
- virtual bool IsEmpty() const = 0;
- bool AddFile(uint64_t file_number) {
- return files_seen_.insert(file_number).second;
- }
- protected:
- class StripeRep {
- public:
- StripeRep(const InternalKeyComparator* icmp, SequenceNumber upper_bound,
- SequenceNumber lower_bound)
- : icmp_(icmp),
- forward_iter_(icmp),
- reverse_iter_(icmp),
- upper_bound_(upper_bound),
- lower_bound_(lower_bound) {}
- void AddTombstones(std::unique_ptr<TruncatedRangeDelIterator> input_iter) {
- iters_.push_back(std::move(input_iter));
- }
- bool IsEmpty() const { return iters_.empty(); }
- bool ShouldDelete(const ParsedInternalKey& parsed,
- RangeDelPositioningMode mode);
- void Invalidate() {
- if (!IsEmpty()) {
- InvalidateForwardIter();
- InvalidateReverseIter();
- }
- }
- bool IsRangeOverlapped(const Slice& start, const Slice& end);
- private:
- bool InStripe(SequenceNumber seq) const {
- return lower_bound_ <= seq && seq <= upper_bound_;
- }
- void InvalidateForwardIter() { forward_iter_.Invalidate(); }
- void InvalidateReverseIter() { reverse_iter_.Invalidate(); }
- const InternalKeyComparator* icmp_;
- std::vector<std::unique_ptr<TruncatedRangeDelIterator>> iters_;
- ForwardRangeDelIterator forward_iter_;
- ReverseRangeDelIterator reverse_iter_;
- SequenceNumber upper_bound_;
- SequenceNumber lower_bound_;
- };
- const InternalKeyComparator* icmp_;
- private:
- std::set<uint64_t> files_seen_;
- };
- class ReadRangeDelAggregator final : public RangeDelAggregator {
- public:
- ReadRangeDelAggregator(const InternalKeyComparator* icmp,
- SequenceNumber upper_bound)
- : RangeDelAggregator(icmp),
- rep_(icmp, upper_bound, 0 /* lower_bound */) {}
- ~ReadRangeDelAggregator() override {}
- using RangeDelAggregator::ShouldDelete;
- void AddTombstones(
- std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
- const InternalKey* smallest = nullptr,
- const InternalKey* largest = nullptr) override;
- bool ShouldDelete(const ParsedInternalKey& parsed,
- RangeDelPositioningMode mode) final override {
- if (rep_.IsEmpty()) {
- return false;
- }
- return ShouldDeleteImpl(parsed, mode);
- }
- bool IsRangeOverlapped(const Slice& start, const Slice& end);
- void InvalidateRangeDelMapPositions() override { rep_.Invalidate(); }
- bool IsEmpty() const override { return rep_.IsEmpty(); }
- private:
- StripeRep rep_;
- bool ShouldDeleteImpl(const ParsedInternalKey& parsed,
- RangeDelPositioningMode mode);
- };
- class CompactionRangeDelAggregator : public RangeDelAggregator {
- public:
- CompactionRangeDelAggregator(const InternalKeyComparator* icmp,
- const std::vector<SequenceNumber>& snapshots)
- : RangeDelAggregator(icmp), snapshots_(&snapshots) {}
- ~CompactionRangeDelAggregator() override {}
- void AddTombstones(
- std::unique_ptr<FragmentedRangeTombstoneIterator> input_iter,
- const InternalKey* smallest = nullptr,
- const InternalKey* largest = nullptr) override;
- using RangeDelAggregator::ShouldDelete;
- bool ShouldDelete(const ParsedInternalKey& parsed,
- RangeDelPositioningMode mode) override;
- bool IsRangeOverlapped(const Slice& start, const Slice& end);
- void InvalidateRangeDelMapPositions() override {
- for (auto& rep : reps_) {
- rep.second.Invalidate();
- }
- }
- bool IsEmpty() const override {
- for (const auto& rep : reps_) {
- if (!rep.second.IsEmpty()) {
- return false;
- }
- }
- return true;
- }
- // Creates an iterator over all the range tombstones in the aggregator, for
- // use in compaction. Nullptr arguments indicate that the iterator range is
- // unbounded.
- // NOTE: the boundaries are used for optimization purposes to reduce the
- // number of tombstones that are passed to the fragmenter; they do not
- // guarantee that the resulting iterator only contains range tombstones that
- // cover keys in the provided range. If required, these bounds must be
- // enforced during iteration.
- std::unique_ptr<FragmentedRangeTombstoneIterator> NewIterator(
- const Slice* lower_bound = nullptr, const Slice* upper_bound = nullptr,
- bool upper_bound_inclusive = false);
- private:
- std::vector<std::unique_ptr<TruncatedRangeDelIterator>> parent_iters_;
- std::map<SequenceNumber, StripeRep> reps_;
- const std::vector<SequenceNumber>* snapshots_;
- };
- } // namespace ROCKSDB_NAMESPACE
|