| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 | //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.//  This source code is licensed under both the GPLv2 (found in the//  COPYING file in the root directory) and Apache 2.0 License//  (found in the LICENSE.Apache file in the root directory).#pragma once#include <algorithm>#include <cstdint>#include <functional>#include "port/port.h"#include "util/autovector.h"namespace ROCKSDB_NAMESPACE {// Binary heap implementation optimized for use in multi-way merge sort.// Comparison to std::priority_queue:// - In libstdc++, std::priority_queue::pop() usually performs just over logN//   comparisons but never fewer.// - std::priority_queue does not have a replace-top operation, requiring a//   pop+push.  If the replacement element is the new top, this requires//   around 2logN comparisons.// - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN//   comparisons.// - This heap provides a replace_top() operation which requires [1, 2logN]//   comparisons.  When the replacement element is also the new top, this//   takes just 1 or 2 comparisons.//// The last property can yield an order-of-magnitude performance improvement// when merge-sorting real-world non-random data.  If the merge operation is// likely to take chunks of elements from the same input stream, only 1// comparison per element is needed.  In RocksDB-land, this happens when// compacting a database where keys are not randomly distributed across L0// files but nearby keys are likely to be in the same L0 file.//// The container uses the same counterintuitive ordering as// std::priority_queue: the comparison operator is expected to provide the// less-than relation, but top() will return the maximum.template<typename T, typename Compare = std::less<T>>class BinaryHeap { public:  BinaryHeap() { }  explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { }  void push(const T& value) {    data_.push_back(value);    upheap(data_.size() - 1);  }  void push(T&& value) {    data_.push_back(std::move(value));    upheap(data_.size() - 1);  }  const T& top() const {    assert(!empty());    return data_.front();  }  //替换top的元素,替换完了后还要调整堆  void replace_top(const T& value) {    assert(!empty());    data_.front() = value;    downheap(get_root());  }  //替换top的元素,替换完了后还要调整堆  void replace_top(T&& value) {    assert(!empty());    data_.front() = std::move(value);    downheap(get_root());  }  void pop() {    assert(!empty());    data_.front() = std::move(data_.back());    data_.pop_back();    if (!empty()) {      downheap(get_root());    } else {      reset_root_cmp_cache();    }  }  void swap(BinaryHeap &other) {    std::swap(cmp_, other.cmp_);    data_.swap(other.data_);    std::swap(root_cmp_cache_, other.root_cmp_cache_);  }  void clear() {    data_.clear();    reset_root_cmp_cache();  }  bool empty() const { return data_.empty(); }  size_t size() const { return data_.size(); }  void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; } private:  static inline size_t get_root() { return 0; }  static inline size_t get_parent(size_t index) { return (index - 1) / 2; }  static inline size_t get_left(size_t index) { return 2 * index + 1; }  static inline size_t get_right(size_t index) { return 2 * index + 2; }  void upheap(size_t index) {    T v = std::move(data_[index]);    while (index > get_root()) {      const size_t parent = get_parent(index);      if (!cmp_(data_[parent], v)) {        break;      }      data_[index] = std::move(data_[parent]);      index = parent;    }    data_[index] = std::move(v);    reset_root_cmp_cache();  }  void downheap(size_t index) {    T v = std::move(data_[index]);    size_t picked_child = port::kMaxSizet;    while (1) {      const size_t left_child = get_left(index);      //超过了堆容量,直接break      if (get_left(index) >= data_.size()) {        break;      }      const size_t right_child = left_child + 1;//获取右节点      assert(right_child == get_right(index));//说明使用get_right和left_child+1都是一样的      //下面就是一个向下调整的过程,最终就形成一个最小堆      picked_child = left_child;      if (index == 0 && root_cmp_cache_ < data_.size()) {        picked_child = root_cmp_cache_;      } else if (right_child < data_.size() &&                 cmp_(data_[left_child], data_[right_child])) {        picked_child = right_child;      }      if (!cmp_(v, data_[picked_child])) {        break;      }      data_[index] = std::move(data_[picked_child]);      index = picked_child;    }    if (index == 0) {      // We did not change anything in the tree except for the value      // of the root node, left and right child did not change, we can      // cache that `picked_child` is the smallest child      // so next time we compare againist it directly      root_cmp_cache_ = picked_child;    } else {      // the tree changed, reset cache      reset_root_cmp_cache();    }    data_[index] = std::move(v);  }  Compare cmp_;  autovector<T> data_;  // Used to reduce number of cmp_ calls in downheap()  size_t root_cmp_cache_ = port::kMaxSizet;};}  // namespace ROCKSDB_NAMESPACE
 |