heap.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <algorithm>
  7. #include <cstdint>
  8. #include <functional>
  9. #include "port/port.h"
  10. #include "util/autovector.h"
  11. namespace ROCKSDB_NAMESPACE {
  12. // Binary heap implementation optimized for use in multi-way merge sort.
  13. // Comparison to std::priority_queue:
  14. // - In libstdc++, std::priority_queue::pop() usually performs just over logN
  15. // comparisons but never fewer.
  16. // - std::priority_queue does not have a replace-top operation, requiring a
  17. // pop+push. If the replacement element is the new top, this requires
  18. // around 2logN comparisons.
  19. // - This heap's pop() uses a "schoolbook" downheap which requires up to ~2logN
  20. // comparisons.
  21. // - This heap provides a replace_top() operation which requires [1, 2logN]
  22. // comparisons. When the replacement element is also the new top, this
  23. // takes just 1 or 2 comparisons.
  24. //
  25. // The last property can yield an order-of-magnitude performance improvement
  26. // when merge-sorting real-world non-random data. If the merge operation is
  27. // likely to take chunks of elements from the same input stream, only 1
  28. // comparison per element is needed. In RocksDB-land, this happens when
  29. // compacting a database where keys are not randomly distributed across L0
  30. // files but nearby keys are likely to be in the same L0 file.
  31. //
  32. // The container uses the same counterintuitive ordering as
  33. // std::priority_queue: the comparison operator is expected to provide the
  34. // less-than relation, but top() will return the maximum.
  35. template<typename T, typename Compare = std::less<T>>
  36. class BinaryHeap {
  37. public:
  38. BinaryHeap() { }
  39. explicit BinaryHeap(Compare cmp) : cmp_(std::move(cmp)) { }
  40. void push(const T& value) {
  41. data_.push_back(value);
  42. upheap(data_.size() - 1);
  43. }
  44. void push(T&& value) {
  45. data_.push_back(std::move(value));
  46. upheap(data_.size() - 1);
  47. }
  48. const T& top() const {
  49. assert(!empty());
  50. return data_.front();
  51. }
  52. //替换top的元素,替换完了后还要调整堆
  53. void replace_top(const T& value) {
  54. assert(!empty());
  55. data_.front() = value;
  56. downheap(get_root());
  57. }
  58. //替换top的元素,替换完了后还要调整堆
  59. void replace_top(T&& value) {
  60. assert(!empty());
  61. data_.front() = std::move(value);
  62. downheap(get_root());
  63. }
  64. void pop() {
  65. assert(!empty());
  66. data_.front() = std::move(data_.back());
  67. data_.pop_back();
  68. if (!empty()) {
  69. downheap(get_root());
  70. } else {
  71. reset_root_cmp_cache();
  72. }
  73. }
  74. void swap(BinaryHeap &other) {
  75. std::swap(cmp_, other.cmp_);
  76. data_.swap(other.data_);
  77. std::swap(root_cmp_cache_, other.root_cmp_cache_);
  78. }
  79. void clear() {
  80. data_.clear();
  81. reset_root_cmp_cache();
  82. }
  83. bool empty() const { return data_.empty(); }
  84. size_t size() const { return data_.size(); }
  85. void reset_root_cmp_cache() { root_cmp_cache_ = port::kMaxSizet; }
  86. private:
  87. static inline size_t get_root() { return 0; }
  88. static inline size_t get_parent(size_t index) { return (index - 1) / 2; }
  89. static inline size_t get_left(size_t index) { return 2 * index + 1; }
  90. static inline size_t get_right(size_t index) { return 2 * index + 2; }
  91. void upheap(size_t index) {
  92. T v = std::move(data_[index]);
  93. while (index > get_root()) {
  94. const size_t parent = get_parent(index);
  95. if (!cmp_(data_[parent], v)) {
  96. break;
  97. }
  98. data_[index] = std::move(data_[parent]);
  99. index = parent;
  100. }
  101. data_[index] = std::move(v);
  102. reset_root_cmp_cache();
  103. }
  104. void downheap(size_t index) {
  105. T v = std::move(data_[index]);
  106. size_t picked_child = port::kMaxSizet;
  107. while (1) {
  108. const size_t left_child = get_left(index);
  109. //超过了堆容量,直接break
  110. if (get_left(index) >= data_.size()) {
  111. break;
  112. }
  113. const size_t right_child = left_child + 1;//获取右节点
  114. assert(right_child == get_right(index));//说明使用get_right和left_child+1都是一样的
  115. //下面就是一个向下调整的过程,最终就形成一个最小堆
  116. picked_child = left_child;
  117. if (index == 0 && root_cmp_cache_ < data_.size()) {
  118. picked_child = root_cmp_cache_;
  119. } else if (right_child < data_.size() &&
  120. cmp_(data_[left_child], data_[right_child])) {
  121. picked_child = right_child;
  122. }
  123. if (!cmp_(v, data_[picked_child])) {
  124. break;
  125. }
  126. data_[index] = std::move(data_[picked_child]);
  127. index = picked_child;
  128. }
  129. if (index == 0) {
  130. // We did not change anything in the tree except for the value
  131. // of the root node, left and right child did not change, we can
  132. // cache that `picked_child` is the smallest child
  133. // so next time we compare againist it directly
  134. root_cmp_cache_ = picked_child;
  135. } else {
  136. // the tree changed, reset cache
  137. reset_root_cmp_cache();
  138. }
  139. data_[index] = std::move(v);
  140. }
  141. Compare cmp_;
  142. autovector<T> data_;
  143. // Used to reduce number of cmp_ calls in downheap()
  144. size_t root_cmp_cache_ = port::kMaxSizet;
  145. };
  146. } // namespace ROCKSDB_NAMESPACE