multiget_context.h 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <algorithm>
  7. #include <array>
  8. #include <string>
  9. #include "db/lookup_key.h"
  10. #include "db/merge_context.h"
  11. #include "rocksdb/env.h"
  12. #include "rocksdb/statistics.h"
  13. #include "rocksdb/types.h"
  14. #include "util/autovector.h"
  15. namespace ROCKSDB_NAMESPACE {
  16. class GetContext;
  17. struct KeyContext {
  18. const Slice* key;
  19. LookupKey* lkey;
  20. Slice ukey;
  21. Slice ikey;
  22. ColumnFamilyHandle* column_family;
  23. Status* s;
  24. MergeContext merge_context;
  25. SequenceNumber max_covering_tombstone_seq;
  26. bool key_exists;
  27. void* cb_arg;
  28. PinnableSlice* value;
  29. GetContext* get_context;
  30. KeyContext(ColumnFamilyHandle* col_family, const Slice& user_key,
  31. PinnableSlice* val, Status* stat)
  32. : key(&user_key),
  33. lkey(nullptr),
  34. column_family(col_family),
  35. s(stat),
  36. max_covering_tombstone_seq(0),
  37. key_exists(false),
  38. cb_arg(nullptr),
  39. value(val),
  40. get_context(nullptr) {}
  41. KeyContext() = default;
  42. };
  43. // The MultiGetContext class is a container for the sorted list of keys that
  44. // we need to lookup in a batch. Its main purpose is to make batch execution
  45. // easier by allowing various stages of the MultiGet lookups to operate on
  46. // subsets of keys, potentially non-contiguous. In order to accomplish this,
  47. // it defines the following classes -
  48. //
  49. // MultiGetContext::Range
  50. // MultiGetContext::Range::Iterator
  51. // MultiGetContext::Range::IteratorWrapper
  52. //
  53. // Here is an example of how this can be used -
  54. //
  55. // {
  56. // MultiGetContext ctx(...);
  57. // MultiGetContext::Range range = ctx.GetMultiGetRange();
  58. //
  59. // // Iterate to determine some subset of the keys
  60. // MultiGetContext::Range::Iterator start = range.begin();
  61. // MultiGetContext::Range::Iterator end = ...;
  62. //
  63. // // Make a new range with a subset of keys
  64. // MultiGetContext::Range subrange(range, start, end);
  65. //
  66. // // Define an auxillary vector, if needed, to hold additional data for
  67. // // each key
  68. // std::array<Foo, MultiGetContext::MAX_BATCH_SIZE> aux;
  69. //
  70. // // Iterate over the subrange and the auxillary vector simultaneously
  71. // MultiGetContext::Range::Iterator iter = subrange.begin();
  72. // for (; iter != subrange.end(); ++iter) {
  73. // KeyContext& key = *iter;
  74. // Foo& aux_key = aux_iter[iter.index()];
  75. // ...
  76. // }
  77. // }
  78. class MultiGetContext {
  79. public:
  80. // Limit the number of keys in a batch to this number. Benchmarks show that
  81. // there is negligible benefit for batches exceeding this. Keeping this < 64
  82. // simplifies iteration, as well as reduces the amount of stack allocations
  83. // htat need to be performed
  84. static const int MAX_BATCH_SIZE = 32;
  85. MultiGetContext(autovector<KeyContext*, MAX_BATCH_SIZE>* sorted_keys,
  86. size_t begin, size_t num_keys, SequenceNumber snapshot)
  87. : num_keys_(num_keys),
  88. value_mask_(0),
  89. lookup_key_ptr_(reinterpret_cast<LookupKey*>(lookup_key_stack_buf)) {
  90. if (num_keys > MAX_LOOKUP_KEYS_ON_STACK) {
  91. lookup_key_heap_buf.reset(new char[sizeof(LookupKey) * num_keys]);
  92. lookup_key_ptr_ = reinterpret_cast<LookupKey*>(
  93. lookup_key_heap_buf.get());
  94. }
  95. for (size_t iter = 0; iter != num_keys_; ++iter) {
  96. // autovector may not be contiguous storage, so make a copy
  97. sorted_keys_[iter] = (*sorted_keys)[begin + iter];
  98. sorted_keys_[iter]->lkey = new (&lookup_key_ptr_[iter])
  99. LookupKey(*sorted_keys_[iter]->key, snapshot);
  100. sorted_keys_[iter]->ukey = sorted_keys_[iter]->lkey->user_key();
  101. sorted_keys_[iter]->ikey = sorted_keys_[iter]->lkey->internal_key();
  102. }
  103. }
  104. ~MultiGetContext() {
  105. for (size_t i = 0; i < num_keys_; ++i) {
  106. lookup_key_ptr_[i].~LookupKey();
  107. }
  108. }
  109. private:
  110. static const int MAX_LOOKUP_KEYS_ON_STACK = 16;
  111. alignas(alignof(LookupKey))
  112. char lookup_key_stack_buf[sizeof(LookupKey) * MAX_LOOKUP_KEYS_ON_STACK];
  113. std::array<KeyContext*, MAX_BATCH_SIZE> sorted_keys_;
  114. size_t num_keys_;
  115. uint64_t value_mask_;
  116. std::unique_ptr<char[]> lookup_key_heap_buf;
  117. LookupKey* lookup_key_ptr_;
  118. public:
  119. // MultiGetContext::Range - Specifies a range of keys, by start and end index,
  120. // from the parent MultiGetContext. Each range contains a bit vector that
  121. // indicates whether the corresponding keys need to be processed or skipped.
  122. // A Range object can be copy constructed, and the new object inherits the
  123. // original Range's bit vector. This is useful for progressively skipping
  124. // keys as the lookup goes through various stages. For example, when looking
  125. // up keys in the same SST file, a Range is created excluding keys not
  126. // belonging to that file. A new Range is then copy constructed and individual
  127. // keys are skipped based on bloom filter lookup.
  128. class Range {
  129. public:
  130. // MultiGetContext::Range::Iterator - A forward iterator that iterates over
  131. // non-skippable keys in a Range, as well as keys whose final value has been
  132. // found. The latter is tracked by MultiGetContext::value_mask_
  133. class Iterator {
  134. public:
  135. // -- iterator traits
  136. typedef Iterator self_type;
  137. typedef KeyContext value_type;
  138. typedef KeyContext& reference;
  139. typedef KeyContext* pointer;
  140. typedef int difference_type;
  141. typedef std::forward_iterator_tag iterator_category;
  142. Iterator(const Range* range, size_t idx)
  143. : range_(range), ctx_(range->ctx_), index_(idx) {
  144. while (index_ < range_->end_ &&
  145. (1ull << index_) &
  146. (range_->ctx_->value_mask_ | range_->skip_mask_))
  147. index_++;
  148. }
  149. Iterator(const Iterator&) = default;
  150. Iterator& operator=(const Iterator&) = default;
  151. Iterator& operator++() {
  152. while (++index_ < range_->end_ &&
  153. (1ull << index_) &
  154. (range_->ctx_->value_mask_ | range_->skip_mask_))
  155. ;
  156. return *this;
  157. }
  158. bool operator==(Iterator other) const {
  159. assert(range_->ctx_ == other.range_->ctx_);
  160. return index_ == other.index_;
  161. }
  162. bool operator!=(Iterator other) const {
  163. assert(range_->ctx_ == other.range_->ctx_);
  164. return index_ != other.index_;
  165. }
  166. KeyContext& operator*() {
  167. assert(index_ < range_->end_ && index_ >= range_->start_);
  168. return *(ctx_->sorted_keys_[index_]);
  169. }
  170. KeyContext* operator->() {
  171. assert(index_ < range_->end_ && index_ >= range_->start_);
  172. return ctx_->sorted_keys_[index_];
  173. }
  174. size_t index() { return index_; }
  175. private:
  176. friend Range;
  177. const Range* range_;
  178. const MultiGetContext* ctx_;
  179. size_t index_;
  180. };
  181. Range(const Range& mget_range,
  182. const Iterator& first,
  183. const Iterator& last) {
  184. ctx_ = mget_range.ctx_;
  185. start_ = first.index_;
  186. end_ = last.index_;
  187. skip_mask_ = mget_range.skip_mask_;
  188. }
  189. Range() = default;
  190. Iterator begin() const { return Iterator(this, start_); }
  191. Iterator end() const { return Iterator(this, end_); }
  192. bool empty() {
  193. return (((1ull << end_) - 1) & ~((1ull << start_) - 1) &
  194. ~(ctx_->value_mask_ | skip_mask_)) == 0;
  195. }
  196. void SkipKey(const Iterator& iter) { skip_mask_ |= 1ull << iter.index_; }
  197. // Update the value_mask_ in MultiGetContext so its
  198. // immediately reflected in all the Range Iterators
  199. void MarkKeyDone(Iterator& iter) {
  200. ctx_->value_mask_ |= (1ull << iter.index_);
  201. }
  202. bool CheckKeyDone(Iterator& iter) {
  203. return ctx_->value_mask_ & (1ull << iter.index_);
  204. }
  205. uint64_t KeysLeft() {
  206. uint64_t new_val = skip_mask_ | ctx_->value_mask_;
  207. uint64_t count = 0;
  208. while (new_val) {
  209. new_val = new_val & (new_val - 1);
  210. count++;
  211. }
  212. return end_ - count;
  213. }
  214. private:
  215. friend MultiGetContext;
  216. MultiGetContext* ctx_;
  217. size_t start_;
  218. size_t end_;
  219. uint64_t skip_mask_;
  220. Range(MultiGetContext* ctx, size_t num_keys)
  221. : ctx_(ctx), start_(0), end_(num_keys), skip_mask_(0) {}
  222. };
  223. // Return the initial range that encompasses all the keys in the batch
  224. Range GetMultiGetRange() { return Range(this, num_keys_); }
  225. };
  226. } // namespace ROCKSDB_NAMESPACE