get_context.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <string>
  7. #include "db/read_callback.h"
  8. #include "rocksdb/types.h"
  9. namespace ROCKSDB_NAMESPACE {
  10. class BlobFetcher;
  11. class Comparator;
  12. class Logger;
  13. class MergeContext;
  14. class MergeOperator;
  15. class PinnableWideColumns;
  16. class PinnedIteratorsManager;
  17. class Statistics;
  18. class SystemClock;
  19. struct ParsedInternalKey;
  // Per-lookup statistics accumulator. Counters are bumped locally while a
  // point lookup is in progress and the corresponding ticker stats are updated
  // once, at the end of the lookup, which avoids the overhead of frequent
  // ticker stats updates.
  //
  // Every counter starts at zero. The member order is kept stable so that the
  // object layout does not change.
  struct GetContextStats {
    // Cache hits: total, then broken down by index / data / filter /
    // compression dictionary.
    uint64_t num_cache_hit{0};
    uint64_t num_cache_index_hit{0};
    uint64_t num_cache_data_hit{0};
    uint64_t num_cache_filter_hit{0};
    uint64_t num_cache_compression_dict_hit{0};

    // Cache misses by kind, bytes read from the cache, and the total number
    // of misses.
    uint64_t num_cache_index_miss{0};
    uint64_t num_cache_filter_miss{0};
    uint64_t num_cache_data_miss{0};
    uint64_t num_cache_compression_dict_miss{0};
    uint64_t num_cache_bytes_read{0};
    uint64_t num_cache_miss{0};

    // Cache insertions: totals first ...
    uint64_t num_cache_add{0};
    uint64_t num_cache_add_redundant{0};
    uint64_t num_cache_bytes_write{0};

    // ... then per kind (adds, redundant adds, bytes inserted).
    uint64_t num_cache_index_add{0};
    uint64_t num_cache_index_add_redundant{0};
    uint64_t num_cache_index_bytes_insert{0};
    uint64_t num_cache_data_add{0};
    uint64_t num_cache_data_add_redundant{0};
    uint64_t num_cache_data_bytes_insert{0};
    uint64_t num_cache_filter_add{0};
    uint64_t num_cache_filter_add_redundant{0};
    uint64_t num_cache_filter_bytes_insert{0};
    uint64_t num_cache_compression_dict_add{0};
    uint64_t num_cache_compression_dict_add_redundant{0};
    uint64_t num_cache_compression_dict_bytes_insert{0};

    // MultiGet stats.
    uint64_t num_filter_read{0};
    uint64_t num_index_read{0};
    uint64_t num_sst_read{0};
  };
  55. // A class to hold context about a point lookup, such as pointer to value
  56. // slice, key, merge context etc, as well as the current state of the
  57. // lookup. Any user using GetContext to track the lookup result must call
  58. // SaveValue() whenever the internal key is found. This can happen
  59. // repeatedly in case of merge operands. In case the key may exist with
  60. // high probability, but IO is required to confirm and the user doesn't allow
  61. // it, MarkKeyMayExist() must be called instead of SaveValue().
  62. class GetContext {
  63. public:
  64. // Current state of the point lookup. All except kNotFound and kMerge are
  65. // terminal states
  66. enum GetState {
  67. kNotFound,
  68. kFound,
  69. kDeleted,
  70. kCorrupt,
  71. kMerge, // saver contains the current merge result (the operands)
  72. kUnexpectedBlobIndex,
  73. kMergeOperatorFailed,
  74. };
  75. GetContextStats get_context_stats_;
  76. // Constructor
  77. // @param value Holds the value corresponding to user_key. If its nullptr
  78. // then return all merge operands corresponding to user_key
  79. // via merge_context
  80. // @param value_found If non-nullptr, set to false if key may be present
  81. // but we can't be certain because we cannot do IO
  82. // @param max_covering_tombstone_seq Pointer to highest sequence number of
  83. // range deletion covering the key. When an internal key
  84. // is found with smaller sequence number, the lookup
  85. // terminates
  86. // @param seq If non-nullptr, the sequence number of the found key will be
  87. // saved here
  88. // @param callback Pointer to ReadCallback to perform additional checks
  89. // for visibility of a key
  90. // @param is_blob_index If non-nullptr, will be used to indicate if a found
  91. // key is of type blob index
  92. // @param do_merge True if value associated with user_key has to be returned
  93. // and false if all the merge operands associated with user_key has to be
  94. // returned. Id do_merge=false then all the merge operands are stored in
  95. // merge_context and they are never merged. The value pointer is untouched.
  96. GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
  97. Logger* logger, Statistics* statistics, GetState init_state,
  98. const Slice& user_key, PinnableSlice* value,
  99. PinnableWideColumns* columns, bool* value_found,
  100. MergeContext* merge_context, bool do_merge,
  101. SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
  102. SequenceNumber* seq = nullptr,
  103. PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
  104. ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
  105. uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
  106. GetContext(const Comparator* ucmp, const MergeOperator* merge_operator,
  107. Logger* logger, Statistics* statistics, GetState init_state,
  108. const Slice& user_key, PinnableSlice* value,
  109. PinnableWideColumns* columns, std::string* timestamp,
  110. bool* value_found, MergeContext* merge_context, bool do_merge,
  111. SequenceNumber* max_covering_tombstone_seq, SystemClock* clock,
  112. SequenceNumber* seq = nullptr,
  113. PinnedIteratorsManager* _pinned_iters_mgr = nullptr,
  114. ReadCallback* callback = nullptr, bool* is_blob_index = nullptr,
  115. uint64_t tracing_get_id = 0, BlobFetcher* blob_fetcher = nullptr);
  116. GetContext() = delete;
  117. // This can be called to indicate that a key may be present, but cannot be
  118. // confirmed due to IO not allowed
  119. void MarkKeyMayExist();
  120. // Records this key, value, and any meta-data (such as sequence number and
  121. // state) into this GetContext.
  122. //
  123. // If the parsed_key matches the user key that we are looking for, sets
  124. // matched to true.
  125. //
  126. // Returns True if more keys need to be read (due to merges) or
  127. // False if the complete value has been found.
  128. bool SaveValue(const ParsedInternalKey& parsed_key, const Slice& value,
  129. bool* matched, Status* read_status,
  130. Cleanable* value_pinner = nullptr);
  131. // Simplified version of the previous function. Should only be used when we
  132. // know that the operation is a Put.
  133. void SaveValue(const Slice& value, SequenceNumber seq);
  134. GetState State() const { return state_; }
  135. SequenceNumber* max_covering_tombstone_seq() {
  136. return max_covering_tombstone_seq_;
  137. }
  138. bool NeedTimestamp() { return timestamp_ != nullptr; }
  139. inline size_t TimestampSize() { return ucmp_->timestamp_size(); }
  140. void SetTimestampFromRangeTombstone(const Slice& timestamp) {
  141. assert(timestamp_);
  142. timestamp_->assign(timestamp.data(), timestamp.size());
  143. ts_from_rangetombstone_ = true;
  144. }
  145. PinnedIteratorsManager* pinned_iters_mgr() { return pinned_iters_mgr_; }
  146. // If a non-null string is passed, all the SaveValue calls will be
  147. // logged into the string. The operations can then be replayed on
  148. // another GetContext with replayGetContextLog.
  149. void SetReplayLog(std::string* replay_log) { replay_log_ = replay_log; }
  150. // Do we need to fetch the SequenceNumber for this key?
  151. bool NeedToReadSequence() const { return (seq_ != nullptr); }
  152. bool sample() const { return sample_; }
  153. bool CheckCallback(SequenceNumber seq) {
  154. if (callback_) {
  155. return callback_->IsVisible(seq);
  156. }
  157. return true;
  158. }
  159. void ReportCounters();
  160. bool has_callback() const { return callback_ != nullptr; }
  161. const Slice& ukey_to_get_blob_value() const {
  162. if (!ukey_with_ts_found_.empty()) {
  163. return ukey_with_ts_found_;
  164. } else {
  165. return user_key_;
  166. }
  167. }
  168. uint64_t get_tracing_get_id() const { return tracing_get_id_; }
  169. void push_operand(const Slice& value, Cleanable* value_pinner);
  170. private:
  171. // Helper method that postprocesses the results of merge operations, e.g. it
  172. // sets the state correctly upon merge errors.
  173. void PostprocessMerge(const Status& merge_status);
  174. // The following methods perform the actual merge operation for the
  175. // no base value/plain base value/wide-column base value cases.
  176. void MergeWithNoBaseValue();
  177. void MergeWithPlainBaseValue(const Slice& value);
  178. void MergeWithWideColumnBaseValue(const Slice& entity);
  179. bool GetBlobValue(const Slice& user_key, const Slice& blob_index,
  180. PinnableSlice* blob_value, Status* read_status);
  181. void appendToReplayLog(ValueType type, Slice value, Slice ts);
  182. const Comparator* ucmp_;
  183. const MergeOperator* merge_operator_;
  184. // the merge operations encountered;
  185. Logger* logger_;
  186. Statistics* statistics_;
  187. GetState state_;
  188. Slice user_key_;
  189. // When a blob index is found with the user key containing timestamp,
  190. // this copies the corresponding user key on record in the sst file
  191. // and is later used for blob verification.
  192. PinnableSlice ukey_with_ts_found_;
  193. PinnableSlice* pinnable_val_;
  194. PinnableWideColumns* columns_;
  195. std::string* timestamp_;
  196. bool ts_from_rangetombstone_{false};
  197. bool* value_found_; // Is value set correctly? Used by KeyMayExist
  198. MergeContext* merge_context_;
  199. SequenceNumber* max_covering_tombstone_seq_;
  200. SystemClock* clock_;
  201. // If a key is found, seq_ will be set to the SequenceNumber of most recent
  202. // write to the key or kMaxSequenceNumber if unknown
  203. SequenceNumber* seq_;
  204. std::string* replay_log_;
  205. // Used to temporarily pin blocks when state_ == GetContext::kMerge
  206. PinnedIteratorsManager* pinned_iters_mgr_;
  207. ReadCallback* callback_;
  208. bool sample_;
  209. // Value is true if it's called as part of DB Get API and false if it's
  210. // called as part of DB GetMergeOperands API. When it's false merge operators
  211. // are never merged.
  212. bool do_merge_;
  213. bool* is_blob_index_;
  214. // Used for block cache tracing only. A tracing get id uniquely identifies a
  215. // Get or a MultiGet.
  216. const uint64_t tracing_get_id_;
  217. BlobFetcher* blob_fetcher_;
  218. };
  219. // Call this to replay a log and bring the get_context up to date. The replay
  220. // log must have been created by another GetContext object, whose replay log
  221. // must have been set by calling GetContext::SetReplayLog().
  222. Status replayGetContextLog(const Slice& replay_log, const Slice& user_key,
  223. GetContext* get_context,
  224. Cleanable* value_pinner = nullptr,
  225. SequenceNumber seq_no = kMaxSequenceNumber);
  226. } // namespace ROCKSDB_NAMESPACE