table_reader.h 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <memory>
  11. #include "db/range_tombstone_fragmenter.h"
  12. #if USE_COROUTINES
  13. #include "folly/coro/Coroutine.h"
  14. #include "folly/coro/Task.h"
  15. #endif
  16. #include "rocksdb/slice_transform.h"
  17. #include "rocksdb/table_reader_caller.h"
  18. #include "table/get_context.h"
  19. #include "table/internal_iterator.h"
  20. #include "table/multiget_context.h"
  21. namespace ROCKSDB_NAMESPACE {
  22. class Iterator;
  23. struct ParsedInternalKey;
  24. class Slice;
  25. class Arena;
  26. struct ReadOptions;
  27. struct TableProperties;
  28. class GetContext;
  29. class MultiGetContext;
  30. // A Table (also referred to as SST) is a sorted map from strings to strings.
  31. // Tables are immutable and persistent. A Table may be safely accessed from
  32. // multiple threads without external synchronization. Table readers are used
  33. // for reading various types of table formats supported by rocksdb including
  34. // BlockBasedTable, PlainTable and CuckooTable format.
  35. class TableReader {
  36. public:
  37. virtual ~TableReader() {}
  38. // Returns a new iterator over the table contents.
  39. // The result of NewIterator() is initially invalid (caller must
  40. // call one of the Seek methods on the iterator before using it).
  41. //
  42. // read_options: Must outlive the returned iterator.
  43. // arena: If not null, the arena needs to be used to allocate the Iterator.
  44. // When destroying the iterator, the caller will not call "delete"
  45. // but Iterator::~Iterator() directly. The destructor needs to destroy
  46. // all the states but those allocated in arena.
  47. // skip_filters: disables checking the bloom filters even if they exist. This
  48. // option is effective only for block-based table format.
  49. // compaction_readahead_size: its value will only be used if caller =
  50. // kCompaction
  51. virtual InternalIterator* NewIterator(
  52. const ReadOptions& read_options, const SliceTransform* prefix_extractor,
  53. Arena* arena, bool skip_filters, TableReaderCaller caller,
  54. size_t compaction_readahead_size = 0,
  55. bool allow_unprepared_value = false) = 0;
  56. // read_options.snapshot needs to outlive this call.
  57. virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
  58. const ReadOptions& /*read_options*/) {
  59. return nullptr;
  60. }
  61. virtual FragmentedRangeTombstoneIterator* NewRangeTombstoneIterator(
  62. SequenceNumber /* read_seqno */, const Slice* /* timestamp */) {
  63. return nullptr;
  64. }
  65. // Given a key, return an approximate byte offset in the file where
  66. // the data for that key begins (or would begin if the key were
  67. // present in the file). The returned value is in terms of file
  68. // bytes, and so includes effects like compression of the underlying data.
  69. // E.g., the approximate offset of the last key in the table will
  70. // be close to the file length.
  71. // TODO(peterd): Since this function is only used for approximate size
  72. // from beginning of file, reduce code duplication by removing this
  73. // function and letting ApproximateSize take optional start and end, so
  74. // that absolute start and end can be specified and optimized without
  75. // key / index work.
  76. virtual uint64_t ApproximateOffsetOf(const ReadOptions& read_options,
  77. const Slice& key,
  78. TableReaderCaller caller) = 0;
  79. // Given start and end keys, return the approximate data size in the file
  80. // between the keys. The returned value is in terms of file bytes, and so
  81. // includes effects like compression of the underlying data and applicable
  82. // portions of metadata including filters and indexes. Nullptr for start or
  83. // end (or both) indicates absolute start or end of the table.
  84. virtual uint64_t ApproximateSize(const ReadOptions& read_options,
  85. const Slice& start, const Slice& end,
  86. TableReaderCaller caller) = 0;
  87. struct Anchor {
  88. Anchor(const Slice& _user_key, size_t _range_size)
  89. : user_key(_user_key.ToStringView()), range_size(_range_size) {}
  90. std::string user_key;
  91. size_t range_size;
  92. };
  93. // Now try to return approximately 128 anchor keys.
  94. // The last one tends to be the largest key.
  95. virtual Status ApproximateKeyAnchors(const ReadOptions& /*read_options*/,
  96. std::vector<Anchor>& /*anchors*/) {
  97. return Status::NotSupported("ApproximateKeyAnchors() not supported.");
  98. }
  99. // Set up the table for Compaction. Might change some parameters with
  100. // posix_fadvise
  101. virtual void SetupForCompaction() = 0;
  102. virtual std::shared_ptr<const TableProperties> GetTableProperties() const = 0;
  103. // Prepare work that can be done before the real Get()
  104. virtual void Prepare(const Slice& /*target*/) {}
  105. // Report an approximation of how much memory has been used.
  106. virtual size_t ApproximateMemoryUsage() const = 0;
  107. // Calls get_context->SaveValue() repeatedly, starting with
  108. // the entry found after a call to Seek(key), until it returns false.
  109. // May not make such a call if filter policy says that key is not present.
  110. //
  111. // get_context->MarkKeyMayExist needs to be called when it is configured to be
  112. // memory only and the key is not found in the block cache.
  113. //
  114. // readOptions is the options for the read
  115. // key is the key to search for
  116. // skip_filters: disables checking the bloom filters even if they exist. This
  117. // option is effective only for block-based table format.
  118. virtual Status Get(const ReadOptions& readOptions, const Slice& key,
  119. GetContext* get_context,
  120. const SliceTransform* prefix_extractor,
  121. bool skip_filters = false) = 0;
  122. // Use bloom filters in the table file, if present, to filter out keys. The
  123. // mget_range will be updated to skip keys that get a negative result from
  124. // the filter lookup.
  125. virtual Status MultiGetFilter(const ReadOptions& /*readOptions*/,
  126. const SliceTransform* /*prefix_extractor*/,
  127. MultiGetContext::Range* /*mget_range*/) {
  128. return Status::NotSupported();
  129. }
  130. virtual void MultiGet(const ReadOptions& readOptions,
  131. const MultiGetContext::Range* mget_range,
  132. const SliceTransform* prefix_extractor,
  133. bool skip_filters = false) {
  134. for (auto iter = mget_range->begin(); iter != mget_range->end(); ++iter) {
  135. *iter->s = Get(readOptions, iter->ikey, iter->get_context,
  136. prefix_extractor, skip_filters);
  137. }
  138. }
  139. #if USE_COROUTINES
  140. virtual folly::coro::Task<void> MultiGetCoroutine(
  141. const ReadOptions& readOptions, const MultiGetContext::Range* mget_range,
  142. const SliceTransform* prefix_extractor, bool skip_filters = false) {
  143. MultiGet(readOptions, mget_range, prefix_extractor, skip_filters);
  144. co_return;
  145. }
  146. #endif // USE_COROUTINES
  147. // Prefetch data corresponding to a give range of keys
  148. // Typically this functionality is required for table implementations that
  149. // persists the data on a non volatile storage medium like disk/SSD
  150. virtual Status Prefetch(const ReadOptions& /* read_options */,
  151. const Slice* begin = nullptr,
  152. const Slice* end = nullptr) {
  153. (void)begin;
  154. (void)end;
  155. // Default implementation is NOOP.
  156. // The child class should implement functionality when applicable
  157. return Status::OK();
  158. }
  159. // convert db file to a human readable form
  160. virtual Status DumpTable(WritableFile* /*out_file*/) {
  161. return Status::NotSupported("DumpTable() not supported");
  162. }
  163. // check whether there is corruption in this db file
  164. virtual Status VerifyChecksum(const ReadOptions& /*read_options*/,
  165. TableReaderCaller /*caller*/) {
  166. return Status::NotSupported("VerifyChecksum() not supported");
  167. }
  168. // Tell the reader that the file should now be obsolete, e.g. as a hint
  169. // to delete relevant cache entries on destruction. (It might not be safe
  170. // to "unpin" cache entries until destruction time.) NOTE: must be thread
  171. // safe because multiple table cache references might all mark this file as
  172. // obsolete when they are released (the last of which destroys this reader).
  173. virtual void MarkObsolete(uint32_t /*uncache_aggressiveness*/) {
  174. // no-op as default
  175. }
  176. };
  177. } // namespace ROCKSDB_NAMESPACE