block_based_table_reader_impl.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <type_traits>
  11. #include "block.h"
  12. #include "block_cache.h"
  13. #include "table/block_based/block_based_table_reader.h"
  14. #include "table/block_based/reader_common.h"
  // This file contains some member functions of BlockBasedTable that
  // cannot be implemented in block_based_table_reader.cc because
  // they are templates and are called from other files
  // (e.g. block_based_iterator.h).
  19. namespace ROCKSDB_NAMESPACE {
  20. namespace {
  21. using IterPlaceholderCacheInterface =
  22. PlaceholderCacheInterface<CacheEntryRole::kMisc>;
  23. template <typename TBlockIter>
  24. struct IterTraits {};
  25. template <>
  26. struct IterTraits<DataBlockIter> {
  27. using IterBlocklike = Block_kData;
  28. };
  29. template <>
  30. struct IterTraits<IndexBlockIter> {
  31. using IterBlocklike = Block_kIndex;
  32. };
  33. } // namespace
  34. // Convert an index iterator value (i.e., an encoded BlockHandle)
  35. // into an iterator over the contents of the corresponding block.
  36. // If input_iter is null, new a iterator
  37. // If input_iter is not null, update this iter and return it
  38. template <typename TBlockIter>
  39. TBlockIter* BlockBasedTable::NewDataBlockIterator(
  40. const ReadOptions& ro, const BlockHandle& handle, TBlockIter* input_iter,
  41. BlockType block_type, GetContext* get_context,
  42. BlockCacheLookupContext* lookup_context,
  43. FilePrefetchBuffer* prefetch_buffer, bool for_compaction, bool async_read,
  44. Status& s, bool use_block_cache_for_lookup) const {
  45. using IterBlocklike = typename IterTraits<TBlockIter>::IterBlocklike;
  46. PERF_TIMER_GUARD(new_table_block_iter_nanos);
  47. TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
  48. if (!s.ok()) {
  49. iter->Invalidate(s);
  50. return iter;
  51. }
  52. CachableEntry<Block> block;
  53. {
  54. CachableEntry<DecompressorDict> dict;
  55. Decompressor* decomp = rep_->decompressor.get();
  56. if (rep_->uncompression_dict_reader && block_type == BlockType::kData) {
  57. // For async scans, don't use the prefetch buffer since an async prefetch
  58. // might already be under way and this would invalidate it. Also, the
  59. // uncompression dict is typically at the end of the file and would
  60. // most likely break the sequentiality of the access pattern.
  61. // Same is with auto_readahead_size. It iterates over index to lookup for
  62. // data blocks. And this could break the the sequentiality of the access
  63. // pattern.
  64. s = rep_->uncompression_dict_reader->GetOrReadUncompressionDictionary(
  65. ((ro.async_io || ro.auto_readahead_size) ? nullptr : prefetch_buffer),
  66. ro, get_context, lookup_context, &dict);
  67. if (!s.ok()) {
  68. iter->Invalidate(s);
  69. return iter;
  70. }
  71. assert(dict.GetValue());
  72. if (dict.GetValue()) {
  73. decomp = dict.GetValue()->decompressor_.get();
  74. }
  75. }
  76. s = RetrieveBlock(
  77. prefetch_buffer, ro, handle, decomp, &block.As<IterBlocklike>(),
  78. get_context, lookup_context, for_compaction,
  79. /* use_cache */ true, async_read, use_block_cache_for_lookup);
  80. }
  81. if (s.IsTryAgain() && async_read) {
  82. return iter;
  83. }
  84. if (!s.ok()) {
  85. assert(block.IsEmpty());
  86. iter->Invalidate(s);
  87. return iter;
  88. }
  89. assert(block.GetValue() != nullptr);
  90. // Block contents are pinned and it is still pinned after the iterator
  91. // is destroyed as long as cleanup functions are moved to another object,
  92. // when:
  93. // 1. block cache handle is set to be released in cleanup function, or
  94. // 2. it's pointing to immortal source. If own_bytes is true then we are
  95. // not reading data from the original source, whether immortal or not.
  96. // Otherwise, the block is pinned iff the source is immortal.
  97. const bool block_contents_pinned =
  98. block.IsCached() ||
  99. (!block.GetValue()->own_bytes() && rep_->immortal_table);
  100. iter = InitBlockIterator<TBlockIter>(rep_, block.GetValue(), block_type, iter,
  101. block_contents_pinned);
  102. if (!block.IsCached()) {
  103. if (!ro.fill_cache) {
  104. IterPlaceholderCacheInterface block_cache{
  105. rep_->table_options.block_cache.get()};
  106. if (block_cache) {
  107. // insert a dummy record to block cache to track the memory usage
  108. Cache::Handle* cache_handle = nullptr;
  109. CacheKey key =
  110. CacheKey::CreateUniqueForCacheLifetime(block_cache.get());
  111. s = block_cache.Insert(key.AsSlice(),
  112. block.GetValue()->ApproximateMemoryUsage(),
  113. &cache_handle);
  114. if (s.ok()) {
  115. assert(cache_handle != nullptr);
  116. iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(),
  117. cache_handle);
  118. }
  119. }
  120. }
  121. } else {
  122. iter->SetCacheHandle(block.GetCacheHandle());
  123. }
  124. block.TransferTo(iter);
  125. return iter;
  126. }
  127. // Convert an uncompressed data block (i.e CachableEntry<Block>)
  128. // into an iterator over the contents of the corresponding block.
  129. // If input_iter is null, new a iterator
  130. // If input_iter is not null, update this iter and return it
  131. template <typename TBlockIter>
  132. TBlockIter* BlockBasedTable::NewDataBlockIterator(const ReadOptions& ro,
  133. CachableEntry<Block>& block,
  134. TBlockIter* input_iter,
  135. Status s) const {
  136. PERF_TIMER_GUARD(new_table_block_iter_nanos);
  137. TBlockIter* iter = input_iter != nullptr ? input_iter : new TBlockIter;
  138. if (!s.ok()) {
  139. iter->Invalidate(s);
  140. return iter;
  141. }
  142. assert(block.GetValue() != nullptr);
  143. // Block contents are pinned and it is still pinned after the iterator
  144. // is destroyed as long as cleanup functions are moved to another object,
  145. // when:
  146. // 1. block cache handle is set to be released in cleanup function, or
  147. // 2. it's pointing to immortal source. If own_bytes is true then we are
  148. // not reading data from the original source, whether immortal or not.
  149. // Otherwise, the block is pinned iff the source is immortal.
  150. const bool block_contents_pinned =
  151. block.IsCached() ||
  152. (!block.GetValue()->own_bytes() && rep_->immortal_table);
  153. iter = InitBlockIterator<TBlockIter>(rep_, block.GetValue(), BlockType::kData,
  154. iter, block_contents_pinned);
  155. if (!block.IsCached()) {
  156. if (!ro.fill_cache) {
  157. IterPlaceholderCacheInterface block_cache{
  158. rep_->table_options.block_cache.get()};
  159. if (block_cache) {
  160. // insert a dummy record to block cache to track the memory usage
  161. Cache::Handle* cache_handle = nullptr;
  162. CacheKey key =
  163. CacheKey::CreateUniqueForCacheLifetime(block_cache.get());
  164. s = block_cache.Insert(key.AsSlice(),
  165. block.GetValue()->ApproximateMemoryUsage(),
  166. &cache_handle);
  167. if (s.ok()) {
  168. assert(cache_handle != nullptr);
  169. iter->RegisterCleanup(&ForceReleaseCachedEntry, block_cache.get(),
  170. cache_handle);
  171. }
  172. }
  173. }
  174. } else {
  175. iter->SetCacheHandle(block.GetCacheHandle());
  176. }
  177. block.TransferTo(iter);
  178. return iter;
  179. }
  180. } // namespace ROCKSDB_NAMESPACE