user_defined_index_wrapper.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. // Copyright (c) Meta Platforms, Inc. and affiliates.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <memory>
  7. #include <string>
  8. #include <unordered_map>
  9. #include "rocksdb/slice.h"
  10. #include "rocksdb/status.h"
  11. #include "rocksdb/user_defined_index.h"
  12. #include "table/block_based/block_based_table_reader.h"
  13. #include "table/block_based/block_type.h"
  14. #include "table/block_based/cachable_entry.h"
  15. #include "table/block_based/index_builder.h"
  16. namespace ROCKSDB_NAMESPACE {
  17. // UserDefinedIndexWrapper wraps around the existing index types in block based
  18. // table, and supports plugging in an additional user defined index. The wrapper
  19. // class forwards calls to both the wrapped internal index, and a user defined
  20. // index builder.
  21. class UserDefinedIndexBuilderWrapper : public IndexBuilder {
  22. public:
  23. UserDefinedIndexBuilderWrapper(
  24. const std::string& name,
  25. std::unique_ptr<IndexBuilder> internal_index_builder,
  26. std::unique_ptr<UserDefinedIndexBuilder> user_defined_index_builder,
  27. const InternalKeyComparator* comparator, size_t ts_sz,
  28. bool persist_user_defined_timestamps)
  29. : IndexBuilder(comparator, ts_sz, persist_user_defined_timestamps),
  30. name_(name),
  31. internal_index_builder_(std::move(internal_index_builder)),
  32. user_defined_index_builder_(std::move(user_defined_index_builder)) {}
  33. ~UserDefinedIndexBuilderWrapper() override = default;
  34. Slice AddIndexEntry(const Slice& last_key_in_current_block,
  35. const Slice* first_key_in_next_block,
  36. const BlockHandle& block_handle,
  37. std::string* separator_scratch,
  38. bool skip_delta_encoding) override {
  39. UserDefinedIndexBuilder::BlockHandle handle;
  40. handle.offset = block_handle.offset();
  41. handle.size = block_handle.size();
  42. // Forward the call to both index builders
  43. ParsedInternalKey pkey_last;
  44. ParsedInternalKey pkey_first;
  45. // There's no way to return an error here, so we remember the statsu and
  46. // return it in Finish()
  47. if (status_.ok()) {
  48. status_ = ParseInternalKey(last_key_in_current_block, &pkey_last,
  49. /*lof_err_key*/ false);
  50. }
  51. if (status_.ok() && first_key_in_next_block) {
  52. status_ = ParseInternalKey(*first_key_in_next_block, &pkey_first,
  53. /*lof_err_key*/ false);
  54. }
  55. if (status_.ok()) {
  56. user_defined_index_builder_->AddIndexEntry(
  57. pkey_last.user_key,
  58. first_key_in_next_block ? &pkey_first.user_key : nullptr, handle,
  59. separator_scratch);
  60. }
  61. return internal_index_builder_->AddIndexEntry(
  62. last_key_in_current_block, first_key_in_next_block, block_handle,
  63. separator_scratch, skip_delta_encoding);
  64. }
  65. // Not supported with parallel compression
  66. std::unique_ptr<PreparedIndexEntry> CreatePreparedIndexEntry() override {
  67. return nullptr;
  68. }
  69. void PrepareIndexEntry(const Slice& last_key_in_current_block,
  70. const Slice* first_key_in_next_block,
  71. PreparedIndexEntry* out) override {
  72. (void)last_key_in_current_block;
  73. (void)first_key_in_next_block;
  74. (void)out;
  75. assert(false);
  76. }
  77. void FinishIndexEntry(const BlockHandle& block_handle,
  78. PreparedIndexEntry* entry,
  79. bool skip_delta_encoding) override {
  80. (void)block_handle;
  81. (void)entry;
  82. (void)skip_delta_encoding;
  83. assert(false);
  84. }
  85. void OnKeyAdded(const Slice& key,
  86. const std::optional<Slice>& value) override {
  87. ParsedInternalKey pkey;
  88. if (status_.ok()) {
  89. if (!value.has_value()) {
  90. status_ = Status::InvalidArgument(
  91. "user_defined_index_factory not supported with parallel "
  92. "compression");
  93. } else {
  94. status_ = ParseInternalKey(key, &pkey, /*lof_err_key*/ false);
  95. if (status_.ok() && pkey.type != ValueType::kTypeValue) {
  96. status_ = Status::InvalidArgument(
  97. "user_defined_index_factory only supported with Puts");
  98. }
  99. }
  100. }
  101. if (!status_.ok()) {
  102. return;
  103. }
  104. // Forward the call to both index builders
  105. internal_index_builder_->OnKeyAdded(key, value);
  106. // Pass the user key to the UDI. We don't expect multiple entries with
  107. // different sequence numbers for the same key in the file. RocksDB may
  108. // enforce it in the future by allowing UDIs only for read only
  109. // bulkloaded use cases, and only allow ingestion of files with
  110. // sequence number 0.
  111. user_defined_index_builder_->OnKeyAdded(
  112. pkey.user_key, UserDefinedIndexBuilder::ValueType::kValue,
  113. value.value());
  114. }
  115. Status Finish(IndexBlocks* index_blocks,
  116. const BlockHandle& last_partition_block_handle) override {
  117. if (!status_.ok() && !status_.IsIncomplete()) {
  118. return status_;
  119. }
  120. if (!udi_finished_) {
  121. // Finish the user defined index builder
  122. Slice user_index_contents;
  123. status_ = user_defined_index_builder_->Finish(&user_index_contents);
  124. if (!status_.ok()) {
  125. return status_;
  126. }
  127. // Add the user defined index to the meta blocks
  128. std::string block_name = kUserDefinedIndexPrefix + name_;
  129. index_blocks->meta_blocks.insert(
  130. {block_name, {BlockType::kUserDefinedIndex, user_index_contents}});
  131. udi_finished_ = true;
  132. }
  133. // Finish the internal index builder
  134. status_ = internal_index_builder_->Finish(index_blocks,
  135. last_partition_block_handle);
  136. if (!status_.ok()) {
  137. return status_;
  138. }
  139. index_size_ = internal_index_builder_->IndexSize();
  140. return status_;
  141. }
  142. size_t IndexSize() const override { return index_size_; }
  143. uint64_t EstimateCurrentIndexSize() const override { return 0; }
  144. bool separator_is_key_plus_seq() override {
  145. return internal_index_builder_->separator_is_key_plus_seq();
  146. }
  147. private:
  148. const std::string name_;
  149. std::unique_ptr<IndexBuilder> internal_index_builder_;
  150. std::unique_ptr<UserDefinedIndexBuilder> user_defined_index_builder_;
  151. Status status_;
  152. bool udi_finished_ = false;
  153. };
  154. class UserDefinedIndexIteratorWrapper
  155. : public InternalIteratorBase<IndexValue> {
  156. public:
  157. explicit UserDefinedIndexIteratorWrapper(
  158. std::unique_ptr<UserDefinedIndexIterator>&& udi_iter)
  159. : udi_iter_(std::move(udi_iter)), valid_(false) {}
  160. bool Valid() const override { return valid_; }
  161. void SeekToFirst() override {
  162. status_ = Status::NotSupported("SeekToFirst not supported");
  163. }
  164. void SeekToLast() override {
  165. status_ = Status::NotSupported("SeekToLast not supported");
  166. }
  167. void Seek(const Slice& target) override {
  168. ParsedInternalKey pkey;
  169. status_ = ParseInternalKey(target, &pkey, /*log_err_key=*/false);
  170. if (status_.ok()) {
  171. status_ = udi_iter_->SeekAndGetResult(pkey.user_key, &result_);
  172. }
  173. if (status_.ok()) {
  174. valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
  175. if (valid_) {
  176. ikey_.Set(result_.key, 0, ValueType::kTypeValue);
  177. }
  178. } else {
  179. valid_ = false;
  180. }
  181. }
  182. void Next() override {
  183. status_ = udi_iter_->NextAndGetResult(&result_);
  184. if (status_.ok()) {
  185. valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
  186. if (valid_) {
  187. ikey_.Set(result_.key, 0, ValueType::kTypeValue);
  188. }
  189. } else {
  190. valid_ = false;
  191. }
  192. }
  193. bool NextAndGetResult(IterateResult* result) override {
  194. status_ = udi_iter_->NextAndGetResult(&result_);
  195. if (status_.ok()) {
  196. valid_ = result_.bound_check_result == IterBoundCheck::kInbound;
  197. if (valid_) {
  198. ikey_.Set(result_.key, 0, ValueType::kTypeValue);
  199. }
  200. if (status_.ok()) {
  201. *result = result_;
  202. }
  203. } else {
  204. valid_ = false;
  205. }
  206. return valid_;
  207. }
  208. void SeekForPrev(const Slice& /*target*/) override {
  209. status_ = Status::NotSupported("SeekForPrev not supported");
  210. }
  211. void Prev() override { status_ = Status::NotSupported("Prev not supported"); }
  212. Slice key() const override { return Slice(*ikey_.const_rep()); }
  213. IndexValue value() const override {
  214. auto handle = udi_iter_->value();
  215. IndexValue val(BlockHandle(handle.offset, handle.size), Slice());
  216. return val;
  217. }
  218. Status status() const override { return status_; }
  219. void Prepare(const MultiScanArgs* scan_opts) override {
  220. if (scan_opts) {
  221. udi_iter_->Prepare(scan_opts->GetScanRanges().data(),
  222. scan_opts->GetScanRanges().size());
  223. }
  224. }
  225. IterBoundCheck UpperBoundCheckResult() override {
  226. return result_.bound_check_result;
  227. }
  228. private:
  229. std::unique_ptr<UserDefinedIndexIterator> udi_iter_;
  230. IterateResult result_;
  231. InternalKey ikey_;
  232. Status status_;
  233. bool valid_;
  234. };
  235. class UserDefinedIndexReaderWrapper : public BlockBasedTable::IndexReader {
  236. public:
  237. UserDefinedIndexReaderWrapper(
  238. const std::string& name,
  239. std::unique_ptr<BlockBasedTable::IndexReader>&& reader,
  240. std::unique_ptr<UserDefinedIndexReader>&& udi_reader)
  241. : name_(name),
  242. reader_(std::move(reader)),
  243. udi_reader_(std::move(udi_reader)) {}
  244. virtual InternalIteratorBase<IndexValue>* NewIterator(
  245. const ReadOptions& read_options, bool disable_prefix_seek,
  246. IndexBlockIter* iter, GetContext* get_context,
  247. BlockCacheLookupContext* lookup_context) override {
  248. if (!read_options.table_index_factory) {
  249. return reader_->NewIterator(read_options, disable_prefix_seek, iter,
  250. get_context, lookup_context);
  251. }
  252. if (name_ != read_options.table_index_factory->Name()) {
  253. return NewErrorInternalIterator<IndexValue>(Status::InvalidArgument(
  254. "Bad index name" +
  255. std::string(read_options.table_index_factory->Name()) +
  256. ". Only supported UDI is " + name_));
  257. }
  258. std::unique_ptr<UserDefinedIndexIterator> udi_iter =
  259. udi_reader_->NewIterator(read_options);
  260. if (udi_iter) {
  261. InternalIteratorBase<IndexValue>* wrap_iter =
  262. new UserDefinedIndexIteratorWrapper(std::move(udi_iter));
  263. return wrap_iter;
  264. }
  265. return NewErrorInternalIterator<IndexValue>(
  266. Status::NotFound("COuld not create UDI iterator"));
  267. }
  268. virtual Status CacheDependencies(
  269. const ReadOptions& ro, bool pin,
  270. FilePrefetchBuffer* tail_prefetch_buffer) override {
  271. return reader_->CacheDependencies(ro, pin, tail_prefetch_buffer);
  272. }
  273. size_t ApproximateMemoryUsage() const override {
  274. return reader_->ApproximateMemoryUsage();
  275. }
  276. virtual void EraseFromCacheBeforeDestruction(
  277. uint32_t uncache_aggressiveness) override {
  278. reader_->EraseFromCacheBeforeDestruction(uncache_aggressiveness);
  279. }
  280. private:
  281. std::string name_;
  282. std::unique_ptr<BlockBasedTable::IndexReader> reader_;
  283. std::unique_ptr<UserDefinedIndexReader> udi_reader_;
  284. };
  285. } // namespace ROCKSDB_NAMESPACE