table_builder.h 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <stdint.h>
  11. #include <string>
  12. #include <utility>
  13. #include <vector>
  14. #include "db/dbformat.h"
  15. #include "db/seqno_to_time_mapping.h"
  16. #include "db/table_properties_collector.h"
  17. #include "file/writable_file_writer.h"
  18. #include "options/cf_options.h"
  19. #include "rocksdb/options.h"
  20. #include "rocksdb/table_properties.h"
  21. #include "table/unique_id_impl.h"
  22. #include "trace_replay/block_cache_tracer.h"
  23. #include "util/cast_util.h"
  24. namespace ROCKSDB_NAMESPACE {
  25. class Slice;
  26. class Status;
  27. struct TableReaderOptions {
  28. // @param skip_filters Disables loading/accessing the filter block
  29. TableReaderOptions(
  30. const ImmutableOptions& _ioptions,
  31. const std::shared_ptr<const SliceTransform>& _prefix_extractor,
  32. UnownedPtr<CompressionManager> _compression_manager,
  33. const EnvOptions& _env_options,
  34. const InternalKeyComparator& _internal_comparator,
  35. uint8_t _block_protection_bytes_per_key, bool _skip_filters = false,
  36. bool _immortal = false, bool _force_direct_prefetch = false,
  37. int _level = -1, BlockCacheTracer* const _block_cache_tracer = nullptr,
  38. size_t _max_file_size_for_l0_meta_pin = 0,
  39. const std::string& _cur_db_session_id = "", uint64_t _cur_file_num = 0,
  40. UniqueId64x2 _unique_id = {}, SequenceNumber _largest_seqno = 0,
  41. uint64_t _tail_size = 0, bool _user_defined_timestamps_persisted = true)
  42. : ioptions(_ioptions),
  43. prefix_extractor(_prefix_extractor),
  44. compression_manager(_compression_manager),
  45. env_options(_env_options),
  46. internal_comparator(_internal_comparator),
  47. skip_filters(_skip_filters),
  48. immortal(_immortal),
  49. force_direct_prefetch(_force_direct_prefetch),
  50. level(_level),
  51. largest_seqno(_largest_seqno),
  52. block_cache_tracer(_block_cache_tracer),
  53. max_file_size_for_l0_meta_pin(_max_file_size_for_l0_meta_pin),
  54. cur_db_session_id(_cur_db_session_id),
  55. cur_file_num(_cur_file_num),
  56. unique_id(_unique_id),
  57. block_protection_bytes_per_key(_block_protection_bytes_per_key),
  58. tail_size(_tail_size),
  59. user_defined_timestamps_persisted(_user_defined_timestamps_persisted) {}
  60. const ImmutableOptions& ioptions;
  61. const std::shared_ptr<const SliceTransform>& prefix_extractor;
  62. // NOTE: the compression manager is not saved, just potentially a decompressor
  63. // from it, so we don't need a shared_ptr copy
  64. UnownedPtr<CompressionManager> compression_manager;
  65. const EnvOptions& env_options;
  66. const InternalKeyComparator& internal_comparator;
  67. // This is only used for BlockBasedTable (reader)
  68. bool skip_filters;
  69. // Whether the table will be valid as long as the DB is open
  70. bool immortal;
  71. // When data prefetching is needed, even if direct I/O is off, read data to
  72. // fetch into RocksDB's buffer, rather than relying
  73. // RandomAccessFile::Prefetch().
  74. bool force_direct_prefetch;
  75. // What level this table/file is on, -1 for "not set, don't know." Used
  76. // for level-specific statistics.
  77. int level;
  78. // largest seqno in the table (or 0 means unknown???)
  79. SequenceNumber largest_seqno;
  80. BlockCacheTracer* const block_cache_tracer;
  81. // Largest L0 file size whose meta-blocks may be pinned (can be zero when
  82. // unknown).
  83. const size_t max_file_size_for_l0_meta_pin;
  84. std::string cur_db_session_id;
  85. uint64_t cur_file_num;
  86. // Known unique_id or {}, kNullUniqueId64x2 means unknown
  87. UniqueId64x2 unique_id;
  88. uint8_t block_protection_bytes_per_key;
  89. uint64_t tail_size;
  90. // Whether the key in the table contains user-defined timestamps.
  91. bool user_defined_timestamps_persisted;
  92. };
  93. struct TableBuilderOptions : public TablePropertiesCollectorFactory::Context {
  94. TableBuilderOptions(
  95. const ImmutableOptions& _ioptions, const MutableCFOptions& _moptions,
  96. const ReadOptions& _read_options, const WriteOptions& _write_options,
  97. const InternalKeyComparator& _internal_comparator,
  98. const InternalTblPropCollFactories* _internal_tbl_prop_coll_factories,
  99. CompressionType _compression_type,
  100. const CompressionOptions& _compression_opts, uint32_t _column_family_id,
  101. const std::string& _column_family_name, int _level,
  102. const int64_t _newest_key_time, bool _is_bottommost = false,
  103. TableFileCreationReason _reason = TableFileCreationReason::kMisc,
  104. const int64_t _oldest_key_time = 0,
  105. const uint64_t _file_creation_time = 0, const std::string& _db_id = "",
  106. const std::string& _db_session_id = "",
  107. const uint64_t _target_file_size = 0, const uint64_t _cur_file_num = 0,
  108. const SequenceNumber _last_level_inclusive_max_seqno_threshold =
  109. kMaxSequenceNumber)
  110. : TablePropertiesCollectorFactory::Context(
  111. _column_family_id, _level, _ioptions.num_levels,
  112. _last_level_inclusive_max_seqno_threshold),
  113. ioptions(_ioptions),
  114. moptions(_moptions),
  115. read_options(_read_options),
  116. write_options(_write_options),
  117. internal_comparator(_internal_comparator),
  118. internal_tbl_prop_coll_factories(_internal_tbl_prop_coll_factories),
  119. compression_type(_compression_type),
  120. compression_opts(_compression_opts),
  121. column_family_name(_column_family_name),
  122. oldest_key_time(_oldest_key_time),
  123. newest_key_time(_newest_key_time),
  124. target_file_size(_target_file_size),
  125. file_creation_time(_file_creation_time),
  126. db_id(_db_id),
  127. db_session_id(_db_session_id),
  128. is_bottommost(_is_bottommost),
  129. reason(_reason),
  130. cur_file_num(_cur_file_num) {}
  131. const ImmutableOptions& ioptions;
  132. const MutableCFOptions& moptions;
  133. const ReadOptions& read_options;
  134. const WriteOptions& write_options;
  135. const InternalKeyComparator& internal_comparator;
  136. const InternalTblPropCollFactories* internal_tbl_prop_coll_factories;
  137. const CompressionType compression_type;
  138. const CompressionOptions& compression_opts;
  139. const std::string& column_family_name;
  140. const int64_t oldest_key_time;
  141. const int64_t newest_key_time;
  142. const uint64_t target_file_size;
  143. const uint64_t file_creation_time;
  144. const std::string db_id;
  145. const std::string db_session_id;
  146. // BEGIN for FilterBuildingContext
  147. const bool is_bottommost;
  148. const TableFileCreationReason reason;
  149. // END for FilterBuildingContext
  150. // XXX: only used by BlockBasedTableBuilder for SstFileWriter. If you
  151. // want to skip filters, that should be (for example) null filter_policy
  152. // in the table options of the ioptions.table_factory
  153. bool skip_filters = false;
  154. const uint64_t cur_file_num;
  155. };
  156. // TableBuilder provides the interface used to build a Table
  157. // (an immutable and sorted map from keys to values).
  158. //
  159. // Multiple threads can invoke const methods on a TableBuilder without
  160. // external synchronization, but if any of the threads may call a
  161. // non-const method, all threads accessing the same TableBuilder must use
  162. // external synchronization.
  163. class TableBuilder {
  164. public:
  165. // REQUIRES: Either Finish() or Abandon() has been called.
  166. virtual ~TableBuilder() {}
  167. // Add key,value to the table being constructed.
  168. // REQUIRES: key is after any previously added key according to comparator.
  169. // REQUIRES: Finish(), Abandon() have not been called
  170. virtual void Add(const Slice& key, const Slice& value) = 0;
  171. // Return non-ok iff some error has been detected.
  172. virtual Status status() const = 0;
  173. // Return non-ok iff some error happens during IO.
  174. virtual IOStatus io_status() const = 0;
  175. // Finish building the table.
  176. // REQUIRES: Finish(), Abandon() have not been called
  177. virtual Status Finish() = 0;
  178. // Indicate that the contents of this builder should be abandoned.
  179. // If the caller is not going to call Finish(), it must call Abandon()
  180. // before destroying this builder.
  181. // REQUIRES: Finish(), Abandon() have not been called
  182. virtual void Abandon() = 0;
  183. // Number of calls to Add() so far.
  184. virtual uint64_t NumEntries() const = 0;
  185. // Whether the output file is completely empty. It has neither entries
  186. // or tombstones.
  187. virtual bool IsEmpty() const {
  188. return NumEntries() == 0 && GetTableProperties().num_range_deletions == 0;
  189. }
  190. // Size of the file before its content is compressed.
  191. virtual uint64_t PreCompressionSize() const { return 0; }
  192. // Size of the file generated so far. If invoked after a successful
  193. // Finish() call, returns the size of the final generated file.
  194. virtual uint64_t FileSize() const = 0;
  195. // Estimated size of the file generated so far. This is used when
  196. // FileSize() cannot estimate final SST size, e.g. parallel compression
  197. // is enabled.
  198. virtual uint64_t EstimatedFileSize() const { return FileSize(); }
  199. virtual uint64_t GetTailSize() const { return 0; }
  200. // If the user defined table properties collector suggest the file to
  201. // be further compacted.
  202. virtual bool NeedCompact() const { return false; }
  203. // Returns table properties
  204. virtual TableProperties GetTableProperties() const = 0;
  205. // Return file checksum
  206. virtual std::string GetFileChecksum() const = 0;
  207. // Return file checksum function name
  208. virtual const char* GetFileChecksumFuncName() const = 0;
  209. // Set the sequence number to time mapping. `relevant_mapping` must be in
  210. // enforced state (ready to encode to string).
  211. virtual void SetSeqnoTimeTableProperties(
  212. const SeqnoToTimeMapping& /*relevant_mapping*/,
  213. uint64_t /*oldest_ancestor_time*/) {}
  214. // If this builder used CPU work from threads other than the caller, return
  215. // the CPU microseconds used. 0 = no work outside calling thread, or not
  216. // supported.
  217. virtual uint64_t GetWorkerCPUMicros() const { return 0; }
  218. };
  219. } // namespace ROCKSDB_NAMESPACE