version_edit_handler.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
#include <utility>

#include "db/version_builder.h"
#include "db/version_edit.h"
#include "db/version_set.h"
  13. namespace ROCKSDB_NAMESPACE {
  14. struct FileMetaData;
  15. class VersionEditHandlerBase {
  16. public:
  17. explicit VersionEditHandlerBase(const ReadOptions& read_options)
  18. : read_options_(read_options),
  19. max_manifest_read_size_(std::numeric_limits<uint64_t>::max()) {}
  20. virtual ~VersionEditHandlerBase() {}
  21. void Iterate(log::Reader& reader, Status* log_read_status);
  22. const Status& status() const { return status_; }
  23. AtomicGroupReadBuffer& GetReadBuffer() { return read_buffer_; }
  24. protected:
  25. explicit VersionEditHandlerBase(const ReadOptions& read_options,
  26. uint64_t max_read_size)
  27. : read_options_(read_options), max_manifest_read_size_(max_read_size) {}
  28. virtual Status Initialize() { return Status::OK(); }
  29. virtual Status ApplyVersionEdit(VersionEdit& edit,
  30. ColumnFamilyData** cfd) = 0;
  31. virtual Status OnAtomicGroupReplayBegin() { return Status::OK(); }
  32. virtual Status OnAtomicGroupReplayEnd() { return Status::OK(); }
  33. virtual void CheckIterationResult(const log::Reader& /*reader*/,
  34. Status* /*s*/) {}
  35. void ClearReadBuffer() { read_buffer_.Clear(); }
  36. Status status_;
  37. const ReadOptions& read_options_;
  38. private:
  39. AtomicGroupReadBuffer read_buffer_;
  40. const uint64_t max_manifest_read_size_;
  41. };
  42. class ListColumnFamiliesHandler : public VersionEditHandlerBase {
  43. public:
  44. explicit ListColumnFamiliesHandler(const ReadOptions& read_options)
  45. : VersionEditHandlerBase(read_options) {}
  46. ~ListColumnFamiliesHandler() override {}
  47. const std::map<uint32_t, std::string> GetColumnFamilyNames() const {
  48. return column_family_names_;
  49. }
  50. protected:
  51. Status ApplyVersionEdit(VersionEdit& edit,
  52. ColumnFamilyData** /*unused*/) override;
  53. private:
  54. // default column family is always implicitly there
  55. std::map<uint32_t, std::string> column_family_names_{
  56. {0, kDefaultColumnFamilyName}};
  57. };
  58. class FileChecksumRetriever : public VersionEditHandlerBase {
  59. public:
  60. FileChecksumRetriever(const ReadOptions& read_options, uint64_t max_read_size)
  61. : VersionEditHandlerBase(read_options, max_read_size) {}
  62. ~FileChecksumRetriever() override {}
  63. Status FetchFileChecksumList(FileChecksumList& file_checksum_list);
  64. protected:
  65. Status ApplyVersionEdit(VersionEdit& edit,
  66. ColumnFamilyData** /*unused*/) override;
  67. private:
  68. // Map from CF to file # to string pair, where first portion of the value
  69. // is checksum, and second portion of the value is checksum function name.
  70. //
  71. // [column family id A]
  72. // |
  73. // |-- [file #1] -> [checksum #1, checksum function name #1]
  74. // |-- [file #2] -> [checksum #2, checksum function name #2]
  75. // |
  76. // ...
  77. // |
  78. // |-- [file #N] -> [checksum #N, checksum function name #N]
  79. // [column family id B]
  80. // |
  81. // |-- [file #1] -> [checksum #1, checksum function name #1]
  82. // |
  83. // ...
  84. // |
  85. // |-- [file #M] -> [checksum #M, checksum function name #M]
  86. // |
  87. // ...
  88. std::unordered_map<
  89. uint32_t,
  90. std::unordered_map<uint64_t, std::pair<std::string, std::string>>>
  91. cf_file_checksums_;
  92. };
  93. using VersionBuilderUPtr = std::unique_ptr<BaseReferencedVersionBuilder>;
  94. // A class used for scanning MANIFEST file.
  95. // VersionEditHandler reads a MANIFEST file, parses the version edits, and
  96. // builds the version set's in-memory state, e.g. the version storage info for
  97. // the versions of column families. It replays all the version edits in one
  98. // MANIFEST file to build the end version.
  99. //
  100. // To use this class and its subclasses,
  101. // 1. Create an object of VersionEditHandler or its subclasses.
  102. // VersionEditHandler handler(read_only, column_families, version_set,
  103. // track_found_and_missing_files,
  104. // no_error_if_files_missing);
  105. // 2. Status s = handler.Iterate(reader, &db_id);
  106. // 3. Check s and handle possible errors.
  107. //
  108. // Not thread-safe, external synchronization is necessary if an object of
  109. // VersionEditHandler is shared by multiple threads.
  110. class VersionEditHandler : public VersionEditHandlerBase {
  111. public:
  112. explicit VersionEditHandler(
  113. bool read_only,
  114. const std::vector<ColumnFamilyDescriptor>& column_families,
  115. VersionSet* version_set, bool track_found_and_missing_files,
  116. bool no_error_if_files_missing,
  117. const std::shared_ptr<IOTracer>& io_tracer,
  118. const ReadOptions& read_options, bool allow_incomplete_valid_version,
  119. EpochNumberRequirement epoch_number_requirement =
  120. EpochNumberRequirement::kMustPresent)
  121. : VersionEditHandler(read_only, column_families, version_set,
  122. track_found_and_missing_files,
  123. no_error_if_files_missing, io_tracer, read_options,
  124. /*skip_load_table_files=*/false,
  125. allow_incomplete_valid_version,
  126. epoch_number_requirement) {}
  127. ~VersionEditHandler() override {}
  128. const VersionEditParams& GetVersionEditParams() const {
  129. return version_edit_params_;
  130. }
  131. void GetDbId(std::string* db_id) const {
  132. if (db_id && version_edit_params_.HasDbId()) {
  133. *db_id = version_edit_params_.GetDbId();
  134. }
  135. }
  136. virtual Status VerifyFile(ColumnFamilyData* /*cfd*/,
  137. const std::string& /*fpath*/, int /*level*/,
  138. const FileMetaData& /*fmeta*/) {
  139. return Status::OK();
  140. }
  141. virtual Status VerifyBlobFile(ColumnFamilyData* /*cfd*/,
  142. uint64_t /*blob_file_num*/,
  143. const BlobFileAddition& /*blob_addition*/) {
  144. return Status::OK();
  145. }
  146. protected:
  147. explicit VersionEditHandler(
  148. bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
  149. VersionSet* version_set, bool track_found_and_missing_files,
  150. bool no_error_if_files_missing,
  151. const std::shared_ptr<IOTracer>& io_tracer,
  152. const ReadOptions& read_options, bool skip_load_table_files,
  153. bool allow_incomplete_valid_version,
  154. EpochNumberRequirement epoch_number_requirement =
  155. EpochNumberRequirement::kMustPresent);
  156. Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
  157. virtual Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd);
  158. Status OnColumnFamilyDrop(VersionEdit& edit, ColumnFamilyData** cfd);
  159. Status OnNonCfOperation(VersionEdit& edit, ColumnFamilyData** cfd);
  160. Status OnWalAddition(VersionEdit& edit);
  161. Status OnWalDeletion(VersionEdit& edit);
  162. Status Initialize() override;
  163. void CheckColumnFamilyId(const VersionEdit& edit, bool* do_not_open_cf,
  164. bool* cf_in_builders) const;
  165. void CheckIterationResult(const log::Reader& reader, Status* s) override;
  166. ColumnFamilyData* CreateCfAndInit(const ColumnFamilyOptions& cf_options,
  167. const VersionEdit& edit);
  168. virtual ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit);
  169. virtual Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit,
  170. ColumnFamilyData* cfd,
  171. bool force_create_version);
  172. virtual Status LoadTables(ColumnFamilyData* cfd,
  173. bool prefetch_index_and_filter_in_cache,
  174. bool is_initial_load);
  175. virtual bool MustOpenAllColumnFamilies() const {
  176. return !version_set_->unchanging();
  177. }
  178. const bool read_only_;
  179. std::vector<ColumnFamilyDescriptor> column_families_;
  180. VersionSet* version_set_;
  181. std::unordered_map<uint32_t, VersionBuilderUPtr> builders_;
  182. std::unordered_map<std::string, ColumnFamilyOptions> name_to_options_;
  183. const bool track_found_and_missing_files_;
  184. // Keeps track of column families in manifest that were not found in
  185. // column families parameters. Namely, the user asks to not open these column
  186. // families. In non read only mode, if those column families are not dropped
  187. // by subsequent manifest records, Recover() will return failure status.
  188. std::unordered_map<uint32_t, std::string> do_not_open_column_families_;
  189. VersionEditParams version_edit_params_;
  190. bool no_error_if_files_missing_;
  191. std::shared_ptr<IOTracer> io_tracer_;
  192. bool skip_load_table_files_;
  193. bool initialized_;
  194. std::unique_ptr<std::unordered_map<uint32_t, std::string>> cf_to_cmp_names_;
  195. // If false, only a complete Version for which all files consisting it can be
  196. // found is considered a valid Version. If true, besides complete Version, an
  197. // incomplete Version with only a suffix of L0 files missing is also
  198. // considered valid if the Version is never edited in an atomic group.
  199. const bool allow_incomplete_valid_version_;
  200. EpochNumberRequirement epoch_number_requirement_;
  201. std::unordered_set<uint32_t> cfds_to_mark_no_udt_;
  202. private:
  203. Status ExtractInfoFromVersionEdit(ColumnFamilyData* cfd,
  204. const VersionEdit& edit);
  205. // When `FileMetaData.user_defined_timestamps_persisted` is false and
  206. // user-defined timestamp size is non-zero. User-defined timestamps are
  207. // stripped from file boundaries: `smallest`, `largest` in
  208. // `VersionEdit.DecodeFrom` before they were written to Manifest.
  209. // This is the mirroring change to handle file boundaries on the Manifest read
  210. // path for this scenario: to pad a minimum timestamp to the user key in
  211. // `smallest` and `largest` so their format are consistent with the running
  212. // user comparator.
  213. Status MaybeHandleFileBoundariesForNewFiles(VersionEdit& edit,
  214. const ColumnFamilyData* cfd);
  215. };
  216. // A class similar to its base class, i.e. VersionEditHandler.
  217. // Unlike VersionEditHandler that only aims to build the end version, this class
  218. // supports building the most recent point in time version. A point in time
  219. // version is a version for which no files are missing, or if
  220. // `allow_incomplete_valid_version` is true, only a suffix of L0 files (and
  221. // their associated blob files) are missing.
  222. //
  223. // Building a point in time version when end version is not available can
  224. // be useful for best efforts recovery (options.best_efforts_recovery), which
  225. // uses this class and sets `allow_incomplete_valid_version` to true.
  226. // It's also useful for secondary instances/follower instances for which end
  227. // version could be transiently unavailable. These two cases use subclass
  228. // `ManifestTailer` and sets `allow_incomplete_valid_version` to false.
  229. //
  230. // Not thread-safe, external synchronization is necessary if an object of
  231. // VersionEditHandlerPointInTime is shared by multiple threads.
  232. class VersionEditHandlerPointInTime : public VersionEditHandler {
  233. public:
  234. VersionEditHandlerPointInTime(
  235. bool read_only, std::vector<ColumnFamilyDescriptor> column_families,
  236. VersionSet* version_set, const std::shared_ptr<IOTracer>& io_tracer,
  237. const ReadOptions& read_options, bool allow_incomplete_valid_version,
  238. EpochNumberRequirement epoch_number_requirement =
  239. EpochNumberRequirement::kMustPresent);
  240. ~VersionEditHandlerPointInTime() override;
  241. bool HasMissingFiles() const;
  242. virtual Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath,
  243. int level, const FileMetaData& fmeta) override;
  244. virtual Status VerifyBlobFile(ColumnFamilyData* cfd, uint64_t blob_file_num,
  245. const BlobFileAddition& blob_addition) override;
  246. protected:
  247. Status OnAtomicGroupReplayBegin() override;
  248. Status OnAtomicGroupReplayEnd() override;
  249. void CheckIterationResult(const log::Reader& reader, Status* s) override;
  250. ColumnFamilyData* DestroyCfAndCleanup(const VersionEdit& edit) override;
  251. // `MaybeCreateVersionBeforeApplyEdit(..., false)` creates a version upon a
  252. // negative edge trigger (transition from valid to invalid).
  253. //
  254. // `MaybeCreateVersionBeforeApplyEdit(..., true)` creates a version on a
  255. // positive level trigger (state is valid).
  256. Status MaybeCreateVersionBeforeApplyEdit(const VersionEdit& edit,
  257. ColumnFamilyData* cfd,
  258. bool force_create_version) override;
  259. Status LoadTables(ColumnFamilyData* cfd,
  260. bool prefetch_index_and_filter_in_cache,
  261. bool is_initial_load) override;
  262. std::unordered_map<uint32_t, Version*> versions_;
  263. // `atomic_update_versions_` is for ensuring all-or-nothing AtomicGroup
  264. // recoveries. When `atomic_update_versions_` is nonempty, it serves as a
  265. // barrier to updating `versions_` until all its values are populated.
  266. std::unordered_map<uint32_t, Version*> atomic_update_versions_;
  267. // `atomic_update_versions_missing_` counts the nullptr values in
  268. // `atomic_update_versions_`.
  269. size_t atomic_update_versions_missing_;
  270. bool in_atomic_group_ = false;
  271. private:
  272. bool AtomicUpdateVersionsCompleted();
  273. bool AtomicUpdateVersionsContains(uint32_t cfid);
  274. void AtomicUpdateVersionsDropCf(uint32_t cfid);
  275. // This function is called for `Version*` updates for column families in an
  276. // incomplete atomic update. It buffers `Version*` updates in
  277. // `atomic_update_versions_`.
  278. void AtomicUpdateVersionsPut(Version* version);
  279. // This function is called upon completion of an atomic update. It applies
  280. // `Version*` updates in `atomic_update_versions_` to `versions_`.
  281. void AtomicUpdateVersionsApply();
  282. };
  283. // A class similar to `VersionEditHandlerPointInTime` that parse MANIFEST and
  284. // builds point in time version.
  285. // `ManifestTailer` supports reading one MANIFEST file in multiple tailing
  286. // attempts and supports switching to a different MANIFEST after
  287. // `PrepareToReadNewManifest` is called. This class is used by secondary and
  288. // follower instance.
  289. class ManifestTailer : public VersionEditHandlerPointInTime {
  290. public:
  291. explicit ManifestTailer(std::vector<ColumnFamilyDescriptor> column_families,
  292. VersionSet* version_set,
  293. const std::shared_ptr<IOTracer>& io_tracer,
  294. const ReadOptions& read_options,
  295. EpochNumberRequirement epoch_number_requirement =
  296. EpochNumberRequirement::kMustPresent)
  297. : VersionEditHandlerPointInTime(
  298. /*read_only=*/true, column_families, version_set, io_tracer,
  299. read_options,
  300. /*allow_incomplete_valid_version=*/false, epoch_number_requirement),
  301. mode_(Mode::kRecovery) {}
  302. Status VerifyFile(ColumnFamilyData* cfd, const std::string& fpath, int level,
  303. const FileMetaData& fmeta) override;
  304. void PrepareToReadNewManifest() {
  305. initialized_ = false;
  306. ClearReadBuffer();
  307. }
  308. std::unordered_set<ColumnFamilyData*>& GetUpdatedColumnFamilies() {
  309. return cfds_changed_;
  310. }
  311. std::vector<std::string> GetAndClearIntermediateFiles();
  312. protected:
  313. Status Initialize() override;
  314. bool MustOpenAllColumnFamilies() const override { return false; }
  315. Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override;
  316. Status OnColumnFamilyAdd(VersionEdit& edit, ColumnFamilyData** cfd) override;
  317. void CheckIterationResult(const log::Reader& reader, Status* s) override;
  318. enum Mode : uint8_t {
  319. kRecovery = 0,
  320. kCatchUp = 1,
  321. };
  322. Mode mode_;
  323. std::unordered_set<ColumnFamilyData*> cfds_changed_;
  324. };
  325. class DumpManifestHandler : public VersionEditHandler {
  326. public:
  327. DumpManifestHandler(std::vector<ColumnFamilyDescriptor> column_families,
  328. VersionSet* version_set,
  329. const std::shared_ptr<IOTracer>& io_tracer,
  330. const ReadOptions& read_options, bool verbose, bool hex,
  331. bool json)
  332. : VersionEditHandler(
  333. /*read_only=*/true, column_families, version_set,
  334. /*track_found_and_missing_files=*/false,
  335. /*no_error_if_files_missing=*/false, io_tracer, read_options,
  336. /*skip_load_table_files=*/true,
  337. /*allow_incomplete_valid_version=*/false,
  338. /*epoch_number_requirement=*/EpochNumberRequirement::kMustPresent),
  339. verbose_(verbose),
  340. hex_(hex),
  341. json_(json),
  342. count_(0) {
  343. cf_to_cmp_names_.reset(new std::unordered_map<uint32_t, std::string>());
  344. }
  345. ~DumpManifestHandler() override {}
  346. Status ApplyVersionEdit(VersionEdit& edit, ColumnFamilyData** cfd) override {
  347. // Write out each individual edit
  348. if (json_) {
  349. // Print out DebugStrings. Can include non-terminating null characters.
  350. std::string edit_dump_str = edit.DebugJSON(count_, hex_);
  351. fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
  352. fwrite("\n", sizeof(char), 1, stdout);
  353. } else if (verbose_) {
  354. // Print out DebugStrings. Can include non-terminating null characters.
  355. std::string edit_dump_str = edit.DebugString(hex_);
  356. fwrite(edit_dump_str.data(), sizeof(char), edit_dump_str.size(), stdout);
  357. }
  358. ++count_;
  359. return VersionEditHandler::ApplyVersionEdit(edit, cfd);
  360. }
  361. void CheckIterationResult(const log::Reader& reader, Status* s) override;
  362. private:
  363. const bool verbose_;
  364. const bool hex_;
  365. const bool json_;
  366. int count_;
  367. };
  368. } // namespace ROCKSDB_NAMESPACE