import_column_family_job.h 3.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. // Copyright (c) Meta Platforms, Inc. and affiliates.
  2. //
  3. // This source code is licensed under both the GPLv2 (found in the
  4. // COPYING file in the root directory) and Apache 2.0 License
  5. // (found in the LICENSE.Apache file in the root directory).
  6. #pragma once
  7. #include <string>
  8. #include <unordered_set>
  9. #include <vector>
  10. #include "db/column_family.h"
  11. #include "db/external_sst_file_ingestion_job.h"
  12. #include "db/snapshot_impl.h"
  13. #include "options/db_options.h"
  14. #include "rocksdb/db.h"
  15. #include "rocksdb/metadata.h"
  16. #include "rocksdb/sst_file_writer.h"
  17. #include "util/autovector.h"
  18. namespace ROCKSDB_NAMESPACE {
  19. struct EnvOptions;
  20. class SystemClock;
  21. // Imports a set of sst files as is into a new column family. Logic is similar
  22. // to ExternalSstFileIngestionJob.
  23. class ImportColumnFamilyJob {
  24. // All file information of an imported CF, mainly used to
  25. // calculate whether there is overlap between CFs
  26. struct ColumnFamilyIngestFileInfo {
  27. // Smallest internal key in cf
  28. InternalKey smallest_internal_key;
  29. // Largest internal key in cf
  30. InternalKey largest_internal_key;
  31. };
  32. public:
  33. ImportColumnFamilyJob(
  34. VersionSet* versions, ColumnFamilyData* cfd,
  35. const ImmutableDBOptions& db_options, const EnvOptions& env_options,
  36. const ImportColumnFamilyOptions& import_options,
  37. const std::vector<std::vector<LiveFileMetaData*>>& metadatas,
  38. const std::shared_ptr<IOTracer>& io_tracer)
  39. : clock_(db_options.clock),
  40. versions_(versions),
  41. cfd_(cfd),
  42. db_options_(db_options),
  43. fs_(db_options_.fs, io_tracer),
  44. env_options_(env_options),
  45. import_options_(import_options),
  46. metadatas_(metadatas),
  47. io_tracer_(io_tracer) {}
  48. // Prepare the job by copying external files into the DB.
  49. Status Prepare(uint64_t next_file_number, SuperVersion* sv);
  50. // Will execute the import job and prepare edit() to be applied.
  51. // REQUIRES: Mutex held
  52. Status Run();
  53. // Cleanup after successful/failed job
  54. void Cleanup(const Status& status);
  55. VersionEdit* edit() { return &edit_; }
  56. const std::vector<std::vector<IngestedFileInfo>>& files_to_import() const {
  57. return files_to_import_;
  58. }
  59. private:
  60. // Open the external file and populate `file_to_import` with all the
  61. // external information we need to import this file.
  62. Status GetIngestedFileInfo(const std::string& external_file,
  63. uint64_t new_file_number, SuperVersion* sv,
  64. const LiveFileMetaData& file_meta,
  65. IngestedFileInfo* file_to_import);
  66. SystemClock* clock_;
  67. VersionSet* versions_;
  68. ColumnFamilyData* cfd_;
  69. const ImmutableDBOptions& db_options_;
  70. const FileSystemPtr fs_;
  71. const EnvOptions& env_options_;
  72. std::vector<std::vector<IngestedFileInfo>> files_to_import_;
  73. VersionEdit edit_;
  74. const ImportColumnFamilyOptions& import_options_;
  75. const std::vector<std::vector<LiveFileMetaData*>> metadatas_;
  76. const std::shared_ptr<IOTracer> io_tracer_;
  77. };
  78. } // namespace ROCKSDB_NAMESPACE