| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2012 Facebook.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file.
- #include "utilities/checkpoint/checkpoint_impl.h"
- #include <algorithm>
- #include <cinttypes>
- #include <string>
- #include <tuple>
- #include <unordered_set>
- #include <vector>
- #include "db/wal_manager.h"
- #include "file/file_util.h"
- #include "file/filename.h"
- #include "logging/logging.h"
- #include "port/port.h"
- #include "rocksdb/db.h"
- #include "rocksdb/env.h"
- #include "rocksdb/metadata.h"
- #include "rocksdb/options.h"
- #include "rocksdb/transaction_log.h"
- #include "rocksdb/types.h"
- #include "rocksdb/utilities/checkpoint.h"
- #include "test_util/sync_point.h"
- #include "util/cast_util.h"
- #include "util/file_checksum_helper.h"
- namespace ROCKSDB_NAMESPACE {
- Status Checkpoint::Create(DB* db, Checkpoint** checkpoint_ptr) {
- *checkpoint_ptr = new CheckpointImpl(db);
- return Status::OK();
- }
- Status Checkpoint::CreateCheckpoint(const std::string& /*checkpoint_dir*/,
- uint64_t /*log_size_for_flush*/,
- uint64_t* /*sequence_number_ptr*/) {
- return Status::NotSupported("");
- }
- Status CheckpointImpl::CleanStagingDirectory(
- const std::string& full_private_path, Logger* info_log) {
- std::vector<std::string> subchildren;
- Status s = db_->GetEnv()->FileExists(full_private_path);
- if (s.IsNotFound()) {
- // Nothing to clean
- return Status::OK();
- } else if (!s.ok()) {
- return s;
- }
- assert(s.ok());
- ROCKS_LOG_INFO(info_log, "File exists %s -- %s", full_private_path.c_str(),
- s.ToString().c_str());
- s = db_->GetEnv()->GetChildren(full_private_path, &subchildren);
- if (s.ok()) {
- for (auto& subchild : subchildren) {
- Status del_s;
- std::string subchild_path = full_private_path + "/" + subchild;
- del_s = db_->GetEnv()->DeleteFile(subchild_path);
- ROCKS_LOG_INFO(info_log, "Delete file %s -- %s", subchild_path.c_str(),
- del_s.ToString().c_str());
- if (!del_s.ok() && s.ok()) {
- s = del_s;
- }
- }
- }
- // Then delete the private dir
- if (s.ok()) {
- s = db_->GetEnv()->DeleteDir(full_private_path);
- ROCKS_LOG_INFO(info_log, "Delete dir %s -- %s", full_private_path.c_str(),
- s.ToString().c_str());
- }
- return s;
- }
- Status Checkpoint::ExportColumnFamily(
- ColumnFamilyHandle* /*handle*/, const std::string& /*export_dir*/,
- ExportImportFilesMetaData** /*metadata*/) {
- return Status::NotSupported("");
- }
- // Builds an openable snapshot of RocksDB
- Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir,
- uint64_t log_size_for_flush,
- uint64_t* sequence_number_ptr) {
- DBOptions db_options = db_->GetDBOptions();
- Status file_exists_s = db_->GetEnv()->FileExists(checkpoint_dir);
- if (file_exists_s.ok()) {
- return Status::InvalidArgument("Directory exists");
- } else if (!file_exists_s.IsNotFound()) {
- assert(file_exists_s.IsIOError());
- return file_exists_s;
- } else {
- assert(file_exists_s.IsNotFound());
- };
- Status s;
- ROCKS_LOG_INFO(
- db_options.info_log,
- "Started the snapshot process -- creating snapshot in directory %s",
- checkpoint_dir.c_str());
- size_t final_nonslash_idx = checkpoint_dir.find_last_not_of('/');
- if (final_nonslash_idx == std::string::npos) {
- // npos means it's only slashes or empty. Non-empty means it's the root
- // directory, but it shouldn't be because we verified above the directory
- // doesn't exist.
- assert(checkpoint_dir.empty());
- s.PermitUncheckedError();
- return Status::InvalidArgument("invalid checkpoint directory name");
- }
- std::string full_private_path =
- checkpoint_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
- ROCKS_LOG_INFO(db_options.info_log,
- "Snapshot process -- using temporary directory %s",
- full_private_path.c_str());
- s = CleanStagingDirectory(full_private_path, db_options.info_log.get());
- if (!s.ok()) {
- return Status::Aborted(
- "Failed to clean the temporary directory " + full_private_path +
- " needed before checkpoint creation : " + s.ToString());
- }
- // create snapshot directory
- s = db_->GetEnv()->CreateDir(full_private_path);
- uint64_t sequence_number = 0;
- if (s.ok()) {
- // enable file deletions
- s = db_->DisableFileDeletions();
- const bool disabled_file_deletions = s.ok();
- if (s.ok() || s.IsNotSupported()) {
- s = CreateCustomCheckpoint(
- [&](const std::string& src_dirname, const std::string& fname,
- FileType) {
- ROCKS_LOG_INFO(db_options.info_log, "Hard Linking %s",
- fname.c_str());
- return db_->GetFileSystem()->LinkFile(
- src_dirname + "/" + fname, full_private_path + "/" + fname,
- IOOptions(), nullptr);
- } /* link_file_cb */,
- [&](const std::string& src_dirname, const std::string& fname,
- uint64_t size_limit_bytes, FileType,
- const std::string& /* checksum_func_name */,
- const std::string& /* checksum_val */,
- const Temperature temperature) {
- ROCKS_LOG_INFO(db_options.info_log, "Copying %s", fname.c_str());
- return CopyFile(db_->GetFileSystem(), src_dirname + "/" + fname,
- temperature, full_private_path + "/" + fname,
- temperature, size_limit_bytes, db_options.use_fsync,
- nullptr);
- } /* copy_file_cb */,
- [&](const std::string& fname, const std::string& contents, FileType) {
- ROCKS_LOG_INFO(db_options.info_log, "Creating %s", fname.c_str());
- return CreateFile(db_->GetFileSystem(),
- full_private_path + "/" + fname, contents,
- db_options.use_fsync);
- } /* create_file_cb */,
- &sequence_number, log_size_for_flush);
- // we copied all the files, enable file deletions
- if (disabled_file_deletions) {
- Status ss = db_->EnableFileDeletions();
- assert(ss.ok());
- ss.PermitUncheckedError();
- }
- }
- }
- if (s.ok()) {
- // move tmp private backup to real snapshot directory
- s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir);
- }
- if (s.ok()) {
- std::unique_ptr<FSDirectory> checkpoint_directory;
- s = db_->GetFileSystem()->NewDirectory(checkpoint_dir, IOOptions(),
- &checkpoint_directory, nullptr);
- if (s.ok() && checkpoint_directory != nullptr) {
- s = checkpoint_directory->FsyncWithDirOptions(
- IOOptions(), nullptr,
- DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed));
- }
- }
- if (s.ok()) {
- if (sequence_number_ptr != nullptr) {
- *sequence_number_ptr = sequence_number;
- }
- // here we know that we succeeded and installed the new snapshot
- ROCKS_LOG_INFO(db_options.info_log, "Snapshot DONE. All is good");
- ROCKS_LOG_INFO(db_options.info_log, "Snapshot sequence number: %" PRIu64,
- sequence_number);
- } else {
- ROCKS_LOG_INFO(db_options.info_log, "Snapshot failed -- %s",
- s.ToString().c_str());
- // clean all the files and directory we might have created
- Status del_s =
- CleanStagingDirectory(full_private_path, db_options.info_log.get());
- ROCKS_LOG_INFO(db_options.info_log,
- "Clean files or directory we might have created %s: %s",
- full_private_path.c_str(), del_s.ToString().c_str());
- del_s.PermitUncheckedError();
- }
- return s;
- }
- Status CheckpointImpl::CreateCustomCheckpoint(
- std::function<Status(const std::string& src_dirname,
- const std::string& src_fname, FileType type)>
- link_file_cb,
- std::function<
- Status(const std::string& src_dirname, const std::string& src_fname,
- uint64_t size_limit_bytes, FileType type,
- const std::string& checksum_func_name,
- const std::string& checksum_val, const Temperature temperature)>
- copy_file_cb,
- std::function<Status(const std::string& fname, const std::string& contents,
- FileType type)>
- create_file_cb,
- uint64_t* sequence_number, uint64_t log_size_for_flush,
- bool get_live_table_checksum) {
- *sequence_number = db_->GetLatestSequenceNumber();
- LiveFilesStorageInfoOptions opts;
- opts.include_checksum_info = get_live_table_checksum;
- opts.wal_size_for_flush = log_size_for_flush;
- std::vector<LiveFileStorageInfo> infos;
- {
- Status s = db_->GetLiveFilesStorageInfo(opts, &infos);
- if (!s.ok()) {
- return s;
- }
- }
- // Verify that everything except WAL files are in same directory
- // (db_paths / cf_paths not supported)
- std::unordered_set<std::string> dirs;
- for (auto& info : infos) {
- if (info.file_type != kWalFile) {
- dirs.insert(info.directory);
- }
- }
- if (dirs.size() > 1) {
- return Status::NotSupported(
- "db_paths / cf_paths not supported for Checkpoint nor BackupEngine");
- }
- bool same_fs = true;
- for (auto& info : infos) {
- Status s;
- if (!info.replacement_contents.empty()) {
- // Currently should only be used for CURRENT file.
- assert(info.file_type == kCurrentFile);
- if (info.size != info.replacement_contents.size()) {
- s = Status::Corruption("Inconsistent size metadata for " +
- info.relative_filename);
- } else {
- s = create_file_cb(info.relative_filename, info.replacement_contents,
- info.file_type);
- }
- } else {
- if (same_fs && !info.trim_to_size) {
- s = link_file_cb(info.directory, info.relative_filename,
- info.file_type);
- if (s.IsNotSupported()) {
- same_fs = false;
- s = Status::OK();
- }
- s.MustCheck();
- }
- if (!same_fs || info.trim_to_size) {
- assert(info.file_checksum_func_name.empty() ==
- !opts.include_checksum_info);
- // no assertion on file_checksum because empty is used for both "not
- // set" and "unknown"
- if (opts.include_checksum_info) {
- s = copy_file_cb(info.directory, info.relative_filename, info.size,
- info.file_type, info.file_checksum_func_name,
- info.file_checksum, info.temperature);
- } else {
- s = copy_file_cb(info.directory, info.relative_filename, info.size,
- info.file_type, kUnknownFileChecksumFuncName,
- kUnknownFileChecksum, info.temperature);
- }
- }
- }
- if (!s.ok()) {
- return s;
- }
- }
- return Status::OK();
- }
- // Exports all live SST files of a specified Column Family onto export_dir,
- // returning SST files information in metadata.
- Status CheckpointImpl::ExportColumnFamily(
- ColumnFamilyHandle* handle, const std::string& export_dir,
- ExportImportFilesMetaData** metadata) {
- auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(handle);
- const auto cf_name = cfh->GetName();
- const auto db_options = db_->GetDBOptions();
- assert(metadata != nullptr);
- assert(*metadata == nullptr);
- auto s = db_->GetEnv()->FileExists(export_dir);
- if (s.ok()) {
- return Status::InvalidArgument("Specified export_dir exists");
- } else if (!s.IsNotFound()) {
- assert(s.IsIOError());
- return s;
- }
- const auto final_nonslash_idx = export_dir.find_last_not_of('/');
- if (final_nonslash_idx == std::string::npos) {
- return Status::InvalidArgument("Specified export_dir invalid");
- }
- ROCKS_LOG_INFO(db_options.info_log,
- "[%s] export column family onto export directory %s",
- cf_name.c_str(), export_dir.c_str());
- // Create a temporary export directory.
- const auto tmp_export_dir =
- export_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
- s = db_->GetEnv()->CreateDir(tmp_export_dir);
- if (s.ok()) {
- // FIXME: should respect atomic_flush and flush all CFs if needed.
- s = db_->Flush(ROCKSDB_NAMESPACE::FlushOptions(), handle);
- }
- ColumnFamilyMetaData db_metadata;
- if (s.ok()) {
- // Export live sst files with file deletions disabled.
- s = db_->DisableFileDeletions();
- if (s.ok()) {
- db_->GetColumnFamilyMetaData(handle, &db_metadata);
- s = ExportFilesInMetaData(
- db_options, db_metadata,
- [&](const std::string& src_dirname, const std::string& fname) {
- ROCKS_LOG_INFO(db_options.info_log, "[%s] HardLinking %s",
- cf_name.c_str(), fname.c_str());
- return db_->GetEnv()->LinkFile(src_dirname + fname,
- tmp_export_dir + fname);
- } /*link_file_cb*/,
- [&](const std::string& src_dirname, const std::string& fname) {
- ROCKS_LOG_INFO(db_options.info_log, "[%s] Copying %s",
- cf_name.c_str(), fname.c_str());
- // FIXME: temperature handling
- return CopyFile(db_->GetFileSystem(), src_dirname + fname,
- Temperature::kUnknown, tmp_export_dir + fname,
- Temperature::kUnknown, 0, db_options.use_fsync,
- nullptr);
- } /*copy_file_cb*/);
- const auto enable_status = db_->EnableFileDeletions();
- if (s.ok()) {
- s = enable_status;
- }
- }
- }
- auto moved_to_user_specified_dir = false;
- if (s.ok()) {
- // Move temporary export directory to the actual export directory.
- s = db_->GetEnv()->RenameFile(tmp_export_dir, export_dir);
- }
- if (s.ok()) {
- // Fsync export directory.
- moved_to_user_specified_dir = true;
- std::unique_ptr<FSDirectory> dir_ptr;
- s = db_->GetFileSystem()->NewDirectory(export_dir, IOOptions(), &dir_ptr,
- nullptr);
- if (s.ok()) {
- assert(dir_ptr != nullptr);
- s = dir_ptr->FsyncWithDirOptions(
- IOOptions(), nullptr,
- DirFsyncOptions(DirFsyncOptions::FsyncReason::kDirRenamed));
- }
- }
- if (s.ok()) {
- // Export of files succeeded. Fill in the metadata information.
- auto result_metadata = new ExportImportFilesMetaData();
- result_metadata->db_comparator_name = handle->GetComparator()->Name();
- for (const auto& level_metadata : db_metadata.levels) {
- for (const auto& file_metadata : level_metadata.files) {
- LiveFileMetaData live_file_metadata;
- live_file_metadata.size = file_metadata.size;
- live_file_metadata.name = file_metadata.name;
- live_file_metadata.file_number = file_metadata.file_number;
- live_file_metadata.db_path = export_dir;
- live_file_metadata.smallest_seqno = file_metadata.smallest_seqno;
- live_file_metadata.largest_seqno = file_metadata.largest_seqno;
- live_file_metadata.smallestkey = file_metadata.smallestkey;
- live_file_metadata.largestkey = file_metadata.largestkey;
- live_file_metadata.oldest_blob_file_number =
- file_metadata.oldest_blob_file_number;
- live_file_metadata.epoch_number = file_metadata.epoch_number;
- live_file_metadata.level = level_metadata.level;
- live_file_metadata.smallest = file_metadata.smallest;
- live_file_metadata.largest = file_metadata.largest;
- result_metadata->files.push_back(live_file_metadata);
- }
- *metadata = result_metadata;
- }
- ROCKS_LOG_INFO(db_options.info_log, "[%s] Export succeeded.",
- cf_name.c_str());
- } else {
- // Failure: Clean up all the files/directories created.
- ROCKS_LOG_INFO(db_options.info_log, "[%s] Export failed. %s",
- cf_name.c_str(), s.ToString().c_str());
- std::vector<std::string> subchildren;
- const auto cleanup_dir =
- moved_to_user_specified_dir ? export_dir : tmp_export_dir;
- db_->GetEnv()->GetChildren(cleanup_dir, &subchildren);
- for (const auto& subchild : subchildren) {
- const auto subchild_path = cleanup_dir + "/" + subchild;
- const auto status = db_->GetEnv()->DeleteFile(subchild_path);
- if (!status.ok()) {
- ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup file %s: %s",
- subchild_path.c_str(), status.ToString().c_str());
- }
- }
- const auto status = db_->GetEnv()->DeleteDir(cleanup_dir);
- if (!status.ok()) {
- ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup dir %s: %s",
- cleanup_dir.c_str(), status.ToString().c_str());
- }
- }
- return s;
- }
- Status CheckpointImpl::ExportFilesInMetaData(
- const DBOptions& db_options, const ColumnFamilyMetaData& metadata,
- std::function<Status(const std::string& src_dirname,
- const std::string& src_fname)>
- link_file_cb,
- std::function<Status(const std::string& src_dirname,
- const std::string& src_fname)>
- copy_file_cb) {
- Status s;
- auto hardlink_file = true;
- // Copy/hard link files in metadata.
- size_t num_files = 0;
- for (const auto& level_metadata : metadata.levels) {
- for (const auto& file_metadata : level_metadata.files) {
- uint64_t number;
- FileType type;
- const auto ok = ParseFileName(file_metadata.name, &number, &type);
- if (!ok) {
- s = Status::Corruption("Could not parse file name");
- break;
- }
- // We should only get sst files here.
- assert(type == kTableFile);
- assert(file_metadata.size > 0 && file_metadata.name[0] == '/');
- const auto src_fname = file_metadata.name;
- ++num_files;
- if (hardlink_file) {
- s = link_file_cb(db_->GetName(), src_fname);
- if (num_files == 1 && s.IsNotSupported()) {
- // Fallback to copy if link failed due to cross-device directories.
- hardlink_file = false;
- s = Status::OK();
- }
- }
- if (!hardlink_file) {
- s = copy_file_cb(db_->GetName(), src_fname);
- }
- if (!s.ok()) {
- break;
- }
- }
- }
- ROCKS_LOG_INFO(db_options.info_log, "Number of table files %" ROCKSDB_PRIszt,
- num_files);
- return s;
- }
- } // namespace ROCKSDB_NAMESPACE
|