| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #include "db/wal_manager.h"
- #include <algorithm>
- #include <cinttypes>
- #include <memory>
- #include <vector>
- #include "db/log_reader.h"
- #include "db/log_writer.h"
- #include "db/transaction_log_impl.h"
- #include "db/write_batch_internal.h"
- #include "file/file_util.h"
- #include "file/filename.h"
- #include "file/sequence_file_reader.h"
- #include "logging/logging.h"
- #include "port/port.h"
- #include "rocksdb/env.h"
- #include "rocksdb/options.h"
- #include "rocksdb/write_batch.h"
- #include "test_util/sync_point.h"
- #include "util/cast_util.h"
- #include "util/coding.h"
- #include "util/mutexlock.h"
- #include "util/string_util.h"
- namespace ROCKSDB_NAMESPACE {
- #ifndef ROCKSDB_LITE
- Status WalManager::DeleteFile(const std::string& fname, uint64_t number) {
- auto s = env_->DeleteFile(db_options_.wal_dir + "/" + fname);
- if (s.ok()) {
- MutexLock l(&read_first_record_cache_mutex_);
- read_first_record_cache_.erase(number);
- }
- return s;
- }
- Status WalManager::GetSortedWalFiles(VectorLogPtr& files) {
- // First get sorted files in db dir, then get sorted files from archived
- // dir, to avoid a race condition where a log file is moved to archived
- // dir in between.
- Status s;
- // list wal files in main db dir.
- VectorLogPtr logs;
- s = GetSortedWalsOfType(db_options_.wal_dir, logs, kAliveLogFile);
- if (!s.ok()) {
- return s;
- }
- // Reproduce the race condition where a log file is moved
- // to archived dir, between these two sync points, used in
- // (DBTest,TransactionLogIteratorRace)
- TEST_SYNC_POINT("WalManager::GetSortedWalFiles:1");
- TEST_SYNC_POINT("WalManager::GetSortedWalFiles:2");
- files.clear();
- // list wal files in archive dir.
- std::string archivedir = ArchivalDirectory(db_options_.wal_dir);
- Status exists = env_->FileExists(archivedir);
- if (exists.ok()) {
- s = GetSortedWalsOfType(archivedir, files, kArchivedLogFile);
- if (!s.ok()) {
- return s;
- }
- } else if (!exists.IsNotFound()) {
- assert(s.IsIOError());
- return s;
- }
- uint64_t latest_archived_log_number = 0;
- if (!files.empty()) {
- latest_archived_log_number = files.back()->LogNumber();
- ROCKS_LOG_INFO(db_options_.info_log, "Latest Archived log: %" PRIu64,
- latest_archived_log_number);
- }
- files.reserve(files.size() + logs.size());
- for (auto& log : logs) {
- if (log->LogNumber() > latest_archived_log_number) {
- files.push_back(std::move(log));
- } else {
- // When the race condition happens, we could see the
- // same log in both db dir and archived dir. Simply
- // ignore the one in db dir. Note that, if we read
- // archived dir first, we would have missed the log file.
- ROCKS_LOG_WARN(db_options_.info_log, "%s already moved to archive",
- log->PathName().c_str());
- }
- }
- return s;
- }
- Status WalManager::GetUpdatesSince(
- SequenceNumber seq, std::unique_ptr<TransactionLogIterator>* iter,
- const TransactionLogIterator::ReadOptions& read_options,
- VersionSet* version_set) {
- // Get all sorted Wal Files.
- // Do binary search and open files and find the seq number.
- std::unique_ptr<VectorLogPtr> wal_files(new VectorLogPtr);
- Status s = GetSortedWalFiles(*wal_files);
- if (!s.ok()) {
- return s;
- }
- s = RetainProbableWalFiles(*wal_files, seq);
- if (!s.ok()) {
- return s;
- }
- iter->reset(new TransactionLogIteratorImpl(
- db_options_.wal_dir, &db_options_, read_options, file_options_, seq,
- std::move(wal_files), version_set, seq_per_batch_));
- return (*iter)->status();
- }
- // 1. Go through all archived files and
- // a. if ttl is enabled, delete outdated files
- // b. if archive size limit is enabled, delete empty files,
- // compute file number and size.
- // 2. If size limit is enabled:
- // a. compute how many files should be deleted
- // b. get sorted non-empty archived logs
- // c. delete what should be deleted
- void WalManager::PurgeObsoleteWALFiles() {
- bool const ttl_enabled = db_options_.wal_ttl_seconds > 0;
- bool const size_limit_enabled = db_options_.wal_size_limit_mb > 0;
- if (!ttl_enabled && !size_limit_enabled) {
- return;
- }
- int64_t current_time;
- Status s = env_->GetCurrentTime(¤t_time);
- if (!s.ok()) {
- ROCKS_LOG_ERROR(db_options_.info_log, "Can't get current time: %s",
- s.ToString().c_str());
- assert(false);
- return;
- }
- uint64_t const now_seconds = static_cast<uint64_t>(current_time);
- uint64_t const time_to_check = (ttl_enabled && !size_limit_enabled)
- ? db_options_.wal_ttl_seconds / 2
- : kDefaultIntervalToDeleteObsoleteWAL;
- if (purge_wal_files_last_run_ + time_to_check > now_seconds) {
- return;
- }
- purge_wal_files_last_run_ = now_seconds;
- std::string archival_dir = ArchivalDirectory(db_options_.wal_dir);
- std::vector<std::string> files;
- s = env_->GetChildren(archival_dir, &files);
- if (!s.ok()) {
- ROCKS_LOG_ERROR(db_options_.info_log, "Can't get archive files: %s",
- s.ToString().c_str());
- assert(false);
- return;
- }
- size_t log_files_num = 0;
- uint64_t log_file_size = 0;
- for (auto& f : files) {
- uint64_t number;
- FileType type;
- if (ParseFileName(f, &number, &type) && type == kLogFile) {
- std::string const file_path = archival_dir + "/" + f;
- if (ttl_enabled) {
- uint64_t file_m_time;
- s = env_->GetFileModificationTime(file_path, &file_m_time);
- if (!s.ok()) {
- ROCKS_LOG_WARN(db_options_.info_log,
- "Can't get file mod time: %s: %s", file_path.c_str(),
- s.ToString().c_str());
- continue;
- }
- if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) {
- s = DeleteDBFile(&db_options_, file_path, archival_dir, false,
- /*force_fg=*/!wal_in_db_path_);
- if (!s.ok()) {
- ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s",
- file_path.c_str(), s.ToString().c_str());
- continue;
- } else {
- MutexLock l(&read_first_record_cache_mutex_);
- read_first_record_cache_.erase(number);
- }
- continue;
- }
- }
- if (size_limit_enabled) {
- uint64_t file_size;
- s = env_->GetFileSize(file_path, &file_size);
- if (!s.ok()) {
- ROCKS_LOG_ERROR(db_options_.info_log,
- "Unable to get file size: %s: %s", file_path.c_str(),
- s.ToString().c_str());
- return;
- } else {
- if (file_size > 0) {
- log_file_size = std::max(log_file_size, file_size);
- ++log_files_num;
- } else {
- s = DeleteDBFile(&db_options_, file_path, archival_dir, false,
- /*force_fg=*/!wal_in_db_path_);
- if (!s.ok()) {
- ROCKS_LOG_WARN(db_options_.info_log,
- "Unable to delete file: %s: %s", file_path.c_str(),
- s.ToString().c_str());
- continue;
- } else {
- MutexLock l(&read_first_record_cache_mutex_);
- read_first_record_cache_.erase(number);
- }
- }
- }
- }
- }
- }
- if (0 == log_files_num || !size_limit_enabled) {
- return;
- }
- size_t const files_keep_num =
- static_cast<size_t>(db_options_.wal_size_limit_mb * 1024 * 1024 / log_file_size);
- if (log_files_num <= files_keep_num) {
- return;
- }
- size_t files_del_num = log_files_num - files_keep_num;
- VectorLogPtr archived_logs;
- GetSortedWalsOfType(archival_dir, archived_logs, kArchivedLogFile);
- if (files_del_num > archived_logs.size()) {
- ROCKS_LOG_WARN(db_options_.info_log,
- "Trying to delete more archived log files than "
- "exist. Deleting all");
- files_del_num = archived_logs.size();
- }
- for (size_t i = 0; i < files_del_num; ++i) {
- std::string const file_path = archived_logs[i]->PathName();
- s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path,
- db_options_.wal_dir, false,
- /*force_fg=*/!wal_in_db_path_);
- if (!s.ok()) {
- ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s",
- file_path.c_str(), s.ToString().c_str());
- continue;
- } else {
- MutexLock l(&read_first_record_cache_mutex_);
- read_first_record_cache_.erase(archived_logs[i]->LogNumber());
- }
- }
- }
- void WalManager::ArchiveWALFile(const std::string& fname, uint64_t number) {
- auto archived_log_name = ArchivedLogFileName(db_options_.wal_dir, number);
- // The sync point below is used in (DBTest,TransactionLogIteratorRace)
- TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:1");
- Status s = env_->RenameFile(fname, archived_log_name);
- // The sync point below is used in (DBTest,TransactionLogIteratorRace)
- TEST_SYNC_POINT("WalManager::PurgeObsoleteFiles:2");
- ROCKS_LOG_INFO(db_options_.info_log, "Move log file %s to %s -- %s\n",
- fname.c_str(), archived_log_name.c_str(),
- s.ToString().c_str());
- }
- Status WalManager::GetSortedWalsOfType(const std::string& path,
- VectorLogPtr& log_files,
- WalFileType log_type) {
- std::vector<std::string> all_files;
- const Status status = env_->GetChildren(path, &all_files);
- if (!status.ok()) {
- return status;
- }
- log_files.reserve(all_files.size());
- for (const auto& f : all_files) {
- uint64_t number;
- FileType type;
- if (ParseFileName(f, &number, &type) && type == kLogFile) {
- SequenceNumber sequence;
- Status s = ReadFirstRecord(log_type, number, &sequence);
- if (!s.ok()) {
- return s;
- }
- if (sequence == 0) {
- // empty file
- continue;
- }
- // Reproduce the race condition where a log file is moved
- // to archived dir, between these two sync points, used in
- // (DBTest,TransactionLogIteratorRace)
- TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:1");
- TEST_SYNC_POINT("WalManager::GetSortedWalsOfType:2");
- uint64_t size_bytes;
- s = env_->GetFileSize(LogFileName(path, number), &size_bytes);
- // re-try in case the alive log file has been moved to archive.
- if (!s.ok() && log_type == kAliveLogFile) {
- std::string archived_file = ArchivedLogFileName(path, number);
- if (env_->FileExists(archived_file).ok()) {
- s = env_->GetFileSize(archived_file, &size_bytes);
- if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) {
- // oops, the file just got deleted from archived dir! move on
- s = Status::OK();
- continue;
- }
- }
- }
- if (!s.ok()) {
- return s;
- }
- log_files.push_back(std::unique_ptr<LogFile>(
- new LogFileImpl(number, log_type, sequence, size_bytes)));
- }
- }
- std::sort(
- log_files.begin(), log_files.end(),
- [](const std::unique_ptr<LogFile>& a, const std::unique_ptr<LogFile>& b) {
- LogFileImpl* a_impl =
- static_cast_with_check<LogFileImpl, LogFile>(a.get());
- LogFileImpl* b_impl =
- static_cast_with_check<LogFileImpl, LogFile>(b.get());
- return *a_impl < *b_impl;
- });
- return status;
- }
- Status WalManager::RetainProbableWalFiles(VectorLogPtr& all_logs,
- const SequenceNumber target) {
- int64_t start = 0; // signed to avoid overflow when target is < first file.
- int64_t end = static_cast<int64_t>(all_logs.size()) - 1;
- // Binary Search. avoid opening all files.
- while (end >= start) {
- int64_t mid = start + (end - start) / 2; // Avoid overflow.
- SequenceNumber current_seq_num = all_logs.at(static_cast<size_t>(mid))->StartSequence();
- if (current_seq_num == target) {
- end = mid;
- break;
- } else if (current_seq_num < target) {
- start = mid + 1;
- } else {
- end = mid - 1;
- }
- }
- // end could be -ve.
- size_t start_index = static_cast<size_t>(std::max(static_cast<int64_t>(0), end));
- // The last wal file is always included
- all_logs.erase(all_logs.begin(), all_logs.begin() + start_index);
- return Status::OK();
- }
- Status WalManager::ReadFirstRecord(const WalFileType type,
- const uint64_t number,
- SequenceNumber* sequence) {
- *sequence = 0;
- if (type != kAliveLogFile && type != kArchivedLogFile) {
- ROCKS_LOG_ERROR(db_options_.info_log, "[WalManger] Unknown file type %s",
- ToString(type).c_str());
- return Status::NotSupported(
- "File Type Not Known " + ToString(type));
- }
- {
- MutexLock l(&read_first_record_cache_mutex_);
- auto itr = read_first_record_cache_.find(number);
- if (itr != read_first_record_cache_.end()) {
- *sequence = itr->second;
- return Status::OK();
- }
- }
- Status s;
- if (type == kAliveLogFile) {
- std::string fname = LogFileName(db_options_.wal_dir, number);
- s = ReadFirstLine(fname, number, sequence);
- if (!s.ok() && env_->FileExists(fname).ok()) {
- // return any error that is not caused by non-existing file
- return s;
- }
- }
- if (type == kArchivedLogFile || !s.ok()) {
- // check if the file got moved to archive.
- std::string archived_file =
- ArchivedLogFileName(db_options_.wal_dir, number);
- s = ReadFirstLine(archived_file, number, sequence);
- // maybe the file was deleted from archive dir. If that's the case, return
- // Status::OK(). The caller with identify this as empty file because
- // *sequence == 0
- if (!s.ok() && env_->FileExists(archived_file).IsNotFound()) {
- return Status::OK();
- }
- }
- if (s.ok() && *sequence != 0) {
- MutexLock l(&read_first_record_cache_mutex_);
- read_first_record_cache_.insert({number, *sequence});
- }
- return s;
- }
- Status WalManager::GetLiveWalFile(uint64_t number,
- std::unique_ptr<LogFile>* log_file) {
- if (!log_file) {
- return Status::InvalidArgument("log_file not preallocated.");
- }
- if (!number) {
- return Status::PathNotFound("log file not available");
- }
- Status s;
- uint64_t size_bytes;
- s = env_->GetFileSize(LogFileName(db_options_.wal_dir, number), &size_bytes);
- if (!s.ok()) {
- return s;
- }
- log_file->reset(new LogFileImpl(number, kAliveLogFile,
- 0, // SequenceNumber
- size_bytes));
- return Status::OK();
- }
- // the function returns status.ok() and sequence == 0 if the file exists, but is
- // empty
- Status WalManager::ReadFirstLine(const std::string& fname,
- const uint64_t number,
- SequenceNumber* sequence) {
- struct LogReporter : public log::Reader::Reporter {
- Env* env;
- Logger* info_log;
- const char* fname;
- Status* status;
- bool ignore_error; // true if db_options_.paranoid_checks==false
- void Corruption(size_t bytes, const Status& s) override {
- ROCKS_LOG_WARN(info_log, "[WalManager] %s%s: dropping %d bytes; %s",
- (this->ignore_error ? "(ignoring error) " : ""), fname,
- static_cast<int>(bytes), s.ToString().c_str());
- if (this->status->ok()) {
- // only keep the first error
- *this->status = s;
- }
- }
- };
- std::unique_ptr<FSSequentialFile> file;
- Status status = fs_->NewSequentialFile(fname,
- fs_->OptimizeForLogRead(file_options_),
- &file, nullptr);
- std::unique_ptr<SequentialFileReader> file_reader(
- new SequentialFileReader(std::move(file), fname));
- if (!status.ok()) {
- return status;
- }
- LogReporter reporter;
- reporter.env = env_;
- reporter.info_log = db_options_.info_log.get();
- reporter.fname = fname.c_str();
- reporter.status = &status;
- reporter.ignore_error = !db_options_.paranoid_checks;
- log::Reader reader(db_options_.info_log, std::move(file_reader), &reporter,
- true /*checksum*/, number);
- std::string scratch;
- Slice record;
- if (reader.ReadRecord(&record, &scratch) &&
- (status.ok() || !db_options_.paranoid_checks)) {
- if (record.size() < WriteBatchInternal::kHeader) {
- reporter.Corruption(record.size(),
- Status::Corruption("log record too small"));
- // TODO read record's till the first no corrupt entry?
- } else {
- WriteBatch batch;
- WriteBatchInternal::SetContents(&batch, record);
- *sequence = WriteBatchInternal::Sequence(&batch);
- return Status::OK();
- }
- }
- // ReadRecord returns false on EOF, which means that the log file is empty. we
- // return status.ok() in that case and set sequence number to 0
- *sequence = 0;
- return status;
- }
- #endif // ROCKSDB_LITE
- } // namespace ROCKSDB_NAMESPACE
|