flush_job.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <atomic>
  11. #include <deque>
  12. #include <limits>
  13. #include <list>
  14. #include <set>
  15. #include <string>
  16. #include <utility>
  17. #include <vector>
  18. #include "db/column_family.h"
  19. #include "db/dbformat.h"
  20. #include "db/flush_scheduler.h"
  21. #include "db/internal_stats.h"
  22. #include "db/job_context.h"
  23. #include "db/log_writer.h"
  24. #include "db/logs_with_prep_tracker.h"
  25. #include "db/memtable_list.h"
  26. #include "db/snapshot_impl.h"
  27. #include "db/version_edit.h"
  28. #include "db/write_controller.h"
  29. #include "db/write_thread.h"
  30. #include "logging/event_logger.h"
  31. #include "monitoring/instrumented_mutex.h"
  32. #include "options/db_options.h"
  33. #include "port/port.h"
  34. #include "rocksdb/db.h"
  35. #include "rocksdb/env.h"
  36. #include "rocksdb/listener.h"
  37. #include "rocksdb/memtablerep.h"
  38. #include "rocksdb/transaction_log.h"
  39. #include "table/scoped_arena_iterator.h"
  40. #include "util/autovector.h"
  41. #include "util/stop_watch.h"
  42. #include "util/thread_local.h"
  43. namespace ROCKSDB_NAMESPACE {
  // Forward declarations of types named by this header.
  // In the declarations visible in this file, MemTable, SnapshotChecker,
  // Version, VersionEdit and VersionSet appear only behind pointers.
  // DBImpl, TableCache and Arena are not referenced by the FlushJob
  // declaration shown here.
  44. class DBImpl;
  45. class MemTable;
  46. class SnapshotChecker;
  47. class TableCache;
  48. class Version;
  49. class VersionEdit;
  50. class VersionSet;
  51. class Arena;
  // FlushJob holds the inputs, bookkeeping and results for one job that
  // flushes memtables of a single column family (see max_memtable_id_ below
  // for how the memtables are selected).
  //
  // Call protocol, as stated by the comments in this class:
  //   1. Construct the job.
  //   2. Call PickMemTable() while holding the DB mutex.
  //   3. Call exactly one of Run() or Cancel().
  //
  // Only the declaration lives in this header; apart from the two inline
  // accessors, every member function is defined elsewhere.
  52. class FlushJob {
  53. public:
  // Constructor notes (grounded in the member declarations further down):
  //  - dbname_, db_options_ and mutable_cf_options_ are reference members, so
  //    the objects they refer to must outlive the FlushJob. They are
  //    presumably bound to the like-named arguments; the constructor body is
  //    not visible here -- confirm in the implementation file.
  //  - file_options_ and existing_snapshots_ are stored by value.
  //  - All pointer arguments have matching raw-pointer members; this header
  //    shows nothing that would release them.
  //  - max_memtable_id may be null; see the comment on max_memtable_id_.
  54. // TODO(icanadi) make effort to reduce number of parameters here
  55. // IMPORTANT: mutable_cf_options needs to be alive while FlushJob is alive
  56. FlushJob(const std::string& dbname, ColumnFamilyData* cfd,
  57. const ImmutableDBOptions& db_options,
  58. const MutableCFOptions& mutable_cf_options,
  59. const uint64_t* max_memtable_id, const FileOptions& file_options,
  60. VersionSet* versions, InstrumentedMutex* db_mutex,
  61. std::atomic<bool>* shutting_down,
  62. std::vector<SequenceNumber> existing_snapshots,
  63. SequenceNumber earliest_write_conflict_snapshot,
  64. SnapshotChecker* snapshot_checker, JobContext* job_context,
  65. LogBuffer* log_buffer, Directory* db_directory,
  66. Directory* output_file_directory, CompressionType output_compression,
  67. Statistics* stats, EventLogger* event_logger, bool measure_io_stats,
  68. const bool sync_output_directory, const bool write_manifest,
  69. Env::Priority thread_pri);
  // User-declared destructor; its definition is outside this header.
  70. ~FlushJob();
  71. // Require db_mutex held.
  72. // Once PickMemTable() is called, either Run() or Cancel() has to be called.
  73. void PickMemTable();
  // Executes the job and reports the outcome as a Status. Both arguments are
  // optional and default to nullptr.
  // NOTE(review): how prep_tracker and file_meta are used is decided by the
  // implementation, which is not visible here -- confirm before relying on
  // either being an input or an output.
  74. Status Run(LogsWithPrepTracker* prep_tracker = nullptr,
  75. FileMetaData* file_meta = nullptr);
  // The alternative to Run() once PickMemTable() has been called (see above).
  76. void Cancel();
  // Read-only view of mems_, which is one of the variables set by
  // PickMemTable(). The reference aliases the member, so it is only usable
  // while this FlushJob is alive.
  77. const autovector<MemTable*>& GetMemTables() const { return mems_; }
  78. #ifndef ROCKSDB_LITE
  // Returns a non-const pointer to committed_flush_jobs_info_, so the caller
  // can modify the list in place. Only declared when ROCKSDB_LITE is not
  // defined.
  // NOTE(review): std::unique_ptr is used here, but <memory> is not among this
  // header's direct includes; it is presumably reached transitively.
  79. std::list<std::unique_ptr<FlushJobInfo>>* GetCommittedFlushJobsInfo() {
  80. return &committed_flush_jobs_info_;
  81. }
  82. #endif // !ROCKSDB_LITE
  83. private:
  // Private helpers. Only their signatures are visible in this header; the
  // descriptions that follow from the names alone are deliberately omitted.
  84. void ReportStartedFlush();
  85. void ReportFlushInputSize(const autovector<MemTable*>& mems);
  86. void RecordFlushIOStats();
  87. Status WriteLevel0Table();
  88. #ifndef ROCKSDB_LITE
  // Builds a heap-allocated FlushJobInfo owned by the returned unique_ptr.
  // Const member function; not declared in ROCKSDB_LITE builds.
  89. std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
  90. #endif // !ROCKSDB_LITE
  // --- Data members -------------------------------------------------------
  // Declaration order below is also the order in which the members are
  // initialised, so it must stay in step with the constructor's initialiser
  // list in the implementation file.
  //
  // Reference members: the referenced objects are not owned and must outlive
  // this FlushJob.
  91. const std::string& dbname_;
  92. ColumnFamilyData* cfd_;
  93. const ImmutableDBOptions& db_options_;
  94. const MutableCFOptions& mutable_cf_options_;
  95. // Pointer to a variable storing the largest memtable id to flush in this
  96. // flush job. RocksDB uses this variable to select the memtables to flush in
  97. // this job. All memtables in this column family with an ID smaller than or
  98. // equal to *max_memtable_id_ will be selected for flush. If null, then all
  99. // memtables in the column family will be selected.
  100. const uint64_t* max_memtable_id_;
  // Stored by value (a copy), unlike the option references above.
  101. const FileOptions file_options_;
  102. VersionSet* versions_;
  103. InstrumentedMutex* db_mutex_;
  104. std::atomic<bool>* shutting_down_;
  // Held by value; the constructor takes the vector by value as well.
  105. std::vector<SequenceNumber> existing_snapshots_;
  106. SequenceNumber earliest_write_conflict_snapshot_;
  107. SnapshotChecker* snapshot_checker_;
  108. JobContext* job_context_;
  109. LogBuffer* log_buffer_;
  110. Directory* db_directory_;
  111. Directory* output_file_directory_;
  112. CompressionType output_compression_;
  113. Statistics* stats_;
  114. EventLogger* event_logger_;
  // Value member with no matching constructor argument.
  // NOTE(review): presumably filled in while the output table is written --
  // confirm in the implementation file.
  115. TableProperties table_properties_;
  116. bool measure_io_stats_;
  117. // True if this flush job should call fsync on the output directory. False
  118. // otherwise.
  119. // Usually sync_output_directory_ is true. A flush job needs to call sync on
  120. // the output directory before committing to the MANIFEST.
  121. // However, an individual flush job does not have to call sync on the output
  122. // directory if it is part of an atomic flush. After all flush jobs in the
  123. // atomic flush succeed, call sync once on each distinct output directory.
  124. const bool sync_output_directory_;
  125. // True if this flush job should write to MANIFEST after successfully
  126. // flushing memtables. False otherwise.
  127. // Usually write_manifest_ is true. A flush job commits to the MANIFEST after
  128. // flushing the memtables.
  129. // However, an individual flush job cannot rashly write to the MANIFEST
  130. // immediately after it finishes the flush if it is part of an atomic flush.
  131. // In this case, only after all flush jobs succeed in flush can RocksDB
  132. // commit to the MANIFEST.
  133. const bool write_manifest_;
  134. // The current flush job can commit flush result of a concurrent flush job.
  135. // We collect FlushJobInfo of all jobs committed by current job and fire
  136. // OnFlushCompleted for them.
  137. std::list<std::unique_ptr<FlushJobInfo>> committed_flush_jobs_info_;
  138. // Variables below are set by PickMemTable():
  139. FileMetaData meta_;
  140. autovector<MemTable*> mems_;
  141. VersionEdit* edit_;
  142. Version* base_;
  // NOTE(review): this is the only data member without the trailing
  // underscore used everywhere else in the class. Renaming it would also
  // require changing the implementation file, so it is left as is.
  143. bool pick_memtable_called;
  // NOTE(review): although it follows the "set by PickMemTable()" heading,
  // this member has a same-named constructor argument (thread_pri), so it is
  // presumably set at construction -- confirm in the implementation file.
  144. Env::Priority thread_pri_;
  145. };
  146. } // namespace ROCKSDB_NAMESPACE