expected_state.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. // Copyright (c) 2021-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #ifdef GFLAGS
  6. #pragma once
  7. #include <stdint.h>
  8. #include <atomic>
  9. #include <memory>
  10. #include "db/dbformat.h"
  11. #include "db_stress_tool/expected_value.h"
  12. #include "file/file_util.h"
  13. #include "rocksdb/db.h"
  14. #include "rocksdb/env.h"
  15. #include "rocksdb/file_system.h"
  16. #include "rocksdb/rocksdb_namespace.h"
  17. #include "rocksdb/types.h"
  18. #include "util/string_util.h"
  19. namespace ROCKSDB_NAMESPACE {
  20. // `ExpectedState` provides read/write access to expected values stored in
  21. // `ExpectedState` for every key.
  22. class ExpectedState {
  23. public:
  24. explicit ExpectedState(size_t max_key, size_t num_column_families);
  25. virtual ~ExpectedState() {}
  26. // Requires external locking preventing concurrent execution with any other
  27. // member function.
  28. virtual Status Open(bool create) = 0;
  29. // Requires external locking covering all keys in `cf`.
  30. void ClearColumnFamily(int cf);
  31. // Requires external locking
  32. void SetPersistedSeqno(SequenceNumber seqno) {
  33. persisted_seqno_->store(
  34. std::max(persisted_seqno_->load(std::memory_order_relaxed), seqno),
  35. std::memory_order_relaxed);
  36. }
  37. // Requires external locking
  38. SequenceNumber GetPersistedSeqno() {
  39. return persisted_seqno_->load(std::memory_order_relaxed);
  40. }
  41. // Prepare a Put that will be started but not finished yet
  42. // This is useful for crash-recovery testing when the process may crash
  43. // before updating the corresponding expected value
  44. //
  45. // Requires external locking covering `key` in `cf` to prevent concurrent
  46. // write or delete to the same `key`.
  47. PendingExpectedValue PreparePut(int cf, int64_t key);
  48. // Does not requires external locking.
  49. ExpectedValue Get(int cf, int64_t key);
  50. // Prepare a Delete that will be started but not finished yet.
  51. // This is useful for crash-recovery testing when the process may crash
  52. // before updating the corresponding expected value
  53. //
  54. // Requires external locking covering `key` in `cf` to prevent concurrent
  55. // write or delete to the same `key`.
  56. PendingExpectedValue PrepareDelete(int cf, int64_t key);
  57. // Requires external locking covering `key` in `cf` to prevent concurrent
  58. // write or delete to the same `key`.
  59. PendingExpectedValue PrepareSingleDelete(int cf, int64_t key);
  60. // Requires external locking covering keys in `[begin_key, end_key)` in `cf`
  61. // to prevent concurrent write or delete to the same `key`.
  62. std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
  63. int64_t begin_key,
  64. int64_t end_key);
  65. // Update the expected value for start of an incomplete write or delete
  66. // operation on the key assoicated with this expected value
  67. void Precommit(int cf, int64_t key, const ExpectedValue& value);
  68. // Requires external locking covering `key` in `cf` to prevent concurrent
  69. // delete to the same `key`.
  70. bool Exists(int cf, int64_t key);
  71. // Sync the `value_base` to the corresponding expected value
  72. //
  73. // Requires external locking covering `key` in `cf` or be in single thread
  74. // to prevent concurrent write or delete to the same `key`
  75. void SyncPut(int cf, int64_t key, uint32_t value_base);
  76. // Sync the corresponding expected value to be pending Put
  77. //
  78. // Requires external locking covering `key` in `cf` or be in single thread
  79. // to prevent concurrent write or delete to the same `key`
  80. void SyncPendingPut(int cf, int64_t key);
  81. // Sync the corresponding expected value to be deleted
  82. //
  83. // Requires external locking covering `key` in `cf` or be in single thread
  84. // to prevent concurrent write or delete to the same `key`
  85. void SyncDelete(int cf, int64_t key);
  86. // Sync the corresponding expected values to be deleted
  87. //
  88. // Requires external locking covering keys in `[begin_key, end_key)` in `cf`
  89. // to prevent concurrent write or delete to the same `key`
  90. void SyncDeleteRange(int cf, int64_t begin_key, int64_t end_key);
  91. private:
  92. // Does not requires external locking.
  93. std::atomic<uint32_t>& Value(int cf, int64_t key) const {
  94. return values_[cf * max_key_ + key];
  95. }
  96. // Does not requires external locking
  97. ExpectedValue Load(int cf, int64_t key) const {
  98. return ExpectedValue(Value(cf, key).load());
  99. }
  100. const size_t max_key_;
  101. const size_t num_column_families_;
  102. protected:
  103. size_t GetValuesLen() const {
  104. return sizeof(std::atomic<uint32_t>) * num_column_families_ * max_key_;
  105. }
  106. // Requires external locking preventing concurrent execution with any other
  107. // member function.
  108. void Reset();
  109. std::atomic<uint32_t>* values_;
  110. std::atomic<SequenceNumber>* persisted_seqno_;
  111. };
  112. // A `FileExpectedState` implements `ExpectedState` backed by a file.
  113. class FileExpectedState : public ExpectedState {
  114. public:
  115. explicit FileExpectedState(
  116. const std::string& expected_state_file_path,
  117. const std::string& expected_persisted_seqno_file_path, size_t max_key,
  118. size_t num_column_families);
  119. // Requires external locking preventing concurrent execution with any other
  120. // member function.
  121. Status Open(bool create) override;
  122. private:
  123. static Status CreateFile(Env* env, const EnvOptions& options,
  124. const std::string& file_path,
  125. const std::string& content) {
  126. std::unique_ptr<WritableFile> wfile;
  127. Status status = env->NewWritableFile(file_path, &wfile, options);
  128. if (status.ok()) {
  129. status = wfile->Append(content);
  130. }
  131. return status;
  132. }
  133. static Status MemoryMappedFile(
  134. Env* env, const std::string& file_path,
  135. std::unique_ptr<MemoryMappedFileBuffer>& memory_mapped_file_buffer,
  136. std::size_t size) {
  137. Status status =
  138. env->NewMemoryMappedFileBuffer(file_path, &memory_mapped_file_buffer);
  139. if (status.ok()) {
  140. assert(memory_mapped_file_buffer->GetLen() == size);
  141. }
  142. (void)size;
  143. return status;
  144. }
  145. const std::string expected_state_file_path_;
  146. const std::string expected_persisted_seqno_file_path_;
  147. std::unique_ptr<MemoryMappedFileBuffer> expected_state_mmap_buffer_;
  148. std::unique_ptr<MemoryMappedFileBuffer> expected_persisted_seqno_mmap_buffer_;
  149. };
  150. // An `AnonExpectedState` implements `ExpectedState` backed by a memory
  151. // allocation.
  152. class AnonExpectedState : public ExpectedState {
  153. public:
  154. explicit AnonExpectedState(size_t max_key, size_t num_column_families);
  155. // Requires external locking preventing concurrent execution with any other
  156. // member function.
  157. Status Open(bool create) override;
  158. private:
  159. std::unique_ptr<std::atomic<uint32_t>[]> values_allocation_;
  160. };
  161. // An `ExpectedStateManager` manages data about the expected state of the
  162. // database. It exposes operations for reading and modifying the latest
  163. // expected state.
  164. class ExpectedStateManager {
  165. public:
  166. explicit ExpectedStateManager(size_t max_key, size_t num_column_families);
  167. virtual ~ExpectedStateManager();
  168. // Requires external locking preventing concurrent execution with any other
  169. // member function.
  170. virtual Status Open() = 0;
  171. // Saves expected values for the current state of `db` and begins tracking
  172. // changes. Following a successful `SaveAtAndAfter()`, `Restore()` can be
  173. // called on the same DB, as long as its state does not roll back to before
  174. // its current state.
  175. //
  176. // Requires external locking preventing concurrent execution with any other
  177. // member function. Furthermore, `db` must not be mutated while this function
  178. // is executing.
  179. virtual Status SaveAtAndAfter(DB* db) = 0;
  180. // Returns true if at least one state of historical expected values can be
  181. // restored.
  182. //
  183. // Requires external locking preventing concurrent execution with any other
  184. // member function.
  185. virtual bool HasHistory() = 0;
  186. // Restores expected values according to the current state of `db`. See
  187. // `SaveAtAndAfter()` for conditions where this can be called.
  188. //
  189. // Requires external locking preventing concurrent execution with any other
  190. // member function. Furthermore, `db` must not be mutated while this function
  191. // is executing.
  192. virtual Status Restore(DB* db) = 0;
  193. // Requires external locking covering all keys in `cf`.
  194. void ClearColumnFamily(int cf) { return latest_->ClearColumnFamily(cf); }
  195. void SetPersistedSeqno(SequenceNumber seqno) {
  196. return latest_->SetPersistedSeqno(seqno);
  197. }
  198. SequenceNumber GetPersistedSeqno() { return latest_->GetPersistedSeqno(); }
  199. // See ExpectedState::PreparePut()
  200. PendingExpectedValue PreparePut(int cf, int64_t key) {
  201. return latest_->PreparePut(cf, key);
  202. }
  203. // See ExpectedState::Get()
  204. ExpectedValue Get(int cf, int64_t key) { return latest_->Get(cf, key); }
  205. // See ExpectedState::PrepareDelete()
  206. PendingExpectedValue PrepareDelete(int cf, int64_t key) {
  207. return latest_->PrepareDelete(cf, key);
  208. }
  209. // See ExpectedState::PrepareSingleDelete()
  210. PendingExpectedValue PrepareSingleDelete(int cf, int64_t key) {
  211. return latest_->PrepareSingleDelete(cf, key);
  212. }
  213. // See ExpectedState::PrepareDeleteRange()
  214. std::vector<PendingExpectedValue> PrepareDeleteRange(int cf,
  215. int64_t begin_key,
  216. int64_t end_key) {
  217. return latest_->PrepareDeleteRange(cf, begin_key, end_key);
  218. }
  219. // See ExpectedState::Exists()
  220. bool Exists(int cf, int64_t key) { return latest_->Exists(cf, key); }
  221. // See ExpectedState::SyncPut()
  222. void SyncPut(int cf, int64_t key, uint32_t value_base) {
  223. return latest_->SyncPut(cf, key, value_base);
  224. }
  225. // See ExpectedState::SyncPendingPut()
  226. void SyncPendingPut(int cf, int64_t key) {
  227. return latest_->SyncPendingPut(cf, key);
  228. }
  229. // See ExpectedState::SyncDelete()
  230. void SyncDelete(int cf, int64_t key) { return latest_->SyncDelete(cf, key); }
  231. // See ExpectedState::SyncDeleteRange()
  232. void SyncDeleteRange(int cf, int64_t begin_key, int64_t end_key) {
  233. return latest_->SyncDeleteRange(cf, begin_key, end_key);
  234. }
  235. protected:
  236. const size_t max_key_;
  237. const size_t num_column_families_;
  238. std::unique_ptr<ExpectedState> latest_;
  239. };
  240. // A `FileExpectedStateManager` implements an `ExpectedStateManager` backed by
  241. // a directory of files containing data about the expected state of the
  242. // database.
  243. class FileExpectedStateManager : public ExpectedStateManager {
  244. public:
  245. explicit FileExpectedStateManager(size_t max_key, size_t num_column_families,
  246. std::string expected_state_dir_path);
  247. // Requires external locking preventing concurrent execution with any other
  248. // member function.
  249. Status Open() override;
  250. // See `ExpectedStateManager::SaveAtAndAfter()` API doc.
  251. //
  252. // This implementation makes a copy of "LATEST.state" into
  253. // "<current seqno>.state", and starts a trace in "<current seqno>.trace".
  254. // Due to using external files, a following `Restore()` can happen even
  255. // from a different process.
  256. Status SaveAtAndAfter(DB* db) override;
  257. // See `ExpectedStateManager::HasHistory()` API doc.
  258. bool HasHistory() override;
  259. // See `ExpectedStateManager::Restore()` API doc.
  260. //
  261. // Say `db->GetLatestSequenceNumber()` was `a` last time `SaveAtAndAfter()`
  262. // was called and now it is `b`. Then this function replays `b - a` write
  263. // operations from "`a`.trace" onto "`a`.state", and then copies the resulting
  264. // file into "LATEST.state".
  265. Status Restore(DB* db) override;
  266. private:
  267. // Requires external locking preventing concurrent execution with any other
  268. // member function.
  269. Status Clean();
  270. std::string GetTempPathForFilename(const std::string& filename);
  271. std::string GetPathForFilename(const std::string& filename);
  272. static const std::string kLatestBasename;
  273. static const std::string kStateFilenameSuffix;
  274. static const std::string kTraceFilenameSuffix;
  275. static const std::string kPersistedSeqnoBasename;
  276. static const std::string kPersistedSeqnoFilenameSuffix;
  277. static const std::string kTempFilenamePrefix;
  278. static const std::string kTempFilenameSuffix;
  279. const std::string expected_state_dir_path_;
  280. SequenceNumber saved_seqno_ = kMaxSequenceNumber;
  281. };
  282. // An `AnonExpectedStateManager` implements an `ExpectedStateManager` backed by
  283. // a memory allocation containing data about the expected state of the database.
  284. class AnonExpectedStateManager : public ExpectedStateManager {
  285. public:
  286. explicit AnonExpectedStateManager(size_t max_key, size_t num_column_families);
  287. // See `ExpectedStateManager::SaveAtAndAfter()` API doc.
  288. //
  289. // This implementation returns `Status::NotSupported` since we do not
  290. // currently have a need to keep history of expected state within a process.
  291. Status SaveAtAndAfter(DB* /* db */) override {
  292. return Status::NotSupported();
  293. }
  294. // See `ExpectedStateManager::HasHistory()` API doc.
  295. bool HasHistory() override { return false; }
  296. // See `ExpectedStateManager::Restore()` API doc.
  297. //
  298. // This implementation returns `Status::NotSupported` since we do not
  299. // currently have a need to keep history of expected state within a process.
  300. Status Restore(DB* /* db */) override { return Status::NotSupported(); }
  301. // Requires external locking preventing concurrent execution with any other
  302. // member function.
  303. Status Open() override;
  304. };
  305. } // namespace ROCKSDB_NAMESPACE
  306. #endif // GFLAGS