transaction_util.cc 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #ifndef ROCKSDB_LITE
  6. #include "utilities/transactions/transaction_util.h"
  7. #include <cinttypes>
  8. #include <string>
  9. #include <vector>
  10. #include "db/db_impl/db_impl.h"
  11. #include "rocksdb/status.h"
  12. #include "rocksdb/utilities/write_batch_with_index.h"
  13. #include "util/string_util.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. Status TransactionUtil::CheckKeyForConflicts(
  16. DBImpl* db_impl, ColumnFamilyHandle* column_family, const std::string& key,
  17. SequenceNumber snap_seq, bool cache_only, ReadCallback* snap_checker,
  18. SequenceNumber min_uncommitted) {
  19. Status result;
  20. auto cfh = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family);
  21. auto cfd = cfh->cfd();
  22. SuperVersion* sv = db_impl->GetAndRefSuperVersion(cfd);
  23. if (sv == nullptr) {
  24. result = Status::InvalidArgument("Could not access column family " +
  25. cfh->GetName());
  26. }
  27. if (result.ok()) {
  28. SequenceNumber earliest_seq =
  29. db_impl->GetEarliestMemTableSequenceNumber(sv, true);
  30. result = CheckKey(db_impl, sv, earliest_seq, snap_seq, key, cache_only,
  31. snap_checker, min_uncommitted);
  32. db_impl->ReturnAndCleanupSuperVersion(cfd, sv);
  33. }
  34. return result;
  35. }
  36. Status TransactionUtil::CheckKey(DBImpl* db_impl, SuperVersion* sv,
  37. SequenceNumber earliest_seq,
  38. SequenceNumber snap_seq,
  39. const std::string& key, bool cache_only,
  40. ReadCallback* snap_checker,
  41. SequenceNumber min_uncommitted) {
  42. // When `min_uncommitted` is provided, keys are not always committed
  43. // in sequence number order, and `snap_checker` is used to check whether
  44. // specific sequence number is in the database is visible to the transaction.
  45. // So `snap_checker` must be provided.
  46. assert(min_uncommitted == kMaxSequenceNumber || snap_checker != nullptr);
  47. Status result;
  48. bool need_to_read_sst = false;
  49. // Since it would be too slow to check the SST files, we will only use
  50. // the memtables to check whether there have been any recent writes
  51. // to this key after it was accessed in this transaction. But if the
  52. // Memtables do not contain a long enough history, we must fail the
  53. // transaction.
  54. if (earliest_seq == kMaxSequenceNumber) {
  55. // The age of this memtable is unknown. Cannot rely on it to check
  56. // for recent writes. This error shouldn't happen often in practice as
  57. // the Memtable should have a valid earliest sequence number except in some
  58. // corner cases (such as error cases during recovery).
  59. need_to_read_sst = true;
  60. if (cache_only) {
  61. result = Status::TryAgain(
  62. "Transaction could not check for conflicts as the MemTable does not "
  63. "contain a long enough history to check write at SequenceNumber: ",
  64. ToString(snap_seq));
  65. }
  66. } else if (snap_seq < earliest_seq || min_uncommitted <= earliest_seq) {
  67. // Use <= for min_uncommitted since earliest_seq is actually the largest sec
  68. // before this memtable was created
  69. need_to_read_sst = true;
  70. if (cache_only) {
  71. // The age of this memtable is too new to use to check for recent
  72. // writes.
  73. char msg[300];
  74. snprintf(msg, sizeof(msg),
  75. "Transaction could not check for conflicts for operation at "
  76. "SequenceNumber %" PRIu64
  77. " as the MemTable only contains changes newer than "
  78. "SequenceNumber %" PRIu64
  79. ". Increasing the value of the "
  80. "max_write_buffer_size_to_maintain option could reduce the "
  81. "frequency "
  82. "of this error.",
  83. snap_seq, earliest_seq);
  84. result = Status::TryAgain(msg);
  85. }
  86. }
  87. if (result.ok()) {
  88. SequenceNumber seq = kMaxSequenceNumber;
  89. bool found_record_for_key = false;
  90. // When min_uncommitted == kMaxSequenceNumber, writes are committed in
  91. // sequence number order, so only keys larger than `snap_seq` can cause
  92. // conflict.
  93. // When min_uncommitted != kMaxSequenceNumber, keys lower than
  94. // min_uncommitted will not triggered conflicts, while keys larger than
  95. // min_uncommitted might create conflicts, so we need to read them out
  96. // from the DB, and call callback to snap_checker to determine. So only
  97. // keys lower than min_uncommitted can be skipped.
  98. SequenceNumber lower_bound_seq =
  99. (min_uncommitted == kMaxSequenceNumber) ? snap_seq : min_uncommitted;
  100. Status s = db_impl->GetLatestSequenceForKey(sv, key, !need_to_read_sst,
  101. lower_bound_seq, &seq,
  102. &found_record_for_key);
  103. if (!(s.ok() || s.IsNotFound() || s.IsMergeInProgress())) {
  104. result = s;
  105. } else if (found_record_for_key) {
  106. bool write_conflict = snap_checker == nullptr
  107. ? snap_seq < seq
  108. : !snap_checker->IsVisible(seq);
  109. if (write_conflict) {
  110. result = Status::Busy();
  111. }
  112. }
  113. }
  114. return result;
  115. }
  116. Status TransactionUtil::CheckKeysForConflicts(DBImpl* db_impl,
  117. const TransactionKeyMap& key_map,
  118. bool cache_only) {
  119. Status result;
  120. for (auto& key_map_iter : key_map) {
  121. uint32_t cf_id = key_map_iter.first;
  122. const auto& keys = key_map_iter.second;
  123. SuperVersion* sv = db_impl->GetAndRefSuperVersion(cf_id);
  124. if (sv == nullptr) {
  125. result = Status::InvalidArgument("Could not access column family " +
  126. ToString(cf_id));
  127. break;
  128. }
  129. SequenceNumber earliest_seq =
  130. db_impl->GetEarliestMemTableSequenceNumber(sv, true);
  131. // For each of the keys in this transaction, check to see if someone has
  132. // written to this key since the start of the transaction.
  133. for (const auto& key_iter : keys) {
  134. const auto& key = key_iter.first;
  135. const SequenceNumber key_seq = key_iter.second.seq;
  136. result = CheckKey(db_impl, sv, earliest_seq, key_seq, key, cache_only);
  137. if (!result.ok()) {
  138. break;
  139. }
  140. }
  141. db_impl->ReturnAndCleanupSuperVersion(cf_id, sv);
  142. if (!result.ok()) {
  143. break;
  144. }
  145. }
  146. return result;
  147. }
  148. } // namespace ROCKSDB_NAMESPACE
  149. #endif // ROCKSDB_LITE