transaction_base.cc 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #ifndef ROCKSDB_LITE
  6. #include "utilities/transactions/transaction_base.h"
  7. #include <cinttypes>
  8. #include "db/column_family.h"
  9. #include "db/db_impl/db_impl.h"
  10. #include "rocksdb/comparator.h"
  11. #include "rocksdb/db.h"
  12. #include "rocksdb/status.h"
  13. #include "util/cast_util.h"
  14. #include "util/string_util.h"
  15. namespace ROCKSDB_NAMESPACE {
  16. TransactionBaseImpl::TransactionBaseImpl(DB* db,
  17. const WriteOptions& write_options)
  18. : db_(db),
  19. dbimpl_(static_cast_with_check<DBImpl, DB>(db)),
  20. write_options_(write_options),
  21. cmp_(GetColumnFamilyUserComparator(db->DefaultColumnFamily())),
  22. start_time_(db_->GetEnv()->NowMicros()),
  23. write_batch_(cmp_, 0, true, 0),
  24. indexing_enabled_(true) {
  25. assert(dynamic_cast<DBImpl*>(db_) != nullptr);
  26. log_number_ = 0;
  27. if (dbimpl_->allow_2pc()) {
  28. InitWriteBatch();
  29. }
  30. }
  31. TransactionBaseImpl::~TransactionBaseImpl() {
  32. // Release snapshot if snapshot is set
  33. SetSnapshotInternal(nullptr);
  34. }
  35. void TransactionBaseImpl::Clear() {
  36. save_points_.reset(nullptr);
  37. write_batch_.Clear();
  38. commit_time_batch_.Clear();
  39. tracked_keys_.clear();
  40. num_puts_ = 0;
  41. num_deletes_ = 0;
  42. num_merges_ = 0;
  43. if (dbimpl_->allow_2pc()) {
  44. InitWriteBatch();
  45. }
  46. }
  47. void TransactionBaseImpl::Reinitialize(DB* db,
  48. const WriteOptions& write_options) {
  49. Clear();
  50. ClearSnapshot();
  51. id_ = 0;
  52. db_ = db;
  53. name_.clear();
  54. log_number_ = 0;
  55. write_options_ = write_options;
  56. start_time_ = db_->GetEnv()->NowMicros();
  57. indexing_enabled_ = true;
  58. cmp_ = GetColumnFamilyUserComparator(db_->DefaultColumnFamily());
  59. }
  60. void TransactionBaseImpl::SetSnapshot() {
  61. const Snapshot* snapshot = dbimpl_->GetSnapshotForWriteConflictBoundary();
  62. SetSnapshotInternal(snapshot);
  63. }
  64. void TransactionBaseImpl::SetSnapshotInternal(const Snapshot* snapshot) {
  65. // Set a custom deleter for the snapshot_ SharedPtr as the snapshot needs to
  66. // be released, not deleted when it is no longer referenced.
  67. snapshot_.reset(snapshot, std::bind(&TransactionBaseImpl::ReleaseSnapshot,
  68. this, std::placeholders::_1, db_));
  69. snapshot_needed_ = false;
  70. snapshot_notifier_ = nullptr;
  71. }
  72. void TransactionBaseImpl::SetSnapshotOnNextOperation(
  73. std::shared_ptr<TransactionNotifier> notifier) {
  74. snapshot_needed_ = true;
  75. snapshot_notifier_ = notifier;
  76. }
  77. void TransactionBaseImpl::SetSnapshotIfNeeded() {
  78. if (snapshot_needed_) {
  79. std::shared_ptr<TransactionNotifier> notifier = snapshot_notifier_;
  80. SetSnapshot();
  81. if (notifier != nullptr) {
  82. notifier->SnapshotCreated(GetSnapshot());
  83. }
  84. }
  85. }
  86. Status TransactionBaseImpl::TryLock(ColumnFamilyHandle* column_family,
  87. const SliceParts& key, bool read_only,
  88. bool exclusive, const bool do_validate,
  89. const bool assume_tracked) {
  90. size_t key_size = 0;
  91. for (int i = 0; i < key.num_parts; ++i) {
  92. key_size += key.parts[i].size();
  93. }
  94. std::string str;
  95. str.reserve(key_size);
  96. for (int i = 0; i < key.num_parts; ++i) {
  97. str.append(key.parts[i].data(), key.parts[i].size());
  98. }
  99. return TryLock(column_family, str, read_only, exclusive, do_validate,
  100. assume_tracked);
  101. }
  102. void TransactionBaseImpl::SetSavePoint() {
  103. if (save_points_ == nullptr) {
  104. save_points_.reset(new std::stack<TransactionBaseImpl::SavePoint, autovector<TransactionBaseImpl::SavePoint>>());
  105. }
  106. save_points_->emplace(snapshot_, snapshot_needed_, snapshot_notifier_,
  107. num_puts_, num_deletes_, num_merges_);
  108. write_batch_.SetSavePoint();
  109. }
  110. Status TransactionBaseImpl::RollbackToSavePoint() {
  111. if (save_points_ != nullptr && save_points_->size() > 0) {
  112. // Restore saved SavePoint
  113. TransactionBaseImpl::SavePoint& save_point = save_points_->top();
  114. snapshot_ = save_point.snapshot_;
  115. snapshot_needed_ = save_point.snapshot_needed_;
  116. snapshot_notifier_ = save_point.snapshot_notifier_;
  117. num_puts_ = save_point.num_puts_;
  118. num_deletes_ = save_point.num_deletes_;
  119. num_merges_ = save_point.num_merges_;
  120. // Rollback batch
  121. Status s = write_batch_.RollbackToSavePoint();
  122. assert(s.ok());
  123. // Rollback any keys that were tracked since the last savepoint
  124. const TransactionKeyMap& key_map = save_point.new_keys_;
  125. for (const auto& key_map_iter : key_map) {
  126. uint32_t column_family_id = key_map_iter.first;
  127. auto& keys = key_map_iter.second;
  128. auto& cf_tracked_keys = tracked_keys_[column_family_id];
  129. for (const auto& key_iter : keys) {
  130. const std::string& key = key_iter.first;
  131. uint32_t num_reads = key_iter.second.num_reads;
  132. uint32_t num_writes = key_iter.second.num_writes;
  133. auto tracked_keys_iter = cf_tracked_keys.find(key);
  134. assert(tracked_keys_iter != cf_tracked_keys.end());
  135. // Decrement the total reads/writes of this key by the number of
  136. // reads/writes done since the last SavePoint.
  137. if (num_reads > 0) {
  138. assert(tracked_keys_iter->second.num_reads >= num_reads);
  139. tracked_keys_iter->second.num_reads -= num_reads;
  140. }
  141. if (num_writes > 0) {
  142. assert(tracked_keys_iter->second.num_writes >= num_writes);
  143. tracked_keys_iter->second.num_writes -= num_writes;
  144. }
  145. if (tracked_keys_iter->second.num_reads == 0 &&
  146. tracked_keys_iter->second.num_writes == 0) {
  147. cf_tracked_keys.erase(tracked_keys_iter);
  148. }
  149. }
  150. }
  151. save_points_->pop();
  152. return s;
  153. } else {
  154. assert(write_batch_.RollbackToSavePoint().IsNotFound());
  155. return Status::NotFound();
  156. }
  157. }
  158. Status TransactionBaseImpl::PopSavePoint() {
  159. if (save_points_ == nullptr ||
  160. save_points_->empty()) {
  161. // No SavePoint yet.
  162. assert(write_batch_.PopSavePoint().IsNotFound());
  163. return Status::NotFound();
  164. }
  165. assert(!save_points_->empty());
  166. // If there is another savepoint A below the current savepoint B, then A needs
  167. // to inherit tracked_keys in B so that if we rollback to savepoint A, we
  168. // remember to unlock keys in B. If there is no other savepoint below, then we
  169. // can safely discard savepoint info.
  170. if (save_points_->size() == 1) {
  171. save_points_->pop();
  172. } else {
  173. TransactionBaseImpl::SavePoint top;
  174. std::swap(top, save_points_->top());
  175. save_points_->pop();
  176. const TransactionKeyMap& curr_cf_key_map = top.new_keys_;
  177. TransactionKeyMap& prev_cf_key_map = save_points_->top().new_keys_;
  178. for (const auto& curr_cf_key_iter : curr_cf_key_map) {
  179. uint32_t column_family_id = curr_cf_key_iter.first;
  180. const std::unordered_map<std::string, TransactionKeyMapInfo>& curr_keys =
  181. curr_cf_key_iter.second;
  182. // If cfid was not previously tracked, just copy everything over.
  183. auto prev_keys_iter = prev_cf_key_map.find(column_family_id);
  184. if (prev_keys_iter == prev_cf_key_map.end()) {
  185. prev_cf_key_map.emplace(curr_cf_key_iter);
  186. } else {
  187. std::unordered_map<std::string, TransactionKeyMapInfo>& prev_keys =
  188. prev_keys_iter->second;
  189. for (const auto& key_iter : curr_keys) {
  190. const std::string& key = key_iter.first;
  191. const TransactionKeyMapInfo& info = key_iter.second;
  192. // If key was not previously tracked, just copy the whole struct over.
  193. // Otherwise, some merging needs to occur.
  194. auto prev_info = prev_keys.find(key);
  195. if (prev_info == prev_keys.end()) {
  196. prev_keys.emplace(key_iter);
  197. } else {
  198. prev_info->second.Merge(info);
  199. }
  200. }
  201. }
  202. }
  203. }
  204. return write_batch_.PopSavePoint();
  205. }
  206. Status TransactionBaseImpl::Get(const ReadOptions& read_options,
  207. ColumnFamilyHandle* column_family,
  208. const Slice& key, std::string* value) {
  209. assert(value != nullptr);
  210. PinnableSlice pinnable_val(value);
  211. assert(!pinnable_val.IsPinned());
  212. auto s = Get(read_options, column_family, key, &pinnable_val);
  213. if (s.ok() && pinnable_val.IsPinned()) {
  214. value->assign(pinnable_val.data(), pinnable_val.size());
  215. } // else value is already assigned
  216. return s;
  217. }
  218. Status TransactionBaseImpl::Get(const ReadOptions& read_options,
  219. ColumnFamilyHandle* column_family,
  220. const Slice& key, PinnableSlice* pinnable_val) {
  221. return write_batch_.GetFromBatchAndDB(db_, read_options, column_family, key,
  222. pinnable_val);
  223. }
  224. Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options,
  225. ColumnFamilyHandle* column_family,
  226. const Slice& key, std::string* value,
  227. bool exclusive,
  228. const bool do_validate) {
  229. if (!do_validate && read_options.snapshot != nullptr) {
  230. return Status::InvalidArgument(
  231. "If do_validate is false then GetForUpdate with snapshot is not "
  232. "defined.");
  233. }
  234. Status s =
  235. TryLock(column_family, key, true /* read_only */, exclusive, do_validate);
  236. if (s.ok() && value != nullptr) {
  237. assert(value != nullptr);
  238. PinnableSlice pinnable_val(value);
  239. assert(!pinnable_val.IsPinned());
  240. s = Get(read_options, column_family, key, &pinnable_val);
  241. if (s.ok() && pinnable_val.IsPinned()) {
  242. value->assign(pinnable_val.data(), pinnable_val.size());
  243. } // else value is already assigned
  244. }
  245. return s;
  246. }
  247. Status TransactionBaseImpl::GetForUpdate(const ReadOptions& read_options,
  248. ColumnFamilyHandle* column_family,
  249. const Slice& key,
  250. PinnableSlice* pinnable_val,
  251. bool exclusive,
  252. const bool do_validate) {
  253. if (!do_validate && read_options.snapshot != nullptr) {
  254. return Status::InvalidArgument(
  255. "If do_validate is false then GetForUpdate with snapshot is not "
  256. "defined.");
  257. }
  258. Status s =
  259. TryLock(column_family, key, true /* read_only */, exclusive, do_validate);
  260. if (s.ok() && pinnable_val != nullptr) {
  261. s = Get(read_options, column_family, key, pinnable_val);
  262. }
  263. return s;
  264. }
  265. std::vector<Status> TransactionBaseImpl::MultiGet(
  266. const ReadOptions& read_options,
  267. const std::vector<ColumnFamilyHandle*>& column_family,
  268. const std::vector<Slice>& keys, std::vector<std::string>* values) {
  269. size_t num_keys = keys.size();
  270. values->resize(num_keys);
  271. std::vector<Status> stat_list(num_keys);
  272. for (size_t i = 0; i < num_keys; ++i) {
  273. std::string* value = values ? &(*values)[i] : nullptr;
  274. stat_list[i] = Get(read_options, column_family[i], keys[i], value);
  275. }
  276. return stat_list;
  277. }
  278. void TransactionBaseImpl::MultiGet(const ReadOptions& read_options,
  279. ColumnFamilyHandle* column_family,
  280. const size_t num_keys, const Slice* keys,
  281. PinnableSlice* values, Status* statuses,
  282. const bool sorted_input) {
  283. write_batch_.MultiGetFromBatchAndDB(db_, read_options, column_family,
  284. num_keys, keys, values, statuses,
  285. sorted_input);
  286. }
  287. std::vector<Status> TransactionBaseImpl::MultiGetForUpdate(
  288. const ReadOptions& read_options,
  289. const std::vector<ColumnFamilyHandle*>& column_family,
  290. const std::vector<Slice>& keys, std::vector<std::string>* values) {
  291. // Regardless of whether the MultiGet succeeded, track these keys.
  292. size_t num_keys = keys.size();
  293. values->resize(num_keys);
  294. // Lock all keys
  295. for (size_t i = 0; i < num_keys; ++i) {
  296. Status s = TryLock(column_family[i], keys[i], true /* read_only */,
  297. true /* exclusive */);
  298. if (!s.ok()) {
  299. // Fail entire multiget if we cannot lock all keys
  300. return std::vector<Status>(num_keys, s);
  301. }
  302. }
  303. // TODO(agiardullo): optimize multiget?
  304. std::vector<Status> stat_list(num_keys);
  305. for (size_t i = 0; i < num_keys; ++i) {
  306. std::string* value = values ? &(*values)[i] : nullptr;
  307. stat_list[i] = Get(read_options, column_family[i], keys[i], value);
  308. }
  309. return stat_list;
  310. }
  311. Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options) {
  312. Iterator* db_iter = db_->NewIterator(read_options);
  313. assert(db_iter);
  314. return write_batch_.NewIteratorWithBase(db_iter);
  315. }
  316. Iterator* TransactionBaseImpl::GetIterator(const ReadOptions& read_options,
  317. ColumnFamilyHandle* column_family) {
  318. Iterator* db_iter = db_->NewIterator(read_options, column_family);
  319. assert(db_iter);
  320. return write_batch_.NewIteratorWithBase(column_family, db_iter,
  321. &read_options);
  322. }
  323. Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
  324. const Slice& key, const Slice& value,
  325. const bool assume_tracked) {
  326. const bool do_validate = !assume_tracked;
  327. Status s = TryLock(column_family, key, false /* read_only */,
  328. true /* exclusive */, do_validate, assume_tracked);
  329. if (s.ok()) {
  330. s = GetBatchForWrite()->Put(column_family, key, value);
  331. if (s.ok()) {
  332. num_puts_++;
  333. }
  334. }
  335. return s;
  336. }
  337. Status TransactionBaseImpl::Put(ColumnFamilyHandle* column_family,
  338. const SliceParts& key, const SliceParts& value,
  339. const bool assume_tracked) {
  340. const bool do_validate = !assume_tracked;
  341. Status s = TryLock(column_family, key, false /* read_only */,
  342. true /* exclusive */, do_validate, assume_tracked);
  343. if (s.ok()) {
  344. s = GetBatchForWrite()->Put(column_family, key, value);
  345. if (s.ok()) {
  346. num_puts_++;
  347. }
  348. }
  349. return s;
  350. }
  351. Status TransactionBaseImpl::Merge(ColumnFamilyHandle* column_family,
  352. const Slice& key, const Slice& value,
  353. const bool assume_tracked) {
  354. const bool do_validate = !assume_tracked;
  355. Status s = TryLock(column_family, key, false /* read_only */,
  356. true /* exclusive */, do_validate, assume_tracked);
  357. if (s.ok()) {
  358. s = GetBatchForWrite()->Merge(column_family, key, value);
  359. if (s.ok()) {
  360. num_merges_++;
  361. }
  362. }
  363. return s;
  364. }
  365. Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family,
  366. const Slice& key,
  367. const bool assume_tracked) {
  368. const bool do_validate = !assume_tracked;
  369. Status s = TryLock(column_family, key, false /* read_only */,
  370. true /* exclusive */, do_validate, assume_tracked);
  371. if (s.ok()) {
  372. s = GetBatchForWrite()->Delete(column_family, key);
  373. if (s.ok()) {
  374. num_deletes_++;
  375. }
  376. }
  377. return s;
  378. }
  379. Status TransactionBaseImpl::Delete(ColumnFamilyHandle* column_family,
  380. const SliceParts& key,
  381. const bool assume_tracked) {
  382. const bool do_validate = !assume_tracked;
  383. Status s = TryLock(column_family, key, false /* read_only */,
  384. true /* exclusive */, do_validate, assume_tracked);
  385. if (s.ok()) {
  386. s = GetBatchForWrite()->Delete(column_family, key);
  387. if (s.ok()) {
  388. num_deletes_++;
  389. }
  390. }
  391. return s;
  392. }
  393. Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family,
  394. const Slice& key,
  395. const bool assume_tracked) {
  396. const bool do_validate = !assume_tracked;
  397. Status s = TryLock(column_family, key, false /* read_only */,
  398. true /* exclusive */, do_validate, assume_tracked);
  399. if (s.ok()) {
  400. s = GetBatchForWrite()->SingleDelete(column_family, key);
  401. if (s.ok()) {
  402. num_deletes_++;
  403. }
  404. }
  405. return s;
  406. }
  407. Status TransactionBaseImpl::SingleDelete(ColumnFamilyHandle* column_family,
  408. const SliceParts& key,
  409. const bool assume_tracked) {
  410. const bool do_validate = !assume_tracked;
  411. Status s = TryLock(column_family, key, false /* read_only */,
  412. true /* exclusive */, do_validate, assume_tracked);
  413. if (s.ok()) {
  414. s = GetBatchForWrite()->SingleDelete(column_family, key);
  415. if (s.ok()) {
  416. num_deletes_++;
  417. }
  418. }
  419. return s;
  420. }
  421. Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family,
  422. const Slice& key, const Slice& value) {
  423. Status s = TryLock(column_family, key, false /* read_only */,
  424. true /* exclusive */, false /* do_validate */);
  425. if (s.ok()) {
  426. s = GetBatchForWrite()->Put(column_family, key, value);
  427. if (s.ok()) {
  428. num_puts_++;
  429. }
  430. }
  431. return s;
  432. }
  433. Status TransactionBaseImpl::PutUntracked(ColumnFamilyHandle* column_family,
  434. const SliceParts& key,
  435. const SliceParts& value) {
  436. Status s = TryLock(column_family, key, false /* read_only */,
  437. true /* exclusive */, false /* do_validate */);
  438. if (s.ok()) {
  439. s = GetBatchForWrite()->Put(column_family, key, value);
  440. if (s.ok()) {
  441. num_puts_++;
  442. }
  443. }
  444. return s;
  445. }
  446. Status TransactionBaseImpl::MergeUntracked(ColumnFamilyHandle* column_family,
  447. const Slice& key,
  448. const Slice& value) {
  449. Status s = TryLock(column_family, key, false /* read_only */,
  450. true /* exclusive */, false /* do_validate */);
  451. if (s.ok()) {
  452. s = GetBatchForWrite()->Merge(column_family, key, value);
  453. if (s.ok()) {
  454. num_merges_++;
  455. }
  456. }
  457. return s;
  458. }
  459. Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family,
  460. const Slice& key) {
  461. Status s = TryLock(column_family, key, false /* read_only */,
  462. true /* exclusive */, false /* do_validate */);
  463. if (s.ok()) {
  464. s = GetBatchForWrite()->Delete(column_family, key);
  465. if (s.ok()) {
  466. num_deletes_++;
  467. }
  468. }
  469. return s;
  470. }
  471. Status TransactionBaseImpl::DeleteUntracked(ColumnFamilyHandle* column_family,
  472. const SliceParts& key) {
  473. Status s = TryLock(column_family, key, false /* read_only */,
  474. true /* exclusive */, false /* do_validate */);
  475. if (s.ok()) {
  476. s = GetBatchForWrite()->Delete(column_family, key);
  477. if (s.ok()) {
  478. num_deletes_++;
  479. }
  480. }
  481. return s;
  482. }
  483. Status TransactionBaseImpl::SingleDeleteUntracked(
  484. ColumnFamilyHandle* column_family, const Slice& key) {
  485. Status s = TryLock(column_family, key, false /* read_only */,
  486. true /* exclusive */, false /* do_validate */);
  487. if (s.ok()) {
  488. s = GetBatchForWrite()->SingleDelete(column_family, key);
  489. if (s.ok()) {
  490. num_deletes_++;
  491. }
  492. }
  493. return s;
  494. }
  495. void TransactionBaseImpl::PutLogData(const Slice& blob) {
  496. write_batch_.PutLogData(blob);
  497. }
  498. WriteBatchWithIndex* TransactionBaseImpl::GetWriteBatch() {
  499. return &write_batch_;
  500. }
  501. uint64_t TransactionBaseImpl::GetElapsedTime() const {
  502. return (db_->GetEnv()->NowMicros() - start_time_) / 1000;
  503. }
  504. uint64_t TransactionBaseImpl::GetNumPuts() const { return num_puts_; }
  505. uint64_t TransactionBaseImpl::GetNumDeletes() const { return num_deletes_; }
  506. uint64_t TransactionBaseImpl::GetNumMerges() const { return num_merges_; }
  507. uint64_t TransactionBaseImpl::GetNumKeys() const {
  508. uint64_t count = 0;
  509. // sum up locked keys in all column families
  510. for (const auto& key_map_iter : tracked_keys_) {
  511. const auto& keys = key_map_iter.second;
  512. count += keys.size();
  513. }
  514. return count;
  515. }
  516. void TransactionBaseImpl::TrackKey(uint32_t cfh_id, const std::string& key,
  517. SequenceNumber seq, bool read_only,
  518. bool exclusive) {
  519. // Update map of all tracked keys for this transaction
  520. TrackKey(&tracked_keys_, cfh_id, key, seq, read_only, exclusive);
  521. if (save_points_ != nullptr && !save_points_->empty()) {
  522. // Update map of tracked keys in this SavePoint
  523. TrackKey(&save_points_->top().new_keys_, cfh_id, key, seq, read_only,
  524. exclusive);
  525. }
  526. }
  527. // Add a key to the given TransactionKeyMap
  528. // seq for pessimistic transactions is the sequence number from which we know
  529. // there has not been a concurrent update to the key.
  530. void TransactionBaseImpl::TrackKey(TransactionKeyMap* key_map, uint32_t cfh_id,
  531. const std::string& key, SequenceNumber seq,
  532. bool read_only, bool exclusive) {
  533. auto& cf_key_map = (*key_map)[cfh_id];
  534. #ifdef __cpp_lib_unordered_map_try_emplace
  535. // use c++17's try_emplace if available, to avoid rehashing the key
  536. // in case it is not already in the map
  537. auto result = cf_key_map.try_emplace(key, seq);
  538. auto iter = result.first;
  539. if (!result.second && seq < iter->second.seq) {
  540. // Now tracking this key with an earlier sequence number
  541. iter->second.seq = seq;
  542. }
  543. #else
  544. auto iter = cf_key_map.find(key);
  545. if (iter == cf_key_map.end()) {
  546. auto result = cf_key_map.emplace(key, TransactionKeyMapInfo(seq));
  547. iter = result.first;
  548. } else if (seq < iter->second.seq) {
  549. // Now tracking this key with an earlier sequence number
  550. iter->second.seq = seq;
  551. }
  552. #endif
  553. // else we do not update the seq. The smaller the tracked seq, the stronger it
  554. // the guarantee since it implies from the seq onward there has not been a
  555. // concurrent update to the key. So we update the seq if it implies stronger
  556. // guarantees, i.e., if it is smaller than the existing tracked seq.
  557. if (read_only) {
  558. iter->second.num_reads++;
  559. } else {
  560. iter->second.num_writes++;
  561. }
  562. iter->second.exclusive |= exclusive;
  563. }
  564. std::unique_ptr<TransactionKeyMap>
  565. TransactionBaseImpl::GetTrackedKeysSinceSavePoint() {
  566. if (save_points_ != nullptr && !save_points_->empty()) {
  567. // Examine the number of reads/writes performed on all keys written
  568. // since the last SavePoint and compare to the total number of reads/writes
  569. // for each key.
  570. TransactionKeyMap* result = new TransactionKeyMap();
  571. for (const auto& key_map_iter : save_points_->top().new_keys_) {
  572. uint32_t column_family_id = key_map_iter.first;
  573. auto& keys = key_map_iter.second;
  574. auto& cf_tracked_keys = tracked_keys_[column_family_id];
  575. for (const auto& key_iter : keys) {
  576. const std::string& key = key_iter.first;
  577. uint32_t num_reads = key_iter.second.num_reads;
  578. uint32_t num_writes = key_iter.second.num_writes;
  579. auto total_key_info = cf_tracked_keys.find(key);
  580. assert(total_key_info != cf_tracked_keys.end());
  581. assert(total_key_info->second.num_reads >= num_reads);
  582. assert(total_key_info->second.num_writes >= num_writes);
  583. if (total_key_info->second.num_reads == num_reads &&
  584. total_key_info->second.num_writes == num_writes) {
  585. // All the reads/writes to this key were done in the last savepoint.
  586. bool read_only = (num_writes == 0);
  587. TrackKey(result, column_family_id, key, key_iter.second.seq,
  588. read_only, key_iter.second.exclusive);
  589. }
  590. }
  591. }
  592. return std::unique_ptr<TransactionKeyMap>(result);
  593. }
  594. // No SavePoint
  595. return nullptr;
  596. }
  597. // Gets the write batch that should be used for Put/Merge/Deletes.
  598. //
  599. // Returns either a WriteBatch or WriteBatchWithIndex depending on whether
  600. // DisableIndexing() has been called.
  601. WriteBatchBase* TransactionBaseImpl::GetBatchForWrite() {
  602. if (indexing_enabled_) {
  603. // Use WriteBatchWithIndex
  604. return &write_batch_;
  605. } else {
  606. // Don't use WriteBatchWithIndex. Return base WriteBatch.
  607. return write_batch_.GetWriteBatch();
  608. }
  609. }
  610. void TransactionBaseImpl::ReleaseSnapshot(const Snapshot* snapshot, DB* db) {
  611. if (snapshot != nullptr) {
  612. ROCKS_LOG_DETAILS(dbimpl_->immutable_db_options().info_log,
  613. "ReleaseSnapshot %" PRIu64 " Set",
  614. snapshot->GetSequenceNumber());
  615. db->ReleaseSnapshot(snapshot);
  616. }
  617. }
  618. void TransactionBaseImpl::UndoGetForUpdate(ColumnFamilyHandle* column_family,
  619. const Slice& key) {
  620. uint32_t column_family_id = GetColumnFamilyID(column_family);
  621. auto& cf_tracked_keys = tracked_keys_[column_family_id];
  622. std::string key_str = key.ToString();
  623. bool can_decrement = false;
  624. bool can_unlock __attribute__((__unused__)) = false;
  625. if (save_points_ != nullptr && !save_points_->empty()) {
  626. // Check if this key was fetched ForUpdate in this SavePoint
  627. auto& cf_savepoint_keys = save_points_->top().new_keys_[column_family_id];
  628. auto savepoint_iter = cf_savepoint_keys.find(key_str);
  629. if (savepoint_iter != cf_savepoint_keys.end()) {
  630. if (savepoint_iter->second.num_reads > 0) {
  631. savepoint_iter->second.num_reads--;
  632. can_decrement = true;
  633. if (savepoint_iter->second.num_reads == 0 &&
  634. savepoint_iter->second.num_writes == 0) {
  635. // No other GetForUpdates or write on this key in this SavePoint
  636. cf_savepoint_keys.erase(savepoint_iter);
  637. can_unlock = true;
  638. }
  639. }
  640. }
  641. } else {
  642. // No SavePoint set
  643. can_decrement = true;
  644. can_unlock = true;
  645. }
  646. // We can only decrement the read count for this key if we were able to
  647. // decrement the read count in the current SavePoint, OR if there is no
  648. // SavePoint set.
  649. if (can_decrement) {
  650. auto key_iter = cf_tracked_keys.find(key_str);
  651. if (key_iter != cf_tracked_keys.end()) {
  652. if (key_iter->second.num_reads > 0) {
  653. key_iter->second.num_reads--;
  654. if (key_iter->second.num_reads == 0 &&
  655. key_iter->second.num_writes == 0) {
  656. // No other GetForUpdates or writes on this key
  657. assert(can_unlock);
  658. cf_tracked_keys.erase(key_iter);
  659. UnlockGetForUpdate(column_family, key);
  660. }
  661. }
  662. }
  663. }
  664. }
  665. Status TransactionBaseImpl::RebuildFromWriteBatch(WriteBatch* src_batch) {
  666. struct IndexedWriteBatchBuilder : public WriteBatch::Handler {
  667. Transaction* txn_;
  668. DBImpl* db_;
  669. IndexedWriteBatchBuilder(Transaction* txn, DBImpl* db)
  670. : txn_(txn), db_(db) {
  671. assert(dynamic_cast<TransactionBaseImpl*>(txn_) != nullptr);
  672. }
  673. Status PutCF(uint32_t cf, const Slice& key, const Slice& val) override {
  674. return txn_->Put(db_->GetColumnFamilyHandle(cf), key, val);
  675. }
  676. Status DeleteCF(uint32_t cf, const Slice& key) override {
  677. return txn_->Delete(db_->GetColumnFamilyHandle(cf), key);
  678. }
  679. Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
  680. return txn_->SingleDelete(db_->GetColumnFamilyHandle(cf), key);
  681. }
  682. Status MergeCF(uint32_t cf, const Slice& key, const Slice& val) override {
  683. return txn_->Merge(db_->GetColumnFamilyHandle(cf), key, val);
  684. }
  685. // this is used for reconstructing prepared transactions upon
  686. // recovery. there should not be any meta markers in the batches
  687. // we are processing.
  688. Status MarkBeginPrepare(bool) override { return Status::InvalidArgument(); }
  689. Status MarkEndPrepare(const Slice&) override {
  690. return Status::InvalidArgument();
  691. }
  692. Status MarkCommit(const Slice&) override {
  693. return Status::InvalidArgument();
  694. }
  695. Status MarkRollback(const Slice&) override {
  696. return Status::InvalidArgument();
  697. }
  698. };
  699. IndexedWriteBatchBuilder copycat(this, dbimpl_);
  700. return src_batch->Iterate(&copycat);
  701. }
  702. WriteBatch* TransactionBaseImpl::GetCommitTimeWriteBatch() {
  703. return &commit_time_batch_;
  704. }
  705. } // namespace ROCKSDB_NAMESPACE
  706. #endif // ROCKSDB_LITE