dbformat.h 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #pragma once
  10. #include <stdio.h>
  11. #include <memory>
  12. #include <string>
  13. #include <utility>
  14. #include "db/lookup_key.h"
  15. #include "db/merge_context.h"
  16. #include "logging/logging.h"
  17. #include "monitoring/perf_context_imp.h"
  18. #include "rocksdb/comparator.h"
  19. #include "rocksdb/db.h"
  20. #include "rocksdb/filter_policy.h"
  21. #include "rocksdb/slice.h"
  22. #include "rocksdb/slice_transform.h"
  23. #include "rocksdb/table.h"
  24. #include "rocksdb/types.h"
  25. #include "util/coding.h"
  26. #include "util/user_comparator_wrapper.h"
  27. namespace ROCKSDB_NAMESPACE {
  28. // The file declares data structures and functions that deal with internal
  29. // keys.
  30. // Each internal key contains a user key, a sequence number (SequenceNumber)
  31. // and a type (ValueType), and they are usually encoded together.
  32. // There are some related helper classes here.
  33. class InternalKey;
  34. // Value types encoded as the last component of internal keys.
  35. // DO NOT CHANGE THESE ENUM VALUES: they are embedded in the on-disk
  36. // data structures.
  37. // The highest bit of the value type needs to be reserved to SST tables
  38. // for them to do more flexible encoding.
  39. enum ValueType : unsigned char {
  40. kTypeDeletion = 0x0,
  41. kTypeValue = 0x1,
  42. kTypeMerge = 0x2,
  43. kTypeLogData = 0x3, // WAL only.
  44. kTypeColumnFamilyDeletion = 0x4, // WAL only.
  45. kTypeColumnFamilyValue = 0x5, // WAL only.
  46. kTypeColumnFamilyMerge = 0x6, // WAL only.
  47. kTypeSingleDeletion = 0x7,
  48. kTypeColumnFamilySingleDeletion = 0x8, // WAL only.
  49. kTypeBeginPrepareXID = 0x9, // WAL only.
  50. kTypeEndPrepareXID = 0xA, // WAL only.
  51. kTypeCommitXID = 0xB, // WAL only.
  52. kTypeRollbackXID = 0xC, // WAL only.
  53. kTypeNoop = 0xD, // WAL only.
  54. kTypeColumnFamilyRangeDeletion = 0xE, // WAL only.
  55. kTypeRangeDeletion = 0xF, // meta block
  56. kTypeColumnFamilyBlobIndex = 0x10, // Blob DB only
  57. kTypeBlobIndex = 0x11, // Blob DB only
  58. // When the prepared record is also persisted in db, we use a different
  59. // record. This is to ensure that the WAL that is generated by a WritePolicy
  60. // is not mistakenly read by another, which would result into data
  61. // inconsistency.
  62. kTypeBeginPersistedPrepareXID = 0x12, // WAL only.
  63. // Similar to kTypeBeginPersistedPrepareXID, this is to ensure that WAL
  64. // generated by WriteUnprepared write policy is not mistakenly read by
  65. // another.
  66. kTypeBeginUnprepareXID = 0x13, // WAL only.
  67. kMaxValue = 0x7F // Not used for storing records.
  68. };
  69. // Defined in dbformat.cc
  70. extern const ValueType kValueTypeForSeek;
  71. extern const ValueType kValueTypeForSeekForPrev;
  72. // Checks whether a type is an inline value type
  73. // (i.e. a type used in memtable skiplist and sst file datablock).
  74. inline bool IsValueType(ValueType t) {
  75. return t <= kTypeMerge || t == kTypeSingleDeletion || t == kTypeBlobIndex;
  76. }
  77. // Checks whether a type is from user operation
  78. // kTypeRangeDeletion is in meta block so this API is separated from above
  79. inline bool IsExtendedValueType(ValueType t) {
  80. return IsValueType(t) || t == kTypeRangeDeletion;
  81. }
  82. // We leave eight bits empty at the bottom so a type and sequence#
  83. // can be packed together into 64-bits.
  84. static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
  85. static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
  86. // The data structure that represents an internal key in the way that user_key,
  87. // sequence number and type are stored in separated forms.
  88. struct ParsedInternalKey {
  89. Slice user_key;
  90. SequenceNumber sequence;
  91. ValueType type;
  92. ParsedInternalKey()
  93. : sequence(kMaxSequenceNumber) // Make code analyzer happy
  94. {} // Intentionally left uninitialized (for speed)
  95. ParsedInternalKey(const Slice& u, const SequenceNumber& seq, ValueType t)
  96. : user_key(u), sequence(seq), type(t) {}
  97. std::string DebugString(bool hex = false) const;
  98. void clear() {
  99. user_key.clear();
  100. sequence = 0;
  101. type = kTypeDeletion;
  102. }
  103. };
  104. // Return the length of the encoding of "key".
  105. inline size_t InternalKeyEncodingLength(const ParsedInternalKey& key) {
  106. return key.user_key.size() + 8;
  107. }
  108. // Pack a sequence number and a ValueType into a uint64_t
  109. extern uint64_t PackSequenceAndType(uint64_t seq, ValueType t);
  110. // Given the result of PackSequenceAndType, store the sequence number in *seq
  111. // and the ValueType in *t.
  112. extern void UnPackSequenceAndType(uint64_t packed, uint64_t* seq, ValueType* t);
  113. EntryType GetEntryType(ValueType value_type);
  114. // Append the serialization of "key" to *result.
  115. extern void AppendInternalKey(std::string* result,
  116. const ParsedInternalKey& key);
  117. // Serialized internal key consists of user key followed by footer.
  118. // This function appends the footer to *result, assuming that *result already
  119. // contains the user key at the end.
  120. extern void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
  121. ValueType t);
  122. // Attempt to parse an internal key from "internal_key". On success,
  123. // stores the parsed data in "*result", and returns true.
  124. //
  125. // On error, returns false, leaves "*result" in an undefined state.
  126. extern bool ParseInternalKey(const Slice& internal_key,
  127. ParsedInternalKey* result);
  128. // Returns the user key portion of an internal key.
  129. inline Slice ExtractUserKey(const Slice& internal_key) {
  130. assert(internal_key.size() >= 8);
  131. return Slice(internal_key.data(), internal_key.size() - 8);
  132. }
  133. inline Slice ExtractUserKeyAndStripTimestamp(const Slice& internal_key,
  134. size_t ts_sz) {
  135. assert(internal_key.size() >= 8 + ts_sz);
  136. return Slice(internal_key.data(), internal_key.size() - 8 - ts_sz);
  137. }
  138. inline Slice StripTimestampFromUserKey(const Slice& user_key, size_t ts_sz) {
  139. assert(user_key.size() >= ts_sz);
  140. return Slice(user_key.data(), user_key.size() - ts_sz);
  141. }
  142. inline uint64_t ExtractInternalKeyFooter(const Slice& internal_key) {
  143. assert(internal_key.size() >= 8);
  144. const size_t n = internal_key.size();
  145. return DecodeFixed64(internal_key.data() + n - 8);
  146. }
  147. inline ValueType ExtractValueType(const Slice& internal_key) {
  148. uint64_t num = ExtractInternalKeyFooter(internal_key);
  149. unsigned char c = num & 0xff;
  150. return static_cast<ValueType>(c);
  151. }
  152. // A comparator for internal keys that uses a specified comparator for
  153. // the user key portion and breaks ties by decreasing sequence number.
  154. class InternalKeyComparator
  155. #ifdef NDEBUG
  156. final
  157. #endif
  158. : public Comparator {
  159. private:
  160. UserComparatorWrapper user_comparator_;
  161. std::string name_;
  162. public:
  163. explicit InternalKeyComparator(const Comparator* c)
  164. : user_comparator_(c),
  165. name_("rocksdb.InternalKeyComparator:" +
  166. std::string(user_comparator_.Name())) {}
  167. virtual ~InternalKeyComparator() {}
  168. virtual const char* Name() const override;
  169. virtual int Compare(const Slice& a, const Slice& b) const override;
  170. // Same as Compare except that it excludes the value type from comparison
  171. virtual int CompareKeySeq(const Slice& a, const Slice& b) const;
  172. virtual void FindShortestSeparator(std::string* start,
  173. const Slice& limit) const override;
  174. virtual void FindShortSuccessor(std::string* key) const override;
  175. const Comparator* user_comparator() const {
  176. return user_comparator_.user_comparator();
  177. }
  178. int Compare(const InternalKey& a, const InternalKey& b) const;
  179. int Compare(const ParsedInternalKey& a, const ParsedInternalKey& b) const;
  180. virtual const Comparator* GetRootComparator() const override {
  181. return user_comparator_.GetRootComparator();
  182. }
  183. };
  184. // The class represent the internal key in encoded form.
  185. class InternalKey {
  186. private:
  187. std::string rep_;
  188. public:
  189. InternalKey() {} // Leave rep_ as empty to indicate it is invalid
  190. InternalKey(const Slice& _user_key, SequenceNumber s, ValueType t) {
  191. AppendInternalKey(&rep_, ParsedInternalKey(_user_key, s, t));
  192. }
  193. // sets the internal key to be bigger or equal to all internal keys with this
  194. // user key
  195. void SetMaxPossibleForUserKey(const Slice& _user_key) {
  196. AppendInternalKey(
  197. &rep_, ParsedInternalKey(_user_key, 0, static_cast<ValueType>(0)));
  198. }
  199. // sets the internal key to be smaller or equal to all internal keys with this
  200. // user key
  201. void SetMinPossibleForUserKey(const Slice& _user_key) {
  202. AppendInternalKey(&rep_, ParsedInternalKey(_user_key, kMaxSequenceNumber,
  203. kValueTypeForSeek));
  204. }
  205. bool Valid() const {
  206. ParsedInternalKey parsed;
  207. return ParseInternalKey(Slice(rep_), &parsed);
  208. }
  209. void DecodeFrom(const Slice& s) { rep_.assign(s.data(), s.size()); }
  210. Slice Encode() const {
  211. assert(!rep_.empty());
  212. return rep_;
  213. }
  214. Slice user_key() const { return ExtractUserKey(rep_); }
  215. size_t size() { return rep_.size(); }
  216. void Set(const Slice& _user_key, SequenceNumber s, ValueType t) {
  217. SetFrom(ParsedInternalKey(_user_key, s, t));
  218. }
  219. void SetFrom(const ParsedInternalKey& p) {
  220. rep_.clear();
  221. AppendInternalKey(&rep_, p);
  222. }
  223. void Clear() { rep_.clear(); }
  224. // The underlying representation.
  225. // Intended only to be used together with ConvertFromUserKey().
  226. std::string* rep() { return &rep_; }
  227. // Assuming that *rep() contains a user key, this method makes internal key
  228. // out of it in-place. This saves a memcpy compared to Set()/SetFrom().
  229. void ConvertFromUserKey(SequenceNumber s, ValueType t) {
  230. AppendInternalKeyFooter(&rep_, s, t);
  231. }
  232. std::string DebugString(bool hex = false) const;
  233. };
  234. inline int InternalKeyComparator::Compare(const InternalKey& a,
  235. const InternalKey& b) const {
  236. return Compare(a.Encode(), b.Encode());
  237. }
  238. inline bool ParseInternalKey(const Slice& internal_key,
  239. ParsedInternalKey* result) {
  240. const size_t n = internal_key.size();
  241. if (n < 8) return false;
  242. uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
  243. unsigned char c = num & 0xff;
  244. result->sequence = num >> 8;
  245. result->type = static_cast<ValueType>(c);
  246. assert(result->type <= ValueType::kMaxValue);
  247. result->user_key = Slice(internal_key.data(), n - 8);
  248. return IsExtendedValueType(result->type);
  249. }
  250. // Update the sequence number in the internal key.
  251. // Guarantees not to invalidate ikey.data().
  252. inline void UpdateInternalKey(std::string* ikey, uint64_t seq, ValueType t) {
  253. size_t ikey_sz = ikey->size();
  254. assert(ikey_sz >= 8);
  255. uint64_t newval = (seq << 8) | t;
  256. // Note: Since C++11, strings are guaranteed to be stored contiguously and
  257. // string::operator[]() is guaranteed not to change ikey.data().
  258. EncodeFixed64(&(*ikey)[ikey_sz - 8], newval);
  259. }
  260. // Get the sequence number from the internal key
  261. inline uint64_t GetInternalKeySeqno(const Slice& internal_key) {
  262. const size_t n = internal_key.size();
  263. assert(n >= 8);
  264. uint64_t num = DecodeFixed64(internal_key.data() + n - 8);
  265. return num >> 8;
  266. }
  267. // The class to store keys in an efficient way. It allows:
  268. // 1. Users can either copy the key into it, or have it point to an unowned
  269. // address.
  270. // 2. For copied key, a short inline buffer is kept to reduce memory
  271. // allocation for smaller keys.
  272. // 3. It tracks user key or internal key, and allow conversion between them.
  273. class IterKey {
  274. public:
  275. IterKey()
  276. : buf_(space_),
  277. key_(buf_),
  278. key_size_(0),
  279. buf_size_(sizeof(space_)),
  280. is_user_key_(true) {}
  281. // No copying allowed
  282. IterKey(const IterKey&) = delete;
  283. void operator=(const IterKey&) = delete;
  284. ~IterKey() { ResetBuffer(); }
  285. // The bool will be picked up by the next calls to SetKey
  286. void SetIsUserKey(bool is_user_key) { is_user_key_ = is_user_key; }
  287. // Returns the key in whichever format that was provided to KeyIter
  288. Slice GetKey() const { return Slice(key_, key_size_); }
  289. Slice GetInternalKey() const {
  290. assert(!IsUserKey());
  291. return Slice(key_, key_size_);
  292. }
  293. Slice GetUserKey() const {
  294. if (IsUserKey()) {
  295. return Slice(key_, key_size_);
  296. } else {
  297. assert(key_size_ >= 8);
  298. return Slice(key_, key_size_ - 8);
  299. }
  300. }
  301. size_t Size() const { return key_size_; }
  302. void Clear() { key_size_ = 0; }
  303. // Append "non_shared_data" to its back, from "shared_len"
  304. // This function is used in Block::Iter::ParseNextKey
  305. // shared_len: bytes in [0, shard_len-1] would be remained
  306. // non_shared_data: data to be append, its length must be >= non_shared_len
  307. void TrimAppend(const size_t shared_len, const char* non_shared_data,
  308. const size_t non_shared_len) {
  309. assert(shared_len <= key_size_);
  310. size_t total_size = shared_len + non_shared_len;
  311. if (IsKeyPinned() /* key is not in buf_ */) {
  312. // Copy the key from external memory to buf_ (copy shared_len bytes)
  313. EnlargeBufferIfNeeded(total_size);
  314. memcpy(buf_, key_, shared_len);
  315. } else if (total_size > buf_size_) {
  316. // Need to allocate space, delete previous space
  317. char* p = new char[total_size];
  318. memcpy(p, key_, shared_len);
  319. if (buf_ != space_) {
  320. delete[] buf_;
  321. }
  322. buf_ = p;
  323. buf_size_ = total_size;
  324. }
  325. memcpy(buf_ + shared_len, non_shared_data, non_shared_len);
  326. key_ = buf_;
  327. key_size_ = total_size;
  328. }
  329. Slice SetKey(const Slice& key, bool copy = true) {
  330. // is_user_key_ expected to be set already via SetIsUserKey
  331. return SetKeyImpl(key, copy);
  332. }
  333. Slice SetUserKey(const Slice& key, bool copy = true) {
  334. is_user_key_ = true;
  335. return SetKeyImpl(key, copy);
  336. }
  337. Slice SetInternalKey(const Slice& key, bool copy = true) {
  338. is_user_key_ = false;
  339. return SetKeyImpl(key, copy);
  340. }
  341. // Copies the content of key, updates the reference to the user key in ikey
  342. // and returns a Slice referencing the new copy.
  343. Slice SetInternalKey(const Slice& key, ParsedInternalKey* ikey) {
  344. size_t key_n = key.size();
  345. assert(key_n >= 8);
  346. SetInternalKey(key);
  347. ikey->user_key = Slice(key_, key_n - 8);
  348. return Slice(key_, key_n);
  349. }
  350. // Copy the key into IterKey own buf_
  351. void OwnKey() {
  352. assert(IsKeyPinned() == true);
  353. Reserve(key_size_);
  354. memcpy(buf_, key_, key_size_);
  355. key_ = buf_;
  356. }
  357. // Update the sequence number in the internal key. Guarantees not to
  358. // invalidate slices to the key (and the user key).
  359. void UpdateInternalKey(uint64_t seq, ValueType t) {
  360. assert(!IsKeyPinned());
  361. assert(key_size_ >= 8);
  362. uint64_t newval = (seq << 8) | t;
  363. EncodeFixed64(&buf_[key_size_ - 8], newval);
  364. }
  365. bool IsKeyPinned() const { return (key_ != buf_); }
  366. void SetInternalKey(const Slice& key_prefix, const Slice& user_key,
  367. SequenceNumber s,
  368. ValueType value_type = kValueTypeForSeek) {
  369. size_t psize = key_prefix.size();
  370. size_t usize = user_key.size();
  371. EnlargeBufferIfNeeded(psize + usize + sizeof(uint64_t));
  372. if (psize > 0) {
  373. memcpy(buf_, key_prefix.data(), psize);
  374. }
  375. memcpy(buf_ + psize, user_key.data(), usize);
  376. EncodeFixed64(buf_ + usize + psize, PackSequenceAndType(s, value_type));
  377. key_ = buf_;
  378. key_size_ = psize + usize + sizeof(uint64_t);
  379. is_user_key_ = false;
  380. }
  381. void SetInternalKey(const Slice& user_key, SequenceNumber s,
  382. ValueType value_type = kValueTypeForSeek) {
  383. SetInternalKey(Slice(), user_key, s, value_type);
  384. }
  385. void Reserve(size_t size) {
  386. EnlargeBufferIfNeeded(size);
  387. key_size_ = size;
  388. }
  389. void SetInternalKey(const ParsedInternalKey& parsed_key) {
  390. SetInternalKey(Slice(), parsed_key);
  391. }
  392. void SetInternalKey(const Slice& key_prefix,
  393. const ParsedInternalKey& parsed_key_suffix) {
  394. SetInternalKey(key_prefix, parsed_key_suffix.user_key,
  395. parsed_key_suffix.sequence, parsed_key_suffix.type);
  396. }
  397. void EncodeLengthPrefixedKey(const Slice& key) {
  398. auto size = key.size();
  399. EnlargeBufferIfNeeded(size + static_cast<size_t>(VarintLength(size)));
  400. char* ptr = EncodeVarint32(buf_, static_cast<uint32_t>(size));
  401. memcpy(ptr, key.data(), size);
  402. key_ = buf_;
  403. is_user_key_ = true;
  404. }
  405. bool IsUserKey() const { return is_user_key_; }
  406. private:
  407. char* buf_;
  408. const char* key_;
  409. size_t key_size_;
  410. size_t buf_size_;
  411. char space_[32]; // Avoid allocation for short keys
  412. bool is_user_key_;
  413. Slice SetKeyImpl(const Slice& key, bool copy) {
  414. size_t size = key.size();
  415. if (copy) {
  416. // Copy key to buf_
  417. EnlargeBufferIfNeeded(size);
  418. memcpy(buf_, key.data(), size);
  419. key_ = buf_;
  420. } else {
  421. // Update key_ to point to external memory
  422. key_ = key.data();
  423. }
  424. key_size_ = size;
  425. return Slice(key_, key_size_);
  426. }
  427. void ResetBuffer() {
  428. if (buf_ != space_) {
  429. delete[] buf_;
  430. buf_ = space_;
  431. }
  432. buf_size_ = sizeof(space_);
  433. key_size_ = 0;
  434. }
  435. // Enlarge the buffer size if needed based on key_size.
  436. // By default, static allocated buffer is used. Once there is a key
  437. // larger than the static allocated buffer, another buffer is dynamically
  438. // allocated, until a larger key buffer is requested. In that case, we
  439. // reallocate buffer and delete the old one.
  440. void EnlargeBufferIfNeeded(size_t key_size) {
  441. // If size is smaller than buffer size, continue using current buffer,
  442. // or the static allocated one, as default
  443. if (key_size > buf_size_) {
  444. EnlargeBuffer(key_size);
  445. }
  446. }
  447. void EnlargeBuffer(size_t key_size);
  448. };
  449. // Convert from a SliceTranform of user keys, to a SliceTransform of
  450. // user keys.
  451. class InternalKeySliceTransform : public SliceTransform {
  452. public:
  453. explicit InternalKeySliceTransform(const SliceTransform* transform)
  454. : transform_(transform) {}
  455. virtual const char* Name() const override { return transform_->Name(); }
  456. virtual Slice Transform(const Slice& src) const override {
  457. auto user_key = ExtractUserKey(src);
  458. return transform_->Transform(user_key);
  459. }
  460. virtual bool InDomain(const Slice& src) const override {
  461. auto user_key = ExtractUserKey(src);
  462. return transform_->InDomain(user_key);
  463. }
  464. virtual bool InRange(const Slice& dst) const override {
  465. auto user_key = ExtractUserKey(dst);
  466. return transform_->InRange(user_key);
  467. }
  468. const SliceTransform* user_prefix_extractor() const { return transform_; }
  469. private:
  470. // Like comparator, InternalKeySliceTransform will not take care of the
  471. // deletion of transform_
  472. const SliceTransform* const transform_;
  473. };
  474. // Read the key of a record from a write batch.
  475. // if this record represent the default column family then cf_record
  476. // must be passed as false, otherwise it must be passed as true.
  477. extern bool ReadKeyFromWriteBatchEntry(Slice* input, Slice* key,
  478. bool cf_record);
  479. // Read record from a write batch piece from input.
  480. // tag, column_family, key, value and blob are return values. Callers own the
  481. // Slice they point to.
  482. // Tag is defined as ValueType.
  483. // input will be advanced to after the record.
  484. extern Status ReadRecordFromWriteBatch(Slice* input, char* tag,
  485. uint32_t* column_family, Slice* key,
  486. Slice* value, Slice* blob, Slice* xid);
  487. // When user call DeleteRange() to delete a range of keys,
  488. // we will store a serialized RangeTombstone in MemTable and SST.
  489. // the struct here is a easy-understood form
  490. // start/end_key_ is the start/end user key of the range to be deleted
  491. struct RangeTombstone {
  492. Slice start_key_;
  493. Slice end_key_;
  494. SequenceNumber seq_;
  495. RangeTombstone() = default;
  496. RangeTombstone(Slice sk, Slice ek, SequenceNumber sn)
  497. : start_key_(sk), end_key_(ek), seq_(sn) {}
  498. RangeTombstone(ParsedInternalKey parsed_key, Slice value) {
  499. start_key_ = parsed_key.user_key;
  500. seq_ = parsed_key.sequence;
  501. end_key_ = value;
  502. }
  503. // be careful to use Serialize(), allocates new memory
  504. std::pair<InternalKey, Slice> Serialize() const {
  505. auto key = InternalKey(start_key_, seq_, kTypeRangeDeletion);
  506. Slice value = end_key_;
  507. return std::make_pair(std::move(key), std::move(value));
  508. }
  509. // be careful to use SerializeKey(), allocates new memory
  510. InternalKey SerializeKey() const {
  511. return InternalKey(start_key_, seq_, kTypeRangeDeletion);
  512. }
  513. // The tombstone end-key is exclusive, so we generate an internal-key here
  514. // which has a similar property. Using kMaxSequenceNumber guarantees that
  515. // the returned internal-key will compare less than any other internal-key
  516. // with the same user-key. This in turn guarantees that the serialized
  517. // end-key for a tombstone such as [a-b] will compare less than the key "b".
  518. //
  519. // be careful to use SerializeEndKey(), allocates new memory
  520. InternalKey SerializeEndKey() const {
  521. return InternalKey(end_key_, kMaxSequenceNumber, kTypeRangeDeletion);
  522. }
  523. };
  524. inline int InternalKeyComparator::Compare(const Slice& akey,
  525. const Slice& bkey) const {
  526. // Order by:
  527. // increasing user key (according to user-supplied comparator)
  528. // decreasing sequence number
  529. // decreasing type (though sequence# should be enough to disambiguate)
  530. int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  531. if (r == 0) {
  532. const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8);
  533. const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8);
  534. if (anum > bnum) {
  535. r = -1;
  536. } else if (anum < bnum) {
  537. r = +1;
  538. }
  539. }
  540. return r;
  541. }
  542. inline int InternalKeyComparator::CompareKeySeq(const Slice& akey,
  543. const Slice& bkey) const {
  544. // Order by:
  545. // increasing user key (according to user-supplied comparator)
  546. // decreasing sequence number
  547. int r = user_comparator_.Compare(ExtractUserKey(akey), ExtractUserKey(bkey));
  548. if (r == 0) {
  549. // Shift the number to exclude the last byte which contains the value type
  550. const uint64_t anum = DecodeFixed64(akey.data() + akey.size() - 8) >> 8;
  551. const uint64_t bnum = DecodeFixed64(bkey.data() + bkey.size() - 8) >> 8;
  552. if (anum > bnum) {
  553. r = -1;
  554. } else if (anum < bnum) {
  555. r = +1;
  556. }
  557. }
  558. return r;
  559. }
  560. // Wrap InternalKeyComparator as a comparator class for ParsedInternalKey.
  561. struct ParsedInternalKeyComparator {
  562. explicit ParsedInternalKeyComparator(const InternalKeyComparator* c)
  563. : cmp(c) {}
  564. bool operator()(const ParsedInternalKey& a,
  565. const ParsedInternalKey& b) const {
  566. return cmp->Compare(a, b) < 0;
  567. }
  568. const InternalKeyComparator* cmp;
  569. };
  570. } // namespace ROCKSDB_NAMESPACE