dbformat.cc 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "db/dbformat.h"
  10. #include <cinttypes>
  11. #include <cstdio>
  12. #include "db/lookup_key.h"
  13. #include "monitoring/perf_context_imp.h"
  14. #include "port/port.h"
  15. #include "util/coding.h"
  16. #include "util/string_util.h"
  17. namespace ROCKSDB_NAMESPACE {
  18. // kValueTypeForSeek defines the ValueType that should be passed when
  19. // constructing a ParsedInternalKey object for seeking to a particular
  20. // sequence number (since we sort sequence numbers in decreasing order
  21. // and the value type is embedded as the low 8 bits in the sequence
  22. // number in internal keys, we need to use the highest-numbered
  23. // ValueType, not the lowest).
  24. const ValueType kValueTypeForSeek = kTypeValuePreferredSeqno;
  25. const ValueType kValueTypeForSeekForPrev = kTypeDeletion;
  26. const std::string kDisableUserTimestamp;
  27. EntryType GetEntryType(ValueType value_type) {
  28. switch (value_type) {
  29. case kTypeValue:
  30. return kEntryPut;
  31. case kTypeDeletion:
  32. return kEntryDelete;
  33. case kTypeDeletionWithTimestamp:
  34. return kEntryDeleteWithTimestamp;
  35. case kTypeSingleDeletion:
  36. return kEntrySingleDelete;
  37. case kTypeMerge:
  38. return kEntryMerge;
  39. case kTypeRangeDeletion:
  40. return kEntryRangeDeletion;
  41. case kTypeBlobIndex:
  42. return kEntryBlobIndex;
  43. case kTypeWideColumnEntity:
  44. return kEntryWideColumnEntity;
  45. case kTypeValuePreferredSeqno:
  46. return kEntryTimedPut;
  47. default:
  48. return kEntryOther;
  49. }
  50. }
  51. void AppendInternalKey(std::string* result, const ParsedInternalKey& key) {
  52. result->append(key.user_key.data(), key.user_key.size());
  53. PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
  54. }
  55. void AppendInternalKeyWithDifferentTimestamp(std::string* result,
  56. const ParsedInternalKey& key,
  57. const Slice& ts) {
  58. assert(key.user_key.size() >= ts.size());
  59. result->append(key.user_key.data(), key.user_key.size() - ts.size());
  60. result->append(ts.data(), ts.size());
  61. PutFixed64(result, PackSequenceAndType(key.sequence, key.type));
  62. }
  63. void AppendUserKeyWithDifferentTimestamp(std::string* result, const Slice& key,
  64. const Slice& ts) {
  65. assert(key.size() >= ts.size());
  66. result->append(key.data(), key.size() - ts.size());
  67. result->append(ts.data(), ts.size());
  68. }
  69. void AppendInternalKeyFooter(std::string* result, SequenceNumber s,
  70. ValueType t) {
  71. PutFixed64(result, PackSequenceAndType(s, t));
  72. }
  73. void AppendKeyWithMinTimestamp(std::string* result, const Slice& key,
  74. size_t ts_sz) {
  75. assert(ts_sz > 0);
  76. const std::string kTsMin(ts_sz, static_cast<unsigned char>(0));
  77. result->append(key.data(), key.size());
  78. result->append(kTsMin.data(), ts_sz);
  79. }
  80. void AppendKeyWithMaxTimestamp(std::string* result, const Slice& key,
  81. size_t ts_sz) {
  82. assert(ts_sz > 0);
  83. const std::string kTsMax(ts_sz, static_cast<unsigned char>(0xff));
  84. result->append(key.data(), key.size());
  85. result->append(kTsMax.data(), ts_sz);
  86. }
  87. void AppendUserKeyWithMinTimestamp(std::string* result, const Slice& key,
  88. size_t ts_sz) {
  89. assert(ts_sz > 0);
  90. result->append(key.data(), key.size() - ts_sz);
  91. result->append(ts_sz, static_cast<unsigned char>(0));
  92. }
  93. void AppendUserKeyWithMaxTimestamp(std::string* result, const Slice& key,
  94. size_t ts_sz) {
  95. assert(ts_sz > 0);
  96. result->append(key.data(), key.size() - ts_sz);
  97. static constexpr char kTsMax[] = "\xff\xff\xff\xff\xff\xff\xff\xff\xff";
  98. if (ts_sz < strlen(kTsMax)) {
  99. result->append(kTsMax, ts_sz);
  100. } else {
  101. result->append(std::string(ts_sz, '\xff'));
  102. }
  103. }
  104. void PadInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
  105. size_t ts_sz) {
  106. assert(ts_sz > 0);
  107. assert(key.size() >= kNumInternalBytes);
  108. size_t user_key_size = key.size() - kNumInternalBytes;
  109. result->reserve(key.size() + ts_sz);
  110. result->append(key.data(), user_key_size);
  111. result->append(ts_sz, static_cast<unsigned char>(0));
  112. result->append(key.data() + user_key_size, kNumInternalBytes);
  113. }
  114. void PadInternalKeyWithMaxTimestamp(std::string* result, const Slice& key,
  115. size_t ts_sz) {
  116. assert(ts_sz > 0);
  117. assert(key.size() >= kNumInternalBytes);
  118. size_t user_key_size = key.size() - kNumInternalBytes;
  119. result->reserve(key.size() + ts_sz);
  120. result->append(key.data(), user_key_size);
  121. result->append(std::string(ts_sz, '\xff'));
  122. result->append(key.data() + user_key_size, kNumInternalBytes);
  123. }
  124. void StripTimestampFromInternalKey(std::string* result, const Slice& key,
  125. size_t ts_sz) {
  126. assert(key.size() >= ts_sz + kNumInternalBytes);
  127. result->reserve(key.size() - ts_sz);
  128. result->append(key.data(), key.size() - kNumInternalBytes - ts_sz);
  129. result->append(key.data() + key.size() - kNumInternalBytes,
  130. kNumInternalBytes);
  131. }
  132. void ReplaceInternalKeyWithMinTimestamp(std::string* result, const Slice& key,
  133. size_t ts_sz) {
  134. const size_t key_sz = key.size();
  135. assert(key_sz >= ts_sz + kNumInternalBytes);
  136. result->reserve(key_sz);
  137. result->append(key.data(), key_sz - kNumInternalBytes - ts_sz);
  138. result->append(ts_sz, static_cast<unsigned char>(0));
  139. result->append(key.data() + key_sz - kNumInternalBytes, kNumInternalBytes);
  140. }
  141. std::string ParsedInternalKey::DebugString(bool log_err_key, bool hex,
  142. const Comparator* ucmp) const {
  143. std::string result = "'";
  144. size_t ts_sz_for_debug = ucmp == nullptr ? 0 : ucmp->timestamp_size();
  145. if (log_err_key) {
  146. if (ts_sz_for_debug == 0) {
  147. result += user_key.ToString(hex);
  148. } else {
  149. assert(user_key.size() >= ts_sz_for_debug);
  150. Slice user_key_without_ts = user_key;
  151. user_key_without_ts.remove_suffix(ts_sz_for_debug);
  152. result += user_key_without_ts.ToString(hex);
  153. Slice ts = Slice(user_key.data() + user_key.size() - ts_sz_for_debug,
  154. ts_sz_for_debug);
  155. result += "|timestamp:";
  156. result += ucmp->TimestampToString(ts);
  157. }
  158. } else {
  159. result += "<redacted>";
  160. }
  161. result += "' seq:" + std::to_string(sequence);
  162. result += ", type:" + std::to_string(type);
  163. return result;
  164. }
  165. std::string InternalKey::DebugString(bool hex, const Comparator* ucmp) const {
  166. std::string result;
  167. ParsedInternalKey parsed;
  168. if (ParseInternalKey(rep_, &parsed, false /* log_err_key */).ok()) {
  169. result = parsed.DebugString(true /* log_err_key */, hex, ucmp); // TODO
  170. } else {
  171. result = "(bad)";
  172. result.append(EscapeString(rep_));
  173. }
  174. return result;
  175. }
  176. int InternalKeyComparator::Compare(const ParsedInternalKey& a,
  177. const ParsedInternalKey& b) const {
  178. // Order by:
  179. // increasing user key (according to user-supplied comparator)
  180. // decreasing sequence number
  181. // decreasing type (though sequence# should be enough to disambiguate)
  182. int r = user_comparator_.Compare(a.user_key, b.user_key);
  183. if (r == 0) {
  184. if (a.sequence > b.sequence) {
  185. r = -1;
  186. } else if (a.sequence < b.sequence) {
  187. r = +1;
  188. } else if (a.type > b.type) {
  189. r = -1;
  190. } else if (a.type < b.type) {
  191. r = +1;
  192. }
  193. }
  194. return r;
  195. }
  196. int InternalKeyComparator::Compare(const Slice& a,
  197. const ParsedInternalKey& b) const {
  198. // Order by:
  199. // increasing user key (according to user-supplied comparator)
  200. // decreasing sequence number
  201. // decreasing type (though sequence# should be enough to disambiguate)
  202. int r = user_comparator_.Compare(ExtractUserKey(a), b.user_key);
  203. if (r == 0) {
  204. const uint64_t anum =
  205. DecodeFixed64(a.data() + a.size() - kNumInternalBytes);
  206. const uint64_t bnum = (b.sequence << 8) | b.type;
  207. if (anum > bnum) {
  208. r = -1;
  209. } else if (anum < bnum) {
  210. r = +1;
  211. }
  212. }
  213. return r;
  214. }
  215. int InternalKeyComparator::Compare(const ParsedInternalKey& a,
  216. const Slice& b) const {
  217. return -Compare(b, a);
  218. }
  219. LookupKey::LookupKey(const Slice& _user_key, SequenceNumber s,
  220. const Slice* ts) {
  221. size_t usize = _user_key.size();
  222. size_t ts_sz = (nullptr == ts) ? 0 : ts->size();
  223. size_t needed = usize + ts_sz + 13; // A conservative estimate
  224. char* dst;
  225. if (needed <= sizeof(space_)) {
  226. dst = space_;
  227. } else {
  228. dst = new char[needed];
  229. }
  230. start_ = dst;
  231. // NOTE: We don't support users keys of more than 2GB :)
  232. dst = EncodeVarint32(dst, static_cast<uint32_t>(usize + ts_sz + 8));
  233. kstart_ = dst;
  234. memcpy(dst, _user_key.data(), usize);
  235. dst += usize;
  236. if (nullptr != ts) {
  237. memcpy(dst, ts->data(), ts_sz);
  238. dst += ts_sz;
  239. }
  240. EncodeFixed64(dst, PackSequenceAndType(s, kValueTypeForSeek));
  241. dst += 8;
  242. end_ = dst;
  243. }
  244. void IterKey::EnlargeBuffer(size_t key_size) {
  245. // If size is smaller than buffer size, continue using current buffer,
  246. // or the inline one, as default
  247. assert(key_size > buf_size_);
  248. // Need to enlarge the buffer.
  249. ResetBuffer();
  250. buf_ = new char[key_size];
  251. buf_size_ = key_size;
  252. }
  253. void IterKey::EnlargeSecondaryBufferIfNeeded(size_t key_size) {
  254. // If size is smaller than buffer size, continue using current buffer,
  255. // or the inline one, as default
  256. if (key_size <= secondary_buf_size_) {
  257. return;
  258. }
  259. // Need to enlarge the secondary buffer.
  260. ResetSecondaryBuffer();
  261. secondary_buf_ = new char[key_size];
  262. secondary_buf_size_ = key_size;
  263. }
  264. } // namespace ROCKSDB_NAMESPACE