coding.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Endian-neutral encoding:
  11. // * Fixed-length numbers are encoded with least-significant byte first
  12. // * In addition we support variable length "varint" encoding
  13. // * Strings are encoded prefixed by their length in varint format
  14. #pragma once
  15. #include <algorithm>
  16. #include <stdint.h>
  17. #include <string.h>
  18. #include <string>
  19. #include "rocksdb/write_batch.h"
  20. #include "port/port.h"
  21. // Some processors does not allow unaligned access to memory
  22. #if defined(__sparc)
  23. #define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED
  24. #endif
  25. namespace ROCKSDB_NAMESPACE {
  26. // The maximum length of a varint in bytes for 64-bit.
  27. const unsigned int kMaxVarint64Length = 10;
  28. // Standard Put... routines append to a string
  29. extern void PutFixed16(std::string* dst, uint16_t value);
  30. extern void PutFixed32(std::string* dst, uint32_t value);
  31. extern void PutFixed64(std::string* dst, uint64_t value);
  32. extern void PutVarint32(std::string* dst, uint32_t value);
  33. extern void PutVarint32Varint32(std::string* dst, uint32_t value1,
  34. uint32_t value2);
  35. extern void PutVarint32Varint32Varint32(std::string* dst, uint32_t value1,
  36. uint32_t value2, uint32_t value3);
  37. extern void PutVarint64(std::string* dst, uint64_t value);
  38. extern void PutVarint64Varint64(std::string* dst, uint64_t value1,
  39. uint64_t value2);
  40. extern void PutVarint32Varint64(std::string* dst, uint32_t value1,
  41. uint64_t value2);
  42. extern void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1,
  43. uint32_t value2, uint64_t value3);
  44. extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
  45. extern void PutLengthPrefixedSliceParts(std::string* dst,
  46. const SliceParts& slice_parts);
  47. extern void PutLengthPrefixedSlicePartsWithPadding(
  48. std::string* dst, const SliceParts& slice_parts, size_t pad_sz);
  49. // Standard Get... routines parse a value from the beginning of a Slice
  50. // and advance the slice past the parsed value.
  51. extern bool GetFixed64(Slice* input, uint64_t* value);
  52. extern bool GetFixed32(Slice* input, uint32_t* value);
  53. extern bool GetFixed16(Slice* input, uint16_t* value);
  54. extern bool GetVarint32(Slice* input, uint32_t* value);
  55. extern bool GetVarint64(Slice* input, uint64_t* value);
  56. extern bool GetVarsignedint64(Slice* input, int64_t* value);
  57. extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
  58. // This function assumes data is well-formed.
  59. extern Slice GetLengthPrefixedSlice(const char* data);
  60. extern Slice GetSliceUntil(Slice* slice, char delimiter);
  61. // Borrowed from
  62. // https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208
  63. constexpr inline uint64_t i64ToZigzag(const int64_t l) {
  64. return (static_cast<uint64_t>(l) << 1) ^ static_cast<uint64_t>(l >> 63);
  65. }
  66. inline int64_t zigzagToI64(uint64_t n) {
  67. return (n >> 1) ^ -static_cast<int64_t>(n & 1);
  68. }
  69. // Pointer-based variants of GetVarint... These either store a value
  70. // in *v and return a pointer just past the parsed value, or return
  71. // nullptr on error. These routines only look at bytes in the range
  72. // [p..limit-1]
  73. extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v);
  74. extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v);
  75. inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
  76. int64_t* value) {
  77. uint64_t u = 0;
  78. const char* ret = GetVarint64Ptr(p, limit, &u);
  79. *value = zigzagToI64(u);
  80. return ret;
  81. }
  82. // Returns the length of the varint32 or varint64 encoding of "v"
  83. extern int VarintLength(uint64_t v);
  84. // Lower-level versions of Put... that write directly into a character buffer
  85. // REQUIRES: dst has enough space for the value being written
  86. extern void EncodeFixed16(char* dst, uint16_t value);
  87. extern void EncodeFixed32(char* dst, uint32_t value);
  88. extern void EncodeFixed64(char* dst, uint64_t value);
  89. // Lower-level versions of Put... that write directly into a character buffer
  90. // and return a pointer just past the last byte written.
  91. // REQUIRES: dst has enough space for the value being written
  92. extern char* EncodeVarint32(char* dst, uint32_t value);
  93. extern char* EncodeVarint64(char* dst, uint64_t value);
  94. // Lower-level versions of Get... that read directly from a character buffer
  95. // without any bounds checking.
  96. inline uint16_t DecodeFixed16(const char* ptr) {
  97. if (port::kLittleEndian) {
  98. // Load the raw bytes
  99. uint16_t result;
  100. memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
  101. return result;
  102. } else {
  103. return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
  104. (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
  105. }
  106. }
  107. inline uint32_t DecodeFixed32(const char* ptr) {
  108. if (port::kLittleEndian) {
  109. // Load the raw bytes
  110. uint32_t result;
  111. memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
  112. return result;
  113. } else {
  114. return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
  115. | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
  116. | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
  117. | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
  118. }
  119. }
  120. inline uint64_t DecodeFixed64(const char* ptr) {
  121. if (port::kLittleEndian) {
  122. // Load the raw bytes
  123. uint64_t result;
  124. memcpy(&result, ptr, sizeof(result)); // gcc optimizes this to a plain load
  125. return result;
  126. } else {
  127. uint64_t lo = DecodeFixed32(ptr);
  128. uint64_t hi = DecodeFixed32(ptr + 4);
  129. return (hi << 32) | lo;
  130. }
  131. }
  132. // Internal routine for use by fallback path of GetVarint32Ptr
  133. extern const char* GetVarint32PtrFallback(const char* p,
  134. const char* limit,
  135. uint32_t* value);
  136. inline const char* GetVarint32Ptr(const char* p,
  137. const char* limit,
  138. uint32_t* value) {
  139. if (p < limit) {
  140. uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
  141. if ((result & 128) == 0) {
  142. *value = result;
  143. return p + 1;
  144. }
  145. }
  146. return GetVarint32PtrFallback(p, limit, value);
  147. }
  148. // -- Implementation of the functions declared above
  149. inline void EncodeFixed16(char* buf, uint16_t value) {
  150. if (port::kLittleEndian) {
  151. memcpy(buf, &value, sizeof(value));
  152. } else {
  153. buf[0] = value & 0xff;
  154. buf[1] = (value >> 8) & 0xff;
  155. }
  156. }
  157. inline void EncodeFixed32(char* buf, uint32_t value) {
  158. if (port::kLittleEndian) {
  159. memcpy(buf, &value, sizeof(value));
  160. } else {
  161. buf[0] = value & 0xff;
  162. buf[1] = (value >> 8) & 0xff;
  163. buf[2] = (value >> 16) & 0xff;
  164. buf[3] = (value >> 24) & 0xff;
  165. }
  166. }
  167. inline void EncodeFixed64(char* buf, uint64_t value) {
  168. if (port::kLittleEndian) {
  169. memcpy(buf, &value, sizeof(value));
  170. } else {
  171. buf[0] = value & 0xff;
  172. buf[1] = (value >> 8) & 0xff;
  173. buf[2] = (value >> 16) & 0xff;
  174. buf[3] = (value >> 24) & 0xff;
  175. buf[4] = (value >> 32) & 0xff;
  176. buf[5] = (value >> 40) & 0xff;
  177. buf[6] = (value >> 48) & 0xff;
  178. buf[7] = (value >> 56) & 0xff;
  179. }
  180. }
  181. // Pull the last 8 bits and cast it to a character
  182. inline void PutFixed16(std::string* dst, uint16_t value) {
  183. if (port::kLittleEndian) {
  184. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  185. sizeof(value));
  186. } else {
  187. char buf[sizeof(value)];
  188. EncodeFixed16(buf, value);
  189. dst->append(buf, sizeof(buf));
  190. }
  191. }
  192. inline void PutFixed32(std::string* dst, uint32_t value) {
  193. if (port::kLittleEndian) {
  194. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  195. sizeof(value));
  196. } else {
  197. char buf[sizeof(value)];
  198. EncodeFixed32(buf, value);
  199. dst->append(buf, sizeof(buf));
  200. }
  201. }
  202. inline void PutFixed64(std::string* dst, uint64_t value) {
  203. if (port::kLittleEndian) {
  204. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  205. sizeof(value));
  206. } else {
  207. char buf[sizeof(value)];
  208. EncodeFixed64(buf, value);
  209. dst->append(buf, sizeof(buf));
  210. }
  211. }
  212. inline void PutVarint32(std::string* dst, uint32_t v) {
  213. char buf[5];
  214. char* ptr = EncodeVarint32(buf, v);
  215. dst->append(buf, static_cast<size_t>(ptr - buf));
  216. }
  217. inline void PutVarint32Varint32(std::string* dst, uint32_t v1, uint32_t v2) {
  218. char buf[10];
  219. char* ptr = EncodeVarint32(buf, v1);
  220. ptr = EncodeVarint32(ptr, v2);
  221. dst->append(buf, static_cast<size_t>(ptr - buf));
  222. }
  223. inline void PutVarint32Varint32Varint32(std::string* dst, uint32_t v1,
  224. uint32_t v2, uint32_t v3) {
  225. char buf[15];
  226. char* ptr = EncodeVarint32(buf, v1);
  227. ptr = EncodeVarint32(ptr, v2);
  228. ptr = EncodeVarint32(ptr, v3);
  229. dst->append(buf, static_cast<size_t>(ptr - buf));
  230. }
  231. inline char* EncodeVarint64(char* dst, uint64_t v) {
  232. static const unsigned int B = 128;
  233. unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
  234. while (v >= B) {
  235. *(ptr++) = (v & (B - 1)) | B;
  236. v >>= 7;
  237. }
  238. *(ptr++) = static_cast<unsigned char>(v);
  239. return reinterpret_cast<char*>(ptr);
  240. }
  241. inline void PutVarint64(std::string* dst, uint64_t v) {
  242. char buf[kMaxVarint64Length];
  243. char* ptr = EncodeVarint64(buf, v);
  244. dst->append(buf, static_cast<size_t>(ptr - buf));
  245. }
  246. inline void PutVarsignedint64(std::string* dst, int64_t v) {
  247. char buf[kMaxVarint64Length];
  248. // Using Zigzag format to convert signed to unsigned
  249. char* ptr = EncodeVarint64(buf, i64ToZigzag(v));
  250. dst->append(buf, static_cast<size_t>(ptr - buf));
  251. }
  252. inline void PutVarint64Varint64(std::string* dst, uint64_t v1, uint64_t v2) {
  253. char buf[20];
  254. char* ptr = EncodeVarint64(buf, v1);
  255. ptr = EncodeVarint64(ptr, v2);
  256. dst->append(buf, static_cast<size_t>(ptr - buf));
  257. }
  258. inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) {
  259. char buf[15];
  260. char* ptr = EncodeVarint32(buf, v1);
  261. ptr = EncodeVarint64(ptr, v2);
  262. dst->append(buf, static_cast<size_t>(ptr - buf));
  263. }
  264. inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1,
  265. uint32_t v2, uint64_t v3) {
  266. char buf[20];
  267. char* ptr = EncodeVarint32(buf, v1);
  268. ptr = EncodeVarint32(ptr, v2);
  269. ptr = EncodeVarint64(ptr, v3);
  270. dst->append(buf, static_cast<size_t>(ptr - buf));
  271. }
  272. inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  273. PutVarint32(dst, static_cast<uint32_t>(value.size()));
  274. dst->append(value.data(), value.size());
  275. }
  276. inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes,
  277. const SliceParts& slice_parts) {
  278. for (int i = 0; i < slice_parts.num_parts; ++i) {
  279. total_bytes += slice_parts.parts[i].size();
  280. }
  281. PutVarint32(dst, static_cast<uint32_t>(total_bytes));
  282. for (int i = 0; i < slice_parts.num_parts; ++i) {
  283. dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
  284. }
  285. }
  286. inline void PutLengthPrefixedSliceParts(std::string* dst,
  287. const SliceParts& slice_parts) {
  288. PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts);
  289. }
  290. inline void PutLengthPrefixedSlicePartsWithPadding(
  291. std::string* dst, const SliceParts& slice_parts, size_t pad_sz) {
  292. PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts);
  293. dst->append(pad_sz, '\0');
  294. }
  295. inline int VarintLength(uint64_t v) {
  296. int len = 1;
  297. while (v >= 128) {
  298. v >>= 7;
  299. len++;
  300. }
  301. return len;
  302. }
  303. inline bool GetFixed64(Slice* input, uint64_t* value) {
  304. if (input->size() < sizeof(uint64_t)) {
  305. return false;
  306. }
  307. *value = DecodeFixed64(input->data());
  308. input->remove_prefix(sizeof(uint64_t));
  309. return true;
  310. }
  311. inline bool GetFixed32(Slice* input, uint32_t* value) {
  312. if (input->size() < sizeof(uint32_t)) {
  313. return false;
  314. }
  315. *value = DecodeFixed32(input->data());
  316. input->remove_prefix(sizeof(uint32_t));
  317. return true;
  318. }
  319. inline bool GetFixed16(Slice* input, uint16_t* value) {
  320. if (input->size() < sizeof(uint16_t)) {
  321. return false;
  322. }
  323. *value = DecodeFixed16(input->data());
  324. input->remove_prefix(sizeof(uint16_t));
  325. return true;
  326. }
  327. inline bool GetVarint32(Slice* input, uint32_t* value) {
  328. const char* p = input->data();
  329. const char* limit = p + input->size();
  330. const char* q = GetVarint32Ptr(p, limit, value);
  331. if (q == nullptr) {
  332. return false;
  333. } else {
  334. *input = Slice(q, static_cast<size_t>(limit - q));
  335. return true;
  336. }
  337. }
  338. inline bool GetVarint64(Slice* input, uint64_t* value) {
  339. const char* p = input->data();
  340. const char* limit = p + input->size();
  341. const char* q = GetVarint64Ptr(p, limit, value);
  342. if (q == nullptr) {
  343. return false;
  344. } else {
  345. *input = Slice(q, static_cast<size_t>(limit - q));
  346. return true;
  347. }
  348. }
  349. inline bool GetVarsignedint64(Slice* input, int64_t* value) {
  350. const char* p = input->data();
  351. const char* limit = p + input->size();
  352. const char* q = GetVarsignedint64Ptr(p, limit, value);
  353. if (q == nullptr) {
  354. return false;
  355. } else {
  356. *input = Slice(q, static_cast<size_t>(limit - q));
  357. return true;
  358. }
  359. }
  360. // Provide an interface for platform independent endianness transformation
  361. inline uint64_t EndianTransform(uint64_t input, size_t size) {
  362. char* pos = reinterpret_cast<char*>(&input);
  363. uint64_t ret_val = 0;
  364. for (size_t i = 0; i < size; ++i) {
  365. ret_val |= (static_cast<uint64_t>(static_cast<unsigned char>(pos[i]))
  366. << ((size - i - 1) << 3));
  367. }
  368. return ret_val;
  369. }
  370. inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  371. uint32_t len = 0;
  372. if (GetVarint32(input, &len) && input->size() >= len) {
  373. *result = Slice(input->data(), len);
  374. input->remove_prefix(len);
  375. return true;
  376. } else {
  377. return false;
  378. }
  379. }
  380. inline Slice GetLengthPrefixedSlice(const char* data) {
  381. uint32_t len = 0;
  382. // +5: we assume "data" is not corrupted
  383. // unsigned char is 7 bits, uint32_t is 32 bits, need 5 unsigned char
  384. auto p = GetVarint32Ptr(data, data + 5 /* limit */, &len);
  385. return Slice(p, len);
  386. }
  387. inline Slice GetSliceUntil(Slice* slice, char delimiter) {
  388. uint32_t len = 0;
  389. for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) {
  390. // nothing
  391. }
  392. Slice ret(slice->data(), len);
  393. slice->remove_prefix(len + ((len < slice->size()) ? 1 : 0));
  394. return ret;
  395. }
  396. template<class T>
  397. #ifdef ROCKSDB_UBSAN_RUN
  398. #if defined(__clang__)
  399. __attribute__((__no_sanitize__("alignment")))
  400. #elif defined(__GNUC__)
  401. __attribute__((__no_sanitize_undefined__))
  402. #endif
  403. #endif
  404. inline void PutUnaligned(T *memory, const T &value) {
  405. #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  406. char *nonAlignedMemory = reinterpret_cast<char*>(memory);
  407. memcpy(nonAlignedMemory, reinterpret_cast<const char*>(&value), sizeof(T));
  408. #else
  409. *memory = value;
  410. #endif
  411. }
  412. template<class T>
  413. #ifdef ROCKSDB_UBSAN_RUN
  414. #if defined(__clang__)
  415. __attribute__((__no_sanitize__("alignment")))
  416. #elif defined(__GNUC__)
  417. __attribute__((__no_sanitize_undefined__))
  418. #endif
  419. #endif
  420. inline void GetUnaligned(const T *memory, T *value) {
  421. #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  422. char *nonAlignedMemory = reinterpret_cast<char*>(value);
  423. memcpy(nonAlignedMemory, reinterpret_cast<const char*>(memory), sizeof(T));
  424. #else
  425. *value = *memory;
  426. #endif
  427. }
  428. } // namespace ROCKSDB_NAMESPACE