coding.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Encoding independent of machine byte order:
  11. // * Fixed-length numbers are encoded with least-significant byte first
  12. // (little endian, native order on Intel and others)
  13. // * In addition we support variable length "varint" encoding
  14. // * Strings are encoded prefixed by their length in varint format
  15. //
  16. // Some related functions are provided in coding_lean.h
  17. #pragma once
  18. #include <algorithm>
  19. #include <string>
  20. #include "port/port.h"
  21. #include "rocksdb/slice.h"
  22. #include "util/cast_util.h"
  23. #include "util/coding_lean.h"
  24. // Some processors does not allow unaligned access to memory
  25. #if defined(__sparc)
  26. #define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED
  27. #endif
  28. namespace ROCKSDB_NAMESPACE {
  29. // The maximum length of a varint in bytes for 64-bit.
  30. const uint32_t kMaxVarint64Length = 10;
  31. // Standard Put... routines append to a string
  32. void PutFixed16(std::string* dst, uint16_t value);
  33. void PutFixed32(std::string* dst, uint32_t value);
  34. void PutFixed64(std::string* dst, uint64_t value);
  35. void PutVarint32(std::string* dst, uint32_t value);
  36. void PutVarint32Varint32(std::string* dst, uint32_t value1, uint32_t value2);
  37. void PutVarint32Varint32Varint32(std::string* dst, uint32_t value1,
  38. uint32_t value2, uint32_t value3);
  39. void PutVarint64(std::string* dst, uint64_t value);
  40. void PutVarint64Varint64(std::string* dst, uint64_t value1, uint64_t value2);
  41. void PutVarint32Varint64(std::string* dst, uint32_t value1, uint64_t value2);
  42. void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1,
  43. uint32_t value2, uint64_t value3);
  44. void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
  45. void PutLengthPrefixedSliceParts(std::string* dst,
  46. const SliceParts& slice_parts);
  47. void PutLengthPrefixedSlicePartsWithPadding(std::string* dst,
  48. const SliceParts& slice_parts,
  49. size_t pad_sz);
  50. // Standard Get... routines parse a value from the beginning of a Slice
  51. // and advance the slice past the parsed value.
  52. bool GetFixed64(Slice* input, uint64_t* value);
  53. bool GetFixed32(Slice* input, uint32_t* value);
  54. bool GetFixed16(Slice* input, uint16_t* value);
  55. bool GetVarint32(Slice* input, uint32_t* value);
  56. bool GetVarint64(Slice* input, uint64_t* value);
  57. bool GetVarsignedint64(Slice* input, int64_t* value);
  58. bool GetLengthPrefixedSlice(Slice* input, Slice* result);
  59. // This function assumes data is well-formed.
  60. Slice GetLengthPrefixedSlice(const char* data);
  61. Slice GetSliceUntil(Slice* slice, char delimiter);
  62. // Borrowed from
  63. // https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208
  64. constexpr inline uint64_t i64ToZigzag(const int64_t l) {
  65. return (static_cast<uint64_t>(l) << 1) ^ static_cast<uint64_t>(l >> 63);
  66. }
  67. inline int64_t zigzagToI64(uint64_t n) {
  68. return (n >> 1) ^ -static_cast<int64_t>(n & 1);
  69. }
  70. // Pointer-based variants of GetVarint... These either store a value
  71. // in *v and return a pointer just past the parsed value, or return
  72. // nullptr on error. These routines only look at bytes in the range
  73. // [p..limit-1]
  74. const char* GetVarint32Ptr(const char* p, const char* limit, uint32_t* v);
  75. const char* GetVarint64Ptr(const char* p, const char* limit, uint64_t* v);
  76. inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
  77. int64_t* value) {
  78. uint64_t u = 0;
  79. const char* ret = GetVarint64Ptr(p, limit, &u);
  80. *value = zigzagToI64(u);
  81. return ret;
  82. }
  83. // Returns the length of the varint32 or varint64 encoding of "v"
  84. uint16_t VarintLength(uint64_t v);
  85. // Lower-level versions of Put... that write directly into a character buffer
  86. // and return a pointer just past the last byte written.
  87. // REQUIRES: dst has enough space for the value being written
  88. char* EncodeVarint32(char* dst, uint32_t value);
  89. char* EncodeVarint64(char* dst, uint64_t value);
  90. // Internal routine for use by fallback path of GetVarint32Ptr
  91. const char* GetVarint32PtrFallback(const char* p, const char* limit,
  92. uint32_t* value);
  93. inline const char* GetVarint32Ptr(const char* p, const char* limit,
  94. uint32_t* value) {
  95. if (p < limit) {
  96. uint32_t result = *(lossless_cast<const unsigned char*>(p));
  97. if ((result & 128) == 0) {
  98. *value = result;
  99. return p + 1;
  100. }
  101. }
  102. return GetVarint32PtrFallback(p, limit, value);
  103. }
  104. // Pull the last 8 bits and cast it to a character
  105. inline void PutFixed16(std::string* dst, uint16_t value) {
  106. if (port::kLittleEndian) {
  107. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  108. sizeof(value));
  109. } else {
  110. char buf[sizeof(value)];
  111. EncodeFixed16(buf, value);
  112. dst->append(buf, sizeof(buf));
  113. }
  114. }
  115. inline void PutFixed32(std::string* dst, uint32_t value) {
  116. if (port::kLittleEndian) {
  117. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  118. sizeof(value));
  119. } else {
  120. char buf[sizeof(value)];
  121. EncodeFixed32(buf, value);
  122. dst->append(buf, sizeof(buf));
  123. }
  124. }
  125. inline void PutFixed64(std::string* dst, uint64_t value) {
  126. if (port::kLittleEndian) {
  127. dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
  128. sizeof(value));
  129. } else {
  130. char buf[sizeof(value)];
  131. EncodeFixed64(buf, value);
  132. dst->append(buf, sizeof(buf));
  133. }
  134. }
  135. inline void PutVarint32(std::string* dst, uint32_t v) {
  136. char buf[5];
  137. char* ptr = EncodeVarint32(buf, v);
  138. dst->append(buf, static_cast<size_t>(ptr - buf));
  139. }
  140. inline void PutVarint32Varint32(std::string* dst, uint32_t v1, uint32_t v2) {
  141. char buf[10];
  142. char* ptr = EncodeVarint32(buf, v1);
  143. ptr = EncodeVarint32(ptr, v2);
  144. dst->append(buf, static_cast<size_t>(ptr - buf));
  145. }
  146. inline void PutVarint32Varint32Varint32(std::string* dst, uint32_t v1,
  147. uint32_t v2, uint32_t v3) {
  148. char buf[15];
  149. char* ptr = EncodeVarint32(buf, v1);
  150. ptr = EncodeVarint32(ptr, v2);
  151. ptr = EncodeVarint32(ptr, v3);
  152. dst->append(buf, static_cast<size_t>(ptr - buf));
  153. }
  154. inline char* EncodeVarint64(char* dst, uint64_t v) {
  155. static const unsigned int B = 128;
  156. unsigned char* ptr = lossless_cast<unsigned char*>(dst);
  157. while (v >= B) {
  158. *(ptr++) = (v & (B - 1)) | B;
  159. v >>= 7;
  160. }
  161. *(ptr++) = static_cast<unsigned char>(v);
  162. return lossless_cast<char*>(ptr);
  163. }
  164. inline void PutVarint64(std::string* dst, uint64_t v) {
  165. char buf[kMaxVarint64Length];
  166. char* ptr = EncodeVarint64(buf, v);
  167. dst->append(buf, static_cast<size_t>(ptr - buf));
  168. }
  169. inline void PutVarsignedint64(std::string* dst, int64_t v) {
  170. char buf[kMaxVarint64Length];
  171. // Using Zigzag format to convert signed to unsigned
  172. char* ptr = EncodeVarint64(buf, i64ToZigzag(v));
  173. dst->append(buf, static_cast<size_t>(ptr - buf));
  174. }
  175. inline void PutVarint64Varint64(std::string* dst, uint64_t v1, uint64_t v2) {
  176. char buf[20];
  177. char* ptr = EncodeVarint64(buf, v1);
  178. ptr = EncodeVarint64(ptr, v2);
  179. dst->append(buf, static_cast<size_t>(ptr - buf));
  180. }
  181. inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) {
  182. char buf[15];
  183. char* ptr = EncodeVarint32(buf, v1);
  184. ptr = EncodeVarint64(ptr, v2);
  185. dst->append(buf, static_cast<size_t>(ptr - buf));
  186. }
  187. inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1,
  188. uint32_t v2, uint64_t v3) {
  189. char buf[20];
  190. char* ptr = EncodeVarint32(buf, v1);
  191. ptr = EncodeVarint32(ptr, v2);
  192. ptr = EncodeVarint64(ptr, v3);
  193. dst->append(buf, static_cast<size_t>(ptr - buf));
  194. }
  195. inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
  196. PutVarint32(dst, static_cast<uint32_t>(value.size()));
  197. dst->append(value.data(), value.size());
  198. }
  199. inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes,
  200. const SliceParts& slice_parts) {
  201. for (int i = 0; i < slice_parts.num_parts; ++i) {
  202. total_bytes += slice_parts.parts[i].size();
  203. }
  204. PutVarint32(dst, static_cast<uint32_t>(total_bytes));
  205. for (int i = 0; i < slice_parts.num_parts; ++i) {
  206. dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
  207. }
  208. }
  209. inline void PutLengthPrefixedSliceParts(std::string* dst,
  210. const SliceParts& slice_parts) {
  211. PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts);
  212. }
  213. inline void PutLengthPrefixedSlicePartsWithPadding(
  214. std::string* dst, const SliceParts& slice_parts, size_t pad_sz) {
  215. PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts);
  216. dst->append(pad_sz, '\0');
  217. }
  218. inline uint16_t VarintLength(uint64_t v) {
  219. uint16_t len = 1;
  220. while (v >= 128) {
  221. v >>= 7;
  222. len++;
  223. }
  224. return len;
  225. }
  226. inline bool GetFixed64(Slice* input, uint64_t* value) {
  227. if (input->size() < sizeof(uint64_t)) {
  228. return false;
  229. }
  230. *value = DecodeFixed64(input->data());
  231. input->remove_prefix(sizeof(uint64_t));
  232. return true;
  233. }
  234. inline bool GetFixed32(Slice* input, uint32_t* value) {
  235. if (input->size() < sizeof(uint32_t)) {
  236. return false;
  237. }
  238. *value = DecodeFixed32(input->data());
  239. input->remove_prefix(sizeof(uint32_t));
  240. return true;
  241. }
  242. inline bool GetFixed16(Slice* input, uint16_t* value) {
  243. if (input->size() < sizeof(uint16_t)) {
  244. return false;
  245. }
  246. *value = DecodeFixed16(input->data());
  247. input->remove_prefix(sizeof(uint16_t));
  248. return true;
  249. }
  250. inline bool GetVarint32(Slice* input, uint32_t* value) {
  251. const char* p = input->data();
  252. const char* limit = p + input->size();
  253. const char* q = GetVarint32Ptr(p, limit, value);
  254. if (q == nullptr) {
  255. return false;
  256. } else {
  257. *input = Slice(q, static_cast<size_t>(limit - q));
  258. return true;
  259. }
  260. }
  261. inline bool GetVarint64(Slice* input, uint64_t* value) {
  262. const char* p = input->data();
  263. const char* limit = p + input->size();
  264. const char* q = GetVarint64Ptr(p, limit, value);
  265. if (q == nullptr) {
  266. return false;
  267. } else {
  268. *input = Slice(q, static_cast<size_t>(limit - q));
  269. return true;
  270. }
  271. }
  272. inline bool GetVarsignedint64(Slice* input, int64_t* value) {
  273. const char* p = input->data();
  274. const char* limit = p + input->size();
  275. const char* q = GetVarsignedint64Ptr(p, limit, value);
  276. if (q == nullptr) {
  277. return false;
  278. } else {
  279. *input = Slice(q, static_cast<size_t>(limit - q));
  280. return true;
  281. }
  282. }
  283. inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
  284. uint32_t len = 0;
  285. if (GetVarint32(input, &len) && input->size() >= len) {
  286. *result = Slice(input->data(), len);
  287. input->remove_prefix(len);
  288. return true;
  289. } else {
  290. return false;
  291. }
  292. }
  293. inline Slice GetLengthPrefixedSlice(const char* data) {
  294. uint32_t len = 0;
  295. // +5: we assume "data" is not corrupted
  296. // unsigned char is 7 bits, uint32_t is 32 bits, need 5 unsigned char
  297. auto p = GetVarint32Ptr(data, data + 5 /* limit */, &len);
  298. return Slice(p, len);
  299. }
  300. inline Slice GetSliceUntil(Slice* slice, char delimiter) {
  301. uint32_t len = 0;
  302. for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) {
  303. // nothing
  304. }
  305. Slice ret(slice->data(), len);
  306. slice->remove_prefix(len + ((len < slice->size()) ? 1 : 0));
  307. return ret;
  308. }
  309. template <class T>
  310. #ifdef ROCKSDB_UBSAN_RUN
  311. #if defined(__clang__)
  312. __attribute__((__no_sanitize__("alignment")))
  313. #elif defined(__GNUC__)
  314. __attribute__((__no_sanitize_undefined__))
  315. #endif
  316. #endif
  317. inline void
  318. PutUnaligned(T* memory, const T& value) {
  319. #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  320. char* nonAlignedMemory = reinterpret_cast<char*>(memory);
  321. memcpy(nonAlignedMemory, reinterpret_cast<const char*>(&value), sizeof(T));
  322. #else
  323. *memory = value;
  324. #endif
  325. }
  326. template <class T>
  327. #ifdef ROCKSDB_UBSAN_RUN
  328. #if defined(__clang__)
  329. __attribute__((__no_sanitize__("alignment")))
  330. #elif defined(__GNUC__)
  331. __attribute__((__no_sanitize_undefined__))
  332. #endif
  333. #endif
  334. inline void
  335. GetUnaligned(const T* memory, T* value) {
  336. #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
  337. char* nonAlignedMemory = reinterpret_cast<char*>(value);
  338. memcpy(nonAlignedMemory, reinterpret_cast<const char*>(memory), sizeof(T));
  339. #else
  340. *value = *memory;
  341. #endif
  342. }
  343. } // namespace ROCKSDB_NAMESPACE