unique_id.cc 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #include <cstdint>
  6. #include "table/unique_id_impl.h"
  7. #include "util/coding_lean.h"
  8. #include "util/hash.h"
  9. #include "util/string_util.h"
  10. namespace ROCKSDB_NAMESPACE {
  11. std::string EncodeSessionId(uint64_t upper, uint64_t lower) {
  12. std::string db_session_id(20U, '\0');
  13. char *buf = db_session_id.data();
  14. // Preserving `lower` is slightly tricky. 36^12 is slightly more than
  15. // 62 bits, so we use 12 chars plus the bottom two bits of one more.
  16. // (A tiny fraction of 20 digit strings go unused.)
  17. uint64_t a = (upper << 2) | (lower >> 62);
  18. uint64_t b = lower & (UINT64_MAX >> 2);
  19. PutBaseChars<36>(&buf, 8, a, /*uppercase*/ true);
  20. PutBaseChars<36>(&buf, 12, b, /*uppercase*/ true);
  21. assert(buf == &db_session_id.back() + 1);
  22. return db_session_id;
  23. }
  24. Status DecodeSessionId(const std::string &db_session_id, uint64_t *upper,
  25. uint64_t *lower) {
  26. const size_t len = db_session_id.size();
  27. if (len == 0) {
  28. return Status::NotSupported("Missing db_session_id");
  29. }
  30. // Anything from 13 to 24 chars is reasonable. We don't have to limit to
  31. // exactly 20.
  32. if (len < 13) {
  33. return Status::NotSupported("Too short db_session_id");
  34. }
  35. if (len > 24) {
  36. return Status::NotSupported("Too long db_session_id");
  37. }
  38. uint64_t a = 0, b = 0;
  39. const char *buf = &db_session_id.front();
  40. bool success = ParseBaseChars<36>(&buf, len - 12U, &a);
  41. if (!success) {
  42. return Status::NotSupported("Bad digit in db_session_id");
  43. }
  44. success = ParseBaseChars<36>(&buf, 12U, &b);
  45. if (!success) {
  46. return Status::NotSupported("Bad digit in db_session_id");
  47. }
  48. assert(buf == &db_session_id.back() + 1);
  49. *upper = a >> 2;
  50. *lower = (b & (UINT64_MAX >> 2)) | (a << 62);
  51. return Status::OK();
  52. }
  53. Status GetSstInternalUniqueId(const std::string &db_id,
  54. const std::string &db_session_id,
  55. uint64_t file_number, UniqueIdPtr out,
  56. bool force) {
  57. if (!force) {
  58. if (db_id.empty()) {
  59. return Status::NotSupported("Missing db_id");
  60. }
  61. if (file_number == 0) {
  62. return Status::NotSupported("Missing or bad file number");
  63. }
  64. if (db_session_id.empty()) {
  65. return Status::NotSupported("Missing db_session_id");
  66. }
  67. }
  68. uint64_t session_upper = 0; // Assignment to appease clang-analyze
  69. uint64_t session_lower = 0; // Assignment to appease clang-analyze
  70. {
  71. Status s = DecodeSessionId(db_session_id, &session_upper, &session_lower);
  72. if (!s.ok()) {
  73. if (!force) {
  74. return s;
  75. } else {
  76. // A reasonable fallback in case malformed
  77. Hash2x64(db_session_id.data(), db_session_id.size(), &session_upper,
  78. &session_lower);
  79. if (session_lower == 0) {
  80. session_lower = session_upper | 1;
  81. }
  82. }
  83. }
  84. }
  85. // Exactly preserve session lower to ensure that session ids generated
  86. // during the same process lifetime are guaranteed unique.
  87. // DBImpl also guarantees (in recent versions) that this is not zero,
  88. // so that we can guarantee unique ID is never all zeros. (Can't assert
  89. // that here because of testing and old versions.)
  90. // We put this first in anticipation of matching a small-ish set of cache
  91. // key prefixes to cover entries relevant to any DB.
  92. out.ptr[0] = session_lower;
  93. // Hash the session upper (~39 bits entropy) and DB id (120+ bits entropy)
  94. // for very high global uniqueness entropy.
  95. // (It is possible that many DBs descended from one common DB id are copied
  96. // around and proliferate, in which case session id is critical, but it is
  97. // more common for different DBs to have different DB ids.)
  98. uint64_t db_a, db_b;
  99. Hash2x64(db_id.data(), db_id.size(), session_upper, &db_a, &db_b);
  100. // Xor in file number for guaranteed uniqueness by file number for a given
  101. // session and DB id. (Xor slightly better than + here. See
  102. // https://github.com/pdillinger/unique_id )
  103. out.ptr[1] = db_a ^ file_number;
  104. // Extra (optional) global uniqueness
  105. if (out.extended) {
  106. out.ptr[2] = db_b;
  107. }
  108. return Status::OK();
  109. }
namespace {
// Offsets used by InternalUniqueIdToExternal / ExternalUniqueIdToInternal.
// The goal is for all zeros in the first 128 bits to map to itself, so that
// excluding zero in internal IDs (session_lower != 0 in
// GetSstInternalUniqueId) does the same for external IDs.
//
// InternalUniqueIdToExternal adds these to the two words before hashing and
// ExternalUniqueIdToInternal subtracts them after unhashing. The values are
// meaningless except for making the zero-maps-to-zero property work
// (presumably they are the preimage of (0, 0) under BijectiveHash2x64 --
// NOTE(review): confirm against the hash implementation).
constexpr uint64_t kHiOffsetForZero = 17391078804906429400U;
constexpr uint64_t kLoOffsetForZero = 6417269962128484497U;
}  // namespace
  118. void InternalUniqueIdToExternal(UniqueIdPtr in_out) {
  119. uint64_t hi, lo;
  120. BijectiveHash2x64(in_out.ptr[1] + kHiOffsetForZero,
  121. in_out.ptr[0] + kLoOffsetForZero, &hi, &lo);
  122. in_out.ptr[0] = lo;
  123. in_out.ptr[1] = hi;
  124. if (in_out.extended) {
  125. in_out.ptr[2] += lo + hi;
  126. }
  127. }
  128. void ExternalUniqueIdToInternal(UniqueIdPtr in_out) {
  129. uint64_t lo = in_out.ptr[0];
  130. uint64_t hi = in_out.ptr[1];
  131. if (in_out.extended) {
  132. in_out.ptr[2] -= lo + hi;
  133. }
  134. BijectiveUnhash2x64(hi, lo, &hi, &lo);
  135. in_out.ptr[0] = lo - kLoOffsetForZero;
  136. in_out.ptr[1] = hi - kHiOffsetForZero;
  137. }
  138. std::string EncodeUniqueIdBytes(UniqueIdPtr in) {
  139. std::string ret(in.extended ? 24U : 16U, '\0');
  140. EncodeFixed64(ret.data(), in.ptr[0]);
  141. EncodeFixed64(&ret[8], in.ptr[1]);
  142. if (in.extended) {
  143. EncodeFixed64(&ret[16], in.ptr[2]);
  144. }
  145. return ret;
  146. }
  147. Status DecodeUniqueIdBytes(const std::string &unique_id, UniqueIdPtr out) {
  148. if (unique_id.size() != (out.extended ? 24 : 16)) {
  149. return Status::NotSupported("Not a valid unique_id");
  150. }
  151. const char *buf = &unique_id.front();
  152. out.ptr[0] = DecodeFixed64(&buf[0]);
  153. out.ptr[1] = DecodeFixed64(&buf[8]);
  154. if (out.extended) {
  155. out.ptr[2] = DecodeFixed64(&buf[16]);
  156. }
  157. return Status::OK();
  158. }
  159. template <typename ID>
  160. Status GetUniqueIdFromTablePropertiesHelper(const TableProperties &props,
  161. std::string *out_id) {
  162. ID tmp{};
  163. Status s = GetSstInternalUniqueId(props.db_id, props.db_session_id,
  164. props.orig_file_number, &tmp);
  165. if (s.ok()) {
  166. InternalUniqueIdToExternal(&tmp);
  167. *out_id = EncodeUniqueIdBytes(&tmp);
  168. } else {
  169. out_id->clear();
  170. }
  171. return s;
  172. }
  173. Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props,
  174. std::string *out_id) {
  175. return GetUniqueIdFromTablePropertiesHelper<UniqueId64x3>(props, out_id);
  176. }
  177. Status GetUniqueIdFromTableProperties(const TableProperties &props,
  178. std::string *out_id) {
  179. return GetUniqueIdFromTablePropertiesHelper<UniqueId64x2>(props, out_id);
  180. }
  181. std::string UniqueIdToHumanString(const std::string &id) {
  182. // Not so efficient, but that's OK
  183. std::string str = Slice(id).ToString(/*hex*/ true);
  184. for (size_t i = 16; i < str.size(); i += 17) {
  185. str.insert(i, "-");
  186. }
  187. return str;
  188. }
  189. std::string InternalUniqueIdToHumanString(UniqueIdPtr in) {
  190. std::string str = "{";
  191. str += std::to_string(in.ptr[0]);
  192. str += ",";
  193. str += std::to_string(in.ptr[1]);
  194. if (in.extended) {
  195. str += ",";
  196. str += std::to_string(in.ptr[2]);
  197. }
  198. str += "}";
  199. return str;
  200. }
  201. } // namespace ROCKSDB_NAMESPACE