string_util.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
#include "util/string_util.h"

#include <algorithm>
#include <cctype>
#include <cerrno>
#include <cinttypes>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <limits>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "port/port.h"
#include "port/sys_time.h"
#include "rocksdb/slice.h"
  20. namespace ROCKSDB_NAMESPACE {
  21. const std::string kNullptrString = "nullptr";
  22. std::vector<std::string> StringSplit(const std::string& arg, char delim) {
  23. std::vector<std::string> splits;
  24. std::stringstream ss(arg);
  25. std::string item;
  26. while (std::getline(ss, item, delim)) {
  27. splits.push_back(item);
  28. }
  29. return splits;
  30. }
  31. // for micros < 10ms, print "XX us".
  32. // for micros < 10sec, print "XX ms".
  33. // for micros >= 10 sec, print "XX sec".
  34. // for micros <= 1 hour, print Y:X M:S".
  35. // for micros > 1 hour, print Z:Y:X H:M:S".
  36. int AppendHumanMicros(uint64_t micros, char* output, int len,
  37. bool fixed_format) {
  38. if (micros < 10000 && !fixed_format) {
  39. return snprintf(output, len, "%" PRIu64 " us", micros);
  40. } else if (micros < 10000000 && !fixed_format) {
  41. return snprintf(output, len, "%.3lf ms",
  42. static_cast<double>(micros) / 1000);
  43. } else if (micros < 1000000l * 60 && !fixed_format) {
  44. return snprintf(output, len, "%.3lf sec",
  45. static_cast<double>(micros) / 1000000);
  46. } else if (micros < 1000000ll * 60 * 60 && !fixed_format) {
  47. return snprintf(output, len, "%02" PRIu64 ":%05.3f M:S",
  48. micros / 1000000 / 60,
  49. static_cast<double>(micros % 60000000) / 1000000);
  50. } else {
  51. return snprintf(output, len, "%02" PRIu64 ":%02" PRIu64 ":%05.3f H:M:S",
  52. micros / 1000000 / 3600, (micros / 1000000 / 60) % 60,
  53. static_cast<double>(micros % 60000000) / 1000000);
  54. }
  55. }
  56. // for sizes >=10TB, print "XXTB"
  57. // for sizes >=10GB, print "XXGB"
  58. // etc.
  59. // append file size summary to output and return the len
  60. int AppendHumanBytes(uint64_t bytes, char* output, int len) {
  61. const uint64_t ull10 = 10;
  62. if (bytes >= ull10 << 40) {
  63. return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40);
  64. } else if (bytes >= ull10 << 30) {
  65. return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30);
  66. } else if (bytes >= ull10 << 20) {
  67. return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20);
  68. } else if (bytes >= ull10 << 10) {
  69. return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10);
  70. } else {
  71. return snprintf(output, len, "%" PRIu64 "B", bytes);
  72. }
  73. }
  74. void AppendNumberTo(std::string* str, uint64_t num) {
  75. char buf[30];
  76. snprintf(buf, sizeof(buf), "%" PRIu64, num);
  77. str->append(buf);
  78. }
  79. void AppendEscapedStringTo(std::string* str, const Slice& value) {
  80. for (size_t i = 0; i < value.size(); i++) {
  81. char c = value[i];
  82. if (c >= ' ' && c <= '~') {
  83. str->push_back(c);
  84. } else {
  85. char buf[10];
  86. snprintf(buf, sizeof(buf), "\\x%02x",
  87. static_cast<unsigned int>(c) & 0xff);
  88. str->append(buf);
  89. }
  90. }
  91. }
  92. std::string NumberToHumanString(int64_t num) {
  93. char buf[21];
  94. int64_t absnum;
  95. if (num < 0) {
  96. // abs(INT64_MIN) is INT64_MAX+1 which overflows int64_t and become itself.
  97. // So we convert it to INT64_MAX to avoid fall into <10000 slot.
  98. absnum = num == INT64_MIN ? INT64_MAX : -num;
  99. } else {
  100. absnum = num;
  101. }
  102. if (absnum < 10000) {
  103. snprintf(buf, sizeof(buf), "%" PRIi64, num);
  104. } else if (absnum < 10000000) {
  105. snprintf(buf, sizeof(buf), "%" PRIi64 "K", num / 1000);
  106. } else if (absnum < 10000000000LL) {
  107. snprintf(buf, sizeof(buf), "%" PRIi64 "M", num / 1000000);
  108. } else {
  109. snprintf(buf, sizeof(buf), "%" PRIi64 "G", num / 1000000000);
  110. }
  111. return std::string(buf);
  112. }
  113. std::string BytesToHumanString(uint64_t bytes) {
  114. const char* size_name[] = {"KB", "MB", "GB", "TB"};
  115. double final_size = static_cast<double>(bytes);
  116. size_t size_idx;
  117. // always start with KB
  118. final_size /= 1024;
  119. size_idx = 0;
  120. while (size_idx < 3 && final_size >= 1024) {
  121. final_size /= 1024;
  122. size_idx++;
  123. }
  124. char buf[20];
  125. snprintf(buf, sizeof(buf), "%.2f %s", final_size, size_name[size_idx]);
  126. return std::string(buf);
  127. }
  128. std::string TimeToHumanString(int unixtime) {
  129. char time_buffer[80];
  130. time_t rawtime = unixtime;
  131. struct tm tInfo;
  132. struct tm* timeinfo = port::LocalTimeR(&rawtime, &tInfo);
  133. assert(timeinfo == &tInfo);
  134. strftime(time_buffer, 80, "%c", timeinfo);
  135. return std::string(time_buffer);
  136. }
  137. std::string EscapeString(const Slice& value) {
  138. std::string r;
  139. AppendEscapedStringTo(&r, value);
  140. return r;
  141. }
  142. bool ConsumeDecimalNumber(Slice* in, uint64_t* val) {
  143. uint64_t v = 0;
  144. int digits = 0;
  145. while (!in->empty()) {
  146. char c = (*in)[0];
  147. if (c >= '0' && c <= '9') {
  148. ++digits;
  149. const unsigned int delta = (c - '0');
  150. static const uint64_t kMaxUint64 = ~static_cast<uint64_t>(0);
  151. if (v > kMaxUint64 / 10 ||
  152. (v == kMaxUint64 / 10 && delta > kMaxUint64 % 10)) {
  153. // Overflow
  154. return false;
  155. }
  156. v = (v * 10) + delta;
  157. in->remove_prefix(1);
  158. } else {
  159. break;
  160. }
  161. }
  162. *val = v;
  163. return (digits > 0);
  164. }
  165. bool isSpecialChar(const char c) {
  166. if (c == '\\' || c == '#' || c == ':' || c == '\r' || c == '\n') {
  167. return true;
  168. }
  169. return false;
  170. }
namespace {
// (lookup key, replacement) character pair; the element type of the
// conversion tables in UnescapeChar() and EscapeChar().
using CharMap = std::pair<char, char>;
}  // namespace
  174. char UnescapeChar(const char c) {
  175. static const CharMap convert_map[] = {{'r', '\r'}, {'n', '\n'}};
  176. auto iter = std::find_if(std::begin(convert_map), std::end(convert_map),
  177. [c](const CharMap& p) { return p.first == c; });
  178. if (iter == std::end(convert_map)) {
  179. return c;
  180. }
  181. return iter->second;
  182. }
  183. char EscapeChar(const char c) {
  184. static const CharMap convert_map[] = {{'\n', 'n'}, {'\r', 'r'}};
  185. auto iter = std::find_if(std::begin(convert_map), std::end(convert_map),
  186. [c](const CharMap& p) { return p.first == c; });
  187. if (iter == std::end(convert_map)) {
  188. return c;
  189. }
  190. return iter->second;
  191. }
  192. std::string EscapeOptionString(const std::string& raw_string) {
  193. std::string output;
  194. for (auto c : raw_string) {
  195. if (isSpecialChar(c)) {
  196. output += '\\';
  197. output += EscapeChar(c);
  198. } else {
  199. output += c;
  200. }
  201. }
  202. return output;
  203. }
  204. std::string UnescapeOptionString(const std::string& escaped_string) {
  205. bool escaped = false;
  206. std::string output;
  207. for (auto c : escaped_string) {
  208. if (escaped) {
  209. output += UnescapeChar(c);
  210. escaped = false;
  211. } else {
  212. if (c == '\\') {
  213. escaped = true;
  214. continue;
  215. }
  216. output += c;
  217. }
  218. }
  219. return output;
  220. }
  221. std::string trim(const std::string& str) {
  222. if (str.empty()) {
  223. return std::string();
  224. }
  225. size_t start = 0;
  226. size_t end = str.size() - 1;
  227. while (isspace(str[start]) != 0 && start < end) {
  228. ++start;
  229. }
  230. while (isspace(str[end]) != 0 && start < end) {
  231. --end;
  232. }
  233. if (start <= end) {
  234. return str.substr(start, end - start + 1);
  235. }
  236. return std::string();
  237. }
  238. bool EndsWith(const std::string& string, const std::string& pattern) {
  239. size_t plen = pattern.size();
  240. size_t slen = string.size();
  241. if (plen <= slen) {
  242. return string.compare(slen - plen, plen, pattern) == 0;
  243. } else {
  244. return false;
  245. }
  246. }
  247. bool StartsWith(const std::string& string, const std::string& pattern) {
  248. return string.compare(0, pattern.size(), pattern) == 0;
  249. }
  250. bool ParseBoolean(const std::string& type, const std::string& value) {
  251. if (value == "true" || value == "1") {
  252. return true;
  253. } else if (value == "false" || value == "0") {
  254. return false;
  255. }
  256. throw std::invalid_argument(type);
  257. }
  258. uint8_t ParseUint8(const std::string& value) {
  259. uint64_t num = ParseUint64(value);
  260. if ((num >> 8LL) == 0) {
  261. return static_cast<uint8_t>(num);
  262. } else {
  263. throw std::out_of_range(value);
  264. }
  265. }
  266. uint32_t ParseUint32(const std::string& value) {
  267. uint64_t num = ParseUint64(value);
  268. if ((num >> 32LL) == 0) {
  269. return static_cast<uint32_t>(num);
  270. } else {
  271. throw std::out_of_range(value);
  272. }
  273. }
  274. int32_t ParseInt32(const std::string& value) {
  275. int64_t num = ParseInt64(value);
  276. if (num <= std::numeric_limits<int32_t>::max() &&
  277. num >= std::numeric_limits<int32_t>::min()) {
  278. return static_cast<int32_t>(num);
  279. } else {
  280. throw std::out_of_range(value);
  281. }
  282. }
  283. uint64_t ParseUint64(const std::string& value) {
  284. size_t endchar;
  285. #ifndef CYGWIN
  286. uint64_t num = std::stoull(value.c_str(), &endchar);
  287. #else
  288. char* endptr;
  289. uint64_t num = std::strtoul(value.c_str(), &endptr, 0);
  290. endchar = endptr - value.c_str();
  291. #endif
  292. if (endchar < value.length()) {
  293. char c = value[endchar];
  294. if (c == 'k' || c == 'K') {
  295. num <<= 10LL;
  296. } else if (c == 'm' || c == 'M') {
  297. num <<= 20LL;
  298. } else if (c == 'g' || c == 'G') {
  299. num <<= 30LL;
  300. } else if (c == 't' || c == 'T') {
  301. num <<= 40LL;
  302. }
  303. }
  304. return num;
  305. }
  306. int64_t ParseInt64(const std::string& value) {
  307. size_t endchar;
  308. #ifndef CYGWIN
  309. int64_t num = std::stoll(value.c_str(), &endchar);
  310. #else
  311. char* endptr;
  312. int64_t num = std::strtoll(value.c_str(), &endptr, 0);
  313. endchar = endptr - value.c_str();
  314. #endif
  315. if (endchar < value.length()) {
  316. char c = value[endchar];
  317. if (c == 'k' || c == 'K') {
  318. num <<= 10LL;
  319. } else if (c == 'm' || c == 'M') {
  320. num <<= 20LL;
  321. } else if (c == 'g' || c == 'G') {
  322. num <<= 30LL;
  323. } else if (c == 't' || c == 'T') {
  324. num <<= 40LL;
  325. }
  326. }
  327. return num;
  328. }
  329. int ParseInt(const std::string& value) {
  330. size_t endchar;
  331. #ifndef CYGWIN
  332. int num = std::stoi(value.c_str(), &endchar);
  333. #else
  334. char* endptr;
  335. int num = std::strtoul(value.c_str(), &endptr, 0);
  336. endchar = endptr - value.c_str();
  337. #endif
  338. if (endchar < value.length()) {
  339. char c = value[endchar];
  340. if (c == 'k' || c == 'K') {
  341. num <<= 10;
  342. } else if (c == 'm' || c == 'M') {
  343. num <<= 20;
  344. } else if (c == 'g' || c == 'G') {
  345. num <<= 30;
  346. }
  347. }
  348. return num;
  349. }
  350. double ParseDouble(const std::string& value) {
  351. #ifndef CYGWIN
  352. return std::stod(value);
  353. #else
  354. return std::strtod(value.c_str(), 0);
  355. #endif
  356. }
  357. size_t ParseSizeT(const std::string& value) {
  358. return static_cast<size_t>(ParseUint64(value));
  359. }
  360. std::vector<int> ParseVectorInt(const std::string& value) {
  361. std::vector<int> result;
  362. size_t start = 0;
  363. while (start < value.size()) {
  364. size_t end = value.find(':', start);
  365. if (end == std::string::npos) {
  366. result.push_back(ParseInt(value.substr(start)));
  367. break;
  368. } else {
  369. result.push_back(ParseInt(value.substr(start, end - start)));
  370. start = end + 1;
  371. }
  372. }
  373. return result;
  374. }
  375. bool SerializeIntVector(const std::vector<int>& vec, std::string* value) {
  376. *value = "";
  377. for (size_t i = 0; i < vec.size(); ++i) {
  378. if (i > 0) {
  379. *value += ":";
  380. }
  381. *value += std::to_string(vec[i]);
  382. }
  383. return true;
  384. }
  385. int ParseTimeStringToSeconds(const std::string& value) {
  386. int hours, minutes;
  387. char colon;
  388. std::istringstream stream(value);
  389. stream >> hours >> colon >> minutes;
  390. if (stream.fail() || !stream.eof() || colon != ':') {
  391. return -1;
  392. }
  393. if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59) {
  394. return -1;
  395. }
  396. return hours * 3600 + minutes * 60;
  397. }
  398. bool TryParseTimeRangeString(const std::string& value, int& start_time,
  399. int& end_time) {
  400. if (value.empty()) {
  401. start_time = 0;
  402. end_time = 0;
  403. return true;
  404. }
  405. auto split = StringSplit(value, '-');
  406. if (split.size() != 2) {
  407. return false;
  408. }
  409. start_time = ParseTimeStringToSeconds(split[0]);
  410. if (start_time < 0) {
  411. return false;
  412. }
  413. end_time = ParseTimeStringToSeconds(split[1]);
  414. if (end_time < 0) {
  415. return false;
  416. }
  417. return true;
  418. }
  419. // Copied from folly/string.cpp:
  420. // https://github.com/facebook/folly/blob/0deef031cb8aab76dc7e736f8b7c22d701d5f36b/folly/String.cpp#L457
  421. // There are two variants of `strerror_r` function, one returns
  422. // `int`, and another returns `char*`. Selecting proper version using
  423. // preprocessor macros portably is extremely hard.
  424. //
  425. // For example, on Android function signature depends on `__USE_GNU` and
  426. // `__ANDROID_API__` macros (https://git.io/fjBBE).
  427. //
  428. // So we are using C++ overloading trick: we pass a pointer of
  429. // `strerror_r` to `invoke_strerror_r` function, and C++ compiler
  430. // selects proper function.
  431. #if !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)))
  432. [[maybe_unused]]
  433. static std::string invoke_strerror_r(int (*strerror_r)(int, char*, size_t),
  434. int err, char* buf, size_t buflen) {
  435. // Using XSI-compatible strerror_r
  436. int r = strerror_r(err, buf, buflen);
  437. // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero
  438. if (r != 0) {
  439. snprintf(buf, buflen, "Unknown error %d (strerror_r failed with error %d)",
  440. err, errno);
  441. }
  442. return buf;
  443. }
  444. [[maybe_unused]]
  445. static std::string invoke_strerror_r(char* (*strerror_r)(int, char*, size_t),
  446. int err, char* buf, size_t buflen) {
  447. // Using GNU strerror_r
  448. return strerror_r(err, buf, buflen);
  449. }
  450. #endif // !(defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER)))
  451. std::string errnoStr(int err) {
  452. char buf[1024];
  453. buf[0] = '\0';
  454. std::string result;
  455. // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html
  456. // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html
  457. #if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))
  458. // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added
  459. // as well. So maybe we should use this across all platforms (together
  460. // with strerrorlen_s). Note strerror_r and _s have swapped args.
  461. int r = strerror_s(buf, sizeof(buf), err);
  462. if (r != 0) {
  463. snprintf(buf, sizeof(buf),
  464. "Unknown error %d (strerror_r failed with error %d)", err, errno);
  465. }
  466. result.assign(buf);
  467. #else
  468. // Using any strerror_r
  469. result.assign(invoke_strerror_r(strerror_r, err, buf, sizeof(buf)));
  470. #endif
  471. return result;
  472. }
  473. } // namespace ROCKSDB_NAMESPACE