// util/compression.h (listing/page-number extraction residue removed)
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. #pragma once
  11. #include <algorithm>
  12. #include <limits>
  13. #ifdef ROCKSDB_MALLOC_USABLE_SIZE
  14. #ifdef OS_FREEBSD
  15. #include <malloc_np.h>
  16. #else // OS_FREEBSD
  17. #include <malloc.h>
  18. #endif // OS_FREEBSD
  19. #endif // ROCKSDB_MALLOC_USABLE_SIZE
  20. #include <string>
  21. #include "memory/memory_allocator.h"
  22. #include "rocksdb/options.h"
  23. #include "rocksdb/table.h"
  24. #include "util/coding.h"
  25. #include "util/compression_context_cache.h"
  26. #include "util/string_util.h"
  27. #ifdef SNAPPY
  28. #include <snappy.h>
  29. #endif
  30. #ifdef ZLIB
  31. #include <zlib.h>
  32. #endif
  33. #ifdef BZIP2
  34. #include <bzlib.h>
  35. #endif
  36. #if defined(LZ4)
  37. #include <lz4.h>
  38. #include <lz4hc.h>
  39. #endif
  40. #if defined(ZSTD)
  41. #include <zstd.h>
  42. #if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+
  43. #include <zdict.h>
  44. #endif // ZSTD_VERSION_NUMBER >= 10103
  45. namespace ROCKSDB_NAMESPACE {
  46. // Need this for the context allocation override
  47. // On windows we need to do this explicitly
  48. #if (ZSTD_VERSION_NUMBER >= 500)
  49. #if defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) && \
  50. defined(ZSTD_STATIC_LINKING_ONLY)
  51. #define ROCKSDB_ZSTD_CUSTOM_MEM
  52. namespace port {
  53. ZSTD_customMem GetJeZstdAllocationOverrides();
  54. } // namespace port
  55. #endif // defined(ROCKSDB_JEMALLOC) && defined(OS_WIN) &&
  56. // defined(ZSTD_STATIC_LINKING_ONLY)
  57. // We require `ZSTD_sizeof_DDict` and `ZSTD_createDDict_byReference` to use
  58. // `ZSTD_DDict`. The former was introduced in v1.0.0 and the latter was
  59. // introduced in v1.1.3. But an important bug fix for `ZSTD_sizeof_DDict` came
  60. // in v1.1.4, so that is the version we require. As of today's latest version
  61. // (v1.3.8), they are both still in the experimental API, which means they are
  62. // only exported when the compiler flag `ZSTD_STATIC_LINKING_ONLY` is set.
  63. #if defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
  64. #define ROCKSDB_ZSTD_DDICT
  65. #endif // defined(ZSTD_STATIC_LINKING_ONLY) && ZSTD_VERSION_NUMBER >= 10104
  66. // Cached data represents a portion that can be re-used
  67. // If, in the future we have more than one native context to
  68. // cache we can arrange this as a tuple
// Cached ZSTD decompression context. An instance either owns its
// ZSTD_DCtx (cache_idx_ == -1, freed in the destructor) or borrows one
// from the CompressionContextCache (cache_idx_ >= 0 records the slot).
// Move-only: copying a native context would be meaningless.
class ZSTDUncompressCachedData {
 public:
  using ZSTDNativeContext = ZSTD_DCtx*;
  ZSTDUncompressCachedData() {}
  // Init from cache
  ZSTDUncompressCachedData(const ZSTDUncompressCachedData& o) = delete;
  ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
  ZSTDUncompressCachedData(ZSTDUncompressCachedData&& o) ROCKSDB_NOEXCEPT
      : ZSTDUncompressCachedData() {
    *this = std::move(o);
  }
  ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&& o)
      ROCKSDB_NOEXCEPT {
    // Only an empty (default-constructed) instance may be assigned to;
    // the swap then transfers ownership from `o` to *this.
    assert(zstd_ctx_ == nullptr);
    std::swap(zstd_ctx_, o.zstd_ctx_);
    std::swap(cache_idx_, o.cache_idx_);
    return *this;
  }
  ZSTDNativeContext Get() const { return zstd_ctx_; }
  int64_t GetCacheIndex() const { return cache_idx_; }
  // Lazily allocates an owned context; cache_idx_ is reset to -1 so the
  // destructor knows to free it.
  void CreateIfNeeded() {
    if (zstd_ctx_ == nullptr) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through jemalloc (Windows build).
      zstd_ctx_ =
          ZSTD_createDCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createDCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
      cache_idx_ = -1;
    }
  }
  // Adopts a context borrowed from the cache; `idx` records the cache
  // slot so the destructor does not free it (it is returned instead).
  void InitFromCache(const ZSTDUncompressCachedData& o, int64_t idx) {
    zstd_ctx_ = o.zstd_ctx_;
    cache_idx_ = idx;
  }
  ~ZSTDUncompressCachedData() {
    // Free only contexts this instance owns; borrowed ones belong to the
    // cache and are handed back by UncompressionContext.
    if (zstd_ctx_ != nullptr && cache_idx_ == -1) {
      ZSTD_freeDCtx(zstd_ctx_);
    }
  }

 private:
  ZSTDNativeContext zstd_ctx_ = nullptr;
  int64_t cache_idx_ = -1;  // -1 means this instance owns the context
};
  113. #endif // (ZSTD_VERSION_NUMBER >= 500)
  114. } // namespace ROCKSDB_NAMESPACE
  115. #endif // ZSTD
  116. #if !(defined ZSTD) || !(ZSTD_VERSION_NUMBER >= 500)
  117. namespace ROCKSDB_NAMESPACE {
// Stub used when ZSTD (>= v0.5.x) is not compiled in: presents the same
// interface as the real ZSTDUncompressCachedData but carries no native
// context. `padding` keeps the object non-empty.
class ZSTDUncompressCachedData {
  void* padding;  // unused
 public:
  using ZSTDNativeContext = void*;
  ZSTDUncompressCachedData() {}
  ZSTDUncompressCachedData(const ZSTDUncompressCachedData&) {}
  ZSTDUncompressCachedData& operator=(const ZSTDUncompressCachedData&) = delete;
  ZSTDUncompressCachedData(ZSTDUncompressCachedData&&)
      ROCKSDB_NOEXCEPT = default;
  ZSTDUncompressCachedData& operator=(ZSTDUncompressCachedData&&)
      ROCKSDB_NOEXCEPT = default;
  // No native context exists in this configuration.
  ZSTDNativeContext Get() const { return nullptr; }
  int64_t GetCacheIndex() const { return -1; }
  void CreateIfNeeded() {}
  void InitFromCache(const ZSTDUncompressCachedData&, int64_t) {}

 private:
  // References `padding` so compilers don't warn about an unused member.
  void ignore_padding__() { padding = nullptr; }
};
  136. } // namespace ROCKSDB_NAMESPACE
  137. #endif
  138. #if defined(XPRESS)
  139. #include "port/xpress.h"
  140. #endif
  141. namespace ROCKSDB_NAMESPACE {
  142. // Holds dictionary and related data, like ZSTD's digested compression
  143. // dictionary.
struct CompressionDict {
#if ZSTD_VERSION_NUMBER >= 700
  // Digested form of dict_; nullptr when digestion failed, the type is
  // not ZSTD, or the dictionary is empty.
  ZSTD_CDict* zstd_cdict_ = nullptr;
#endif  // ZSTD_VERSION_NUMBER >= 700
  // Raw dictionary bytes; always retained so non-ZSTD codecs (or a failed
  // digestion) can fall back to the plain dictionary.
  std::string dict_;

 public:
#if ZSTD_VERSION_NUMBER >= 700
  CompressionDict(std::string dict, CompressionType type, int level) {
#else   // ZSTD_VERSION_NUMBER >= 700
  CompressionDict(std::string dict, CompressionType /*type*/, int /*level*/) {
#endif  // ZSTD_VERSION_NUMBER >= 700
    dict_ = std::move(dict);
#if ZSTD_VERSION_NUMBER >= 700
    zstd_cdict_ = nullptr;
    if (!dict_.empty() && (type == kZSTD || type == kZSTDNotFinalCompression)) {
      if (level == CompressionOptions::kDefaultCompressionLevel) {
        // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
        // https://github.com/facebook/zstd/issues/1148
        level = 3;
      }
      // Should be safe (but slower) if below call fails as we'll use the
      // raw dictionary to compress.
      zstd_cdict_ = ZSTD_createCDict(dict_.data(), dict_.size(), level);
      assert(zstd_cdict_ != nullptr);
    }
#endif  // ZSTD_VERSION_NUMBER >= 700
  }

  ~CompressionDict() {
#if ZSTD_VERSION_NUMBER >= 700
    size_t res = 0;
    if (zstd_cdict_ != nullptr) {
      res = ZSTD_freeCDict(zstd_cdict_);
    }
    assert(res == 0);  // Last I checked they can't fail
    (void)res;         // prevent unused var warning
#endif  // ZSTD_VERSION_NUMBER >= 700
  }

#if ZSTD_VERSION_NUMBER >= 700
  const ZSTD_CDict* GetDigestedZstdCDict() const { return zstd_cdict_; }
#endif  // ZSTD_VERSION_NUMBER >= 700

  Slice GetRawDict() const { return dict_; }

  // Shared, immutable empty dictionary for callers that have none.
  static const CompressionDict& GetEmptyDict() {
    static CompressionDict empty_dict{};
    return empty_dict;
  }

  CompressionDict() = default;
  // Disable copy/move
  CompressionDict(const CompressionDict&) = delete;
  CompressionDict& operator=(const CompressionDict&) = delete;
  CompressionDict(CompressionDict&&) = delete;
  CompressionDict& operator=(CompressionDict&&) = delete;
};
  196. // Holds dictionary and related data, like ZSTD's digested uncompression
  197. // dictionary.
  198. struct UncompressionDict {
  199. // Block containing the data for the compression dictionary in case the
  200. // constructor that takes a string parameter is used.
  201. std::string dict_;
  202. // Block containing the data for the compression dictionary in case the
  203. // constructor that takes a Slice parameter is used and the passed in
  204. // CacheAllocationPtr is not nullptr.
  205. CacheAllocationPtr allocation_;
  206. // Slice pointing to the compression dictionary data. Can point to
  207. // dict_, allocation_, or some other memory location, depending on how
  208. // the object was constructed.
  209. Slice slice_;
  210. #ifdef ROCKSDB_ZSTD_DDICT
  211. // Processed version of the contents of slice_ for ZSTD compression.
  212. ZSTD_DDict* zstd_ddict_ = nullptr;
  213. #endif // ROCKSDB_ZSTD_DDICT
  214. #ifdef ROCKSDB_ZSTD_DDICT
  215. UncompressionDict(std::string dict, bool using_zstd)
  216. #else // ROCKSDB_ZSTD_DDICT
  217. UncompressionDict(std::string dict, bool /* using_zstd */)
  218. #endif // ROCKSDB_ZSTD_DDICT
  219. : dict_(std::move(dict)), slice_(dict_) {
  220. #ifdef ROCKSDB_ZSTD_DDICT
  221. if (!slice_.empty() && using_zstd) {
  222. zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
  223. assert(zstd_ddict_ != nullptr);
  224. }
  225. #endif // ROCKSDB_ZSTD_DDICT
  226. }
  227. #ifdef ROCKSDB_ZSTD_DDICT
  228. UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
  229. bool using_zstd)
  230. #else // ROCKSDB_ZSTD_DDICT
  231. UncompressionDict(Slice slice, CacheAllocationPtr&& allocation,
  232. bool /* using_zstd */)
  233. #endif // ROCKSDB_ZSTD_DDICT
  234. : allocation_(std::move(allocation)), slice_(std::move(slice)) {
  235. #ifdef ROCKSDB_ZSTD_DDICT
  236. if (!slice_.empty() && using_zstd) {
  237. zstd_ddict_ = ZSTD_createDDict_byReference(slice_.data(), slice_.size());
  238. assert(zstd_ddict_ != nullptr);
  239. }
  240. #endif // ROCKSDB_ZSTD_DDICT
  241. }
  242. UncompressionDict(UncompressionDict&& rhs)
  243. : dict_(std::move(rhs.dict_)),
  244. allocation_(std::move(rhs.allocation_)),
  245. slice_(std::move(rhs.slice_))
  246. #ifdef ROCKSDB_ZSTD_DDICT
  247. ,
  248. zstd_ddict_(rhs.zstd_ddict_)
  249. #endif
  250. {
  251. #ifdef ROCKSDB_ZSTD_DDICT
  252. rhs.zstd_ddict_ = nullptr;
  253. #endif
  254. }
  255. ~UncompressionDict() {
  256. #ifdef ROCKSDB_ZSTD_DDICT
  257. size_t res = 0;
  258. if (zstd_ddict_ != nullptr) {
  259. res = ZSTD_freeDDict(zstd_ddict_);
  260. }
  261. assert(res == 0); // Last I checked they can't fail
  262. (void)res; // prevent unused var warning
  263. #endif // ROCKSDB_ZSTD_DDICT
  264. }
  265. UncompressionDict& operator=(UncompressionDict&& rhs) {
  266. if (this == &rhs) {
  267. return *this;
  268. }
  269. dict_ = std::move(rhs.dict_);
  270. allocation_ = std::move(rhs.allocation_);
  271. slice_ = std::move(rhs.slice_);
  272. #ifdef ROCKSDB_ZSTD_DDICT
  273. zstd_ddict_ = rhs.zstd_ddict_;
  274. rhs.zstd_ddict_ = nullptr;
  275. #endif
  276. return *this;
  277. }
  278. // The object is self-contained if the string constructor is used, or the
  279. // Slice constructor is invoked with a non-null allocation. Otherwise, it
  280. // is the caller's responsibility to ensure that the underlying storage
  281. // outlives this object.
  282. bool own_bytes() const { return !dict_.empty() || allocation_; }
  283. const Slice& GetRawDict() const { return slice_; }
  284. #ifdef ROCKSDB_ZSTD_DDICT
  285. const ZSTD_DDict* GetDigestedZstdDDict() const { return zstd_ddict_; }
  286. #endif // ROCKSDB_ZSTD_DDICT
  287. static const UncompressionDict& GetEmptyDict() {
  288. static UncompressionDict empty_dict{};
  289. return empty_dict;
  290. }
  291. size_t ApproximateMemoryUsage() const {
  292. size_t usage = sizeof(struct UncompressionDict);
  293. usage += dict_.size();
  294. if (allocation_) {
  295. auto allocator = allocation_.get_deleter().allocator;
  296. if (allocator) {
  297. usage += allocator->UsableSize(allocation_.get(), slice_.size());
  298. } else {
  299. usage += slice_.size();
  300. }
  301. }
  302. #ifdef ROCKSDB_ZSTD_DDICT
  303. usage += ZSTD_sizeof_DDict(zstd_ddict_);
  304. #endif // ROCKSDB_ZSTD_DDICT
  305. return usage;
  306. }
  307. UncompressionDict() = default;
  308. // Disable copy
  309. UncompressionDict(const CompressionDict&) = delete;
  310. UncompressionDict& operator=(const CompressionDict&) = delete;
  311. };
// Per-operation native compression state. Only ZSTD needs a native
// context; for every other type this is an empty RAII shell.
class CompressionContext {
 private:
#if defined(ZSTD) && (ZSTD_VERSION_NUMBER >= 500)
  ZSTD_CCtx* zstd_ctx_ = nullptr;
  // Allocates a ZSTD compression context when the type requires one.
  void CreateNativeContext(CompressionType type) {
    if (type == kZSTD || type == kZSTDNotFinalCompression) {
#ifdef ROCKSDB_ZSTD_CUSTOM_MEM
      // Route ZSTD's internal allocations through jemalloc (Windows build).
      zstd_ctx_ =
          ZSTD_createCCtx_advanced(port::GetJeZstdAllocationOverrides());
#else   // ROCKSDB_ZSTD_CUSTOM_MEM
      zstd_ctx_ = ZSTD_createCCtx();
#endif  // ROCKSDB_ZSTD_CUSTOM_MEM
    }
  }
  void DestroyNativeContext() {
    if (zstd_ctx_ != nullptr) {
      ZSTD_freeCCtx(zstd_ctx_);
    }
  }

 public:
  // callable inside ZSTD_Compress
  ZSTD_CCtx* ZSTDPreallocCtx() const {
    assert(zstd_ctx_ != nullptr);
    return zstd_ctx_;
  }
#else   // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 private:
  // No-op stubs when ZSTD is unavailable.
  void CreateNativeContext(CompressionType /* type */) {}
  void DestroyNativeContext() {}
#endif  // ZSTD && (ZSTD_VERSION_NUMBER >= 500)
 public:
  explicit CompressionContext(CompressionType type) {
    CreateNativeContext(type);
  }
  ~CompressionContext() { DestroyNativeContext(); }
  CompressionContext(const CompressionContext&) = delete;
  CompressionContext& operator=(const CompressionContext&) = delete;
};
  350. class CompressionInfo {
  351. const CompressionOptions& opts_;
  352. const CompressionContext& context_;
  353. const CompressionDict& dict_;
  354. const CompressionType type_;
  355. const uint64_t sample_for_compression_;
  356. public:
  357. CompressionInfo(const CompressionOptions& _opts,
  358. const CompressionContext& _context,
  359. const CompressionDict& _dict, CompressionType _type,
  360. uint64_t _sample_for_compression)
  361. : opts_(_opts),
  362. context_(_context),
  363. dict_(_dict),
  364. type_(_type),
  365. sample_for_compression_(_sample_for_compression) {}
  366. const CompressionOptions& options() const { return opts_; }
  367. const CompressionContext& context() const { return context_; }
  368. const CompressionDict& dict() const { return dict_; }
  369. CompressionType type() const { return type_; }
  370. uint64_t SampleForCompression() const { return sample_for_compression_; }
  371. };
// Per-operation native decompression state. For ZSTD it borrows a
// context from the process-wide CompressionContextCache and returns it
// on destruction; other types need no native context.
class UncompressionContext {
 private:
  CompressionContextCache* ctx_cache_ = nullptr;
  ZSTDUncompressCachedData uncomp_cached_data_;

 public:
  struct NoCache {};
  // Do not use context cache, used by TableBuilder
  UncompressionContext(NoCache, CompressionType /* type */) {}
  explicit UncompressionContext(CompressionType type) {
    if (type == kZSTD || type == kZSTDNotFinalCompression) {
      ctx_cache_ = CompressionContextCache::Instance();
      uncomp_cached_data_ = ctx_cache_->GetCachedZSTDUncompressData();
    }
  }
  ~UncompressionContext() {
    // A non-negative cache index means the context belongs to the cache
    // and must be handed back rather than freed here.
    if (uncomp_cached_data_.GetCacheIndex() != -1) {
      assert(ctx_cache_ != nullptr);
      ctx_cache_->ReturnCachedZSTDUncompressData(
          uncomp_cached_data_.GetCacheIndex());
    }
  }
  UncompressionContext(const UncompressionContext&) = delete;
  UncompressionContext& operator=(const UncompressionContext&) = delete;
  ZSTDUncompressCachedData::ZSTDNativeContext GetZSTDContext() const {
    return uncomp_cached_data_.Get();
  }
};
  399. class UncompressionInfo {
  400. const UncompressionContext& context_;
  401. const UncompressionDict& dict_;
  402. const CompressionType type_;
  403. public:
  404. UncompressionInfo(const UncompressionContext& _context,
  405. const UncompressionDict& _dict, CompressionType _type)
  406. : context_(_context), dict_(_dict), type_(_type) {}
  407. const UncompressionContext& context() const { return context_; }
  408. const UncompressionDict& dict() const { return dict_; }
  409. CompressionType type() const { return type_; }
  410. };
  411. inline bool Snappy_Supported() {
  412. #ifdef SNAPPY
  413. return true;
  414. #else
  415. return false;
  416. #endif
  417. }
  418. inline bool Zlib_Supported() {
  419. #ifdef ZLIB
  420. return true;
  421. #else
  422. return false;
  423. #endif
  424. }
  425. inline bool BZip2_Supported() {
  426. #ifdef BZIP2
  427. return true;
  428. #else
  429. return false;
  430. #endif
  431. }
  432. inline bool LZ4_Supported() {
  433. #ifdef LZ4
  434. return true;
  435. #else
  436. return false;
  437. #endif
  438. }
  439. inline bool XPRESS_Supported() {
  440. #ifdef XPRESS
  441. return true;
  442. #else
  443. return false;
  444. #endif
  445. }
  446. inline bool ZSTD_Supported() {
  447. #ifdef ZSTD
  448. // ZSTD format is finalized since version 0.8.0.
  449. return (ZSTD_versionNumber() >= 800);
  450. #else
  451. return false;
  452. #endif
  453. }
  454. inline bool ZSTDNotFinal_Supported() {
  455. #ifdef ZSTD
  456. return true;
  457. #else
  458. return false;
  459. #endif
  460. }
  461. inline bool CompressionTypeSupported(CompressionType compression_type) {
  462. switch (compression_type) {
  463. case kNoCompression:
  464. return true;
  465. case kSnappyCompression:
  466. return Snappy_Supported();
  467. case kZlibCompression:
  468. return Zlib_Supported();
  469. case kBZip2Compression:
  470. return BZip2_Supported();
  471. case kLZ4Compression:
  472. return LZ4_Supported();
  473. case kLZ4HCCompression:
  474. return LZ4_Supported();
  475. case kXpressCompression:
  476. return XPRESS_Supported();
  477. case kZSTDNotFinalCompression:
  478. return ZSTDNotFinal_Supported();
  479. case kZSTD:
  480. return ZSTD_Supported();
  481. default:
  482. assert(false);
  483. return false;
  484. }
  485. }
  486. inline std::string CompressionTypeToString(CompressionType compression_type) {
  487. switch (compression_type) {
  488. case kNoCompression:
  489. return "NoCompression";
  490. case kSnappyCompression:
  491. return "Snappy";
  492. case kZlibCompression:
  493. return "Zlib";
  494. case kBZip2Compression:
  495. return "BZip2";
  496. case kLZ4Compression:
  497. return "LZ4";
  498. case kLZ4HCCompression:
  499. return "LZ4HC";
  500. case kXpressCompression:
  501. return "Xpress";
  502. case kZSTD:
  503. return "ZSTD";
  504. case kZSTDNotFinalCompression:
  505. return "ZSTDNotFinal";
  506. case kDisableCompressionOption:
  507. return "DisableOption";
  508. default:
  509. assert(false);
  510. return "";
  511. }
  512. }
  513. inline std::string CompressionOptionsToString(
  514. CompressionOptions& compression_options) {
  515. std::string result;
  516. result.reserve(512);
  517. result.append("window_bits=")
  518. .append(ToString(compression_options.window_bits))
  519. .append("; ");
  520. result.append("level=")
  521. .append(ToString(compression_options.level))
  522. .append("; ");
  523. result.append("strategy=")
  524. .append(ToString(compression_options.strategy))
  525. .append("; ");
  526. result.append("max_dict_bytes=")
  527. .append(ToString(compression_options.max_dict_bytes))
  528. .append("; ");
  529. result.append("zstd_max_train_bytes=")
  530. .append(ToString(compression_options.zstd_max_train_bytes))
  531. .append("; ");
  532. result.append("enabled=")
  533. .append(ToString(compression_options.enabled))
  534. .append("; ");
  535. return result;
  536. }
  537. // compress_format_version can have two values:
  538. // 1 -- decompressed sizes for BZip2 and Zlib are not included in the compressed
  539. // block. Also, decompressed sizes for LZ4 are encoded in platform-dependent
  540. // way.
  541. // 2 -- Zlib, BZip2 and LZ4 encode decompressed size as Varint32 just before the
  542. // start of compressed block. Snappy format is the same as version 1.
// Compresses `input` (length bytes) with Snappy into *output, replacing
// its contents. Returns false, leaving output untouched, when Snappy is
// not compiled in. Note Snappy uses no compress_format_version size
// header (see the comment above); its framing stores the length itself.
inline bool Snappy_Compress(const CompressionInfo& /*info*/, const char* input,
                            size_t length, ::std::string* output) {
#ifdef SNAPPY
  // Reserve the worst-case output size up front, then shrink to the
  // actual compressed length reported by RawCompress.
  output->resize(snappy::MaxCompressedLength(length));
  size_t outlen;
  snappy::RawCompress(input, length, &(*output)[0], &outlen);
  output->resize(outlen);
  return true;
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
// Reads the decompressed size stored in Snappy's own framing into
// *result. Returns false when the input is corrupt or Snappy is not
// compiled in.
inline bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                         size_t* result) {
#ifdef SNAPPY
  return snappy::GetUncompressedLength(input, length, result);
#else
  (void)input;
  (void)length;
  (void)result;
  return false;
#endif
}
// Decompresses into `output`, which the caller must have sized using
// Snappy_GetUncompressedLength. Returns false on corrupt input or when
// Snappy is not compiled in.
inline bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef SNAPPY
  return snappy::RawUncompress(input, length, output);
#else
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
  579. namespace compression {
// Appends `length` as a varint32 prefix to *output and returns the TOTAL
// size of *output after the append (not just the varint's width) — i.e.
// the offset at which the compressed payload should begin.
inline size_t PutDecompressedSizeInfo(std::string* output, uint32_t length) {
  PutVarint32(output, length);
  return output->size();
}
  585. inline bool GetDecompressedSizeInfo(const char** input_data,
  586. size_t* input_length,
  587. uint32_t* output_len) {
  588. auto new_input_data =
  589. GetVarint32Ptr(*input_data, *input_data + *input_length, output_len);
  590. if (new_input_data == nullptr) {
  591. return false;
  592. }
  593. *input_length -= (new_input_data - *input_data);
  594. *input_data = new_input_data;
  595. return true;
  596. }
  597. } // namespace compression
  598. // compress_format_version == 1 -- decompressed size is not included in the
  599. // block header
  600. // compress_format_version == 2 -- decompressed size is included in the block
  601. // header in varint32 format
  602. // @param compression_dict Data for presetting the compression library's
  603. // dictionary.
// Compresses `input` with zlib/deflate into *output, appending a varint32
// decompressed-size header first when compress_format_version == 2.
// Returns true only if the data actually shrank; expansion, any zlib
// error, or a build without zlib yields false (output may have been
// partially written in the failure cases).
inline bool Zlib_Compress(const CompressionInfo& info,
                          uint32_t compress_format_version, const char* input,
                          size_t length, ::std::string* output) {
#ifdef ZLIB
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }
  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Resize output to be the plain data length.
  // This may not be big enough if the compression actually expands data.
  output->resize(output_header_len + length);
  // The memLevel parameter specifies how much memory should be allocated for
  // the internal compression state.
  // memLevel=1 uses minimum memory but is slow and reduces compression ratio.
  // memLevel=9 uses maximum memory for optimal speed.
  // The default value is 8. See zconf.h for more details.
  static const int memLevel = 8;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    level = Z_DEFAULT_COMPRESSION;
  } else {
    level = info.options().level;
  }
  z_stream _stream;
  memset(&_stream, 0, sizeof(z_stream));
  int st = deflateInit2(&_stream, level, Z_DEFLATED, info.options().window_bits,
                        memLevel, info.options().strategy);
  if (st != Z_OK) {
    return false;
  }
  Slice compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    // Initialize the compression library's dictionary
    st = deflateSetDictionary(
        &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
        static_cast<unsigned int>(compression_dict.size()));
    if (st != Z_OK) {
      deflateEnd(&_stream);
      return false;
    }
  }
  // Compress the input, and put compressed data in output.
  _stream.next_in = (Bytef*)input;
  _stream.avail_in = static_cast<unsigned int>(length);
  // Initialize the output size.
  // Capping avail_out at `length` is deliberate: if deflate can't finish
  // within the original size, the compression isn't worth keeping.
  _stream.avail_out = static_cast<unsigned int>(length);
  _stream.next_out = reinterpret_cast<Bytef*>(&(*output)[output_header_len]);
  bool compressed = false;
  st = deflate(&_stream, Z_FINISH);
  if (st == Z_STREAM_END) {
    compressed = true;
    output->resize(output->size() - _stream.avail_out);
  }
  // The only return value we really care about is Z_STREAM_END.
  // Z_OK means insufficient output space. This means the compression is
  // bigger than decompressed size. Just fail the compression in that case.
  deflateEnd(&_stream);
  return compressed;
#else
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
  676. // compress_format_version == 1 -- decompressed size is not included in the
  677. // block header
  678. // compress_format_version == 2 -- decompressed size is included in the block
  679. // header in varint32 format
  680. // @param compression_dict Data for presetting the compression library's
  681. // dictionary.
  682. inline CacheAllocationPtr Zlib_Uncompress(
  683. const UncompressionInfo& info, const char* input_data, size_t input_length,
  684. int* decompress_size, uint32_t compress_format_version,
  685. MemoryAllocator* allocator = nullptr, int windowBits = -14) {
  686. #ifdef ZLIB
  687. uint32_t output_len = 0;
  688. if (compress_format_version == 2) {
  689. if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
  690. &output_len)) {
  691. return nullptr;
  692. }
  693. } else {
  694. // Assume the decompressed data size will 5x of compressed size, but round
  695. // to the page size
  696. size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
  697. output_len = static_cast<uint32_t>(
  698. std::min(proposed_output_len,
  699. static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  700. }
  701. z_stream _stream;
  702. memset(&_stream, 0, sizeof(z_stream));
  703. // For raw inflate, the windowBits should be -8..-15.
  704. // If windowBits is bigger than zero, it will use either zlib
  705. // header or gzip header. Adding 32 to it will do automatic detection.
  706. int st =
  707. inflateInit2(&_stream, windowBits > 0 ? windowBits + 32 : windowBits);
  708. if (st != Z_OK) {
  709. return nullptr;
  710. }
  711. const Slice& compression_dict = info.dict().GetRawDict();
  712. if (compression_dict.size()) {
  713. // Initialize the compression library's dictionary
  714. st = inflateSetDictionary(
  715. &_stream, reinterpret_cast<const Bytef*>(compression_dict.data()),
  716. static_cast<unsigned int>(compression_dict.size()));
  717. if (st != Z_OK) {
  718. return nullptr;
  719. }
  720. }
  721. _stream.next_in = (Bytef*)input_data;
  722. _stream.avail_in = static_cast<unsigned int>(input_length);
  723. auto output = AllocateBlock(output_len, allocator);
  724. _stream.next_out = (Bytef*)output.get();
  725. _stream.avail_out = static_cast<unsigned int>(output_len);
  726. bool done = false;
  727. while (!done) {
  728. st = inflate(&_stream, Z_SYNC_FLUSH);
  729. switch (st) {
  730. case Z_STREAM_END:
  731. done = true;
  732. break;
  733. case Z_OK: {
  734. // No output space. Increase the output space by 20%.
  735. // We should never run out of output space if
  736. // compress_format_version == 2
  737. assert(compress_format_version != 2);
  738. size_t old_sz = output_len;
  739. uint32_t output_len_delta = output_len / 5;
  740. output_len += output_len_delta < 10 ? 10 : output_len_delta;
  741. auto tmp = AllocateBlock(output_len, allocator);
  742. memcpy(tmp.get(), output.get(), old_sz);
  743. output = std::move(tmp);
  744. // Set more output.
  745. _stream.next_out = (Bytef*)(output.get() + old_sz);
  746. _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
  747. break;
  748. }
  749. case Z_BUF_ERROR:
  750. default:
  751. inflateEnd(&_stream);
  752. return nullptr;
  753. }
  754. }
  755. // If we encoded decompressed block size, we should have no bytes left
  756. assert(compress_format_version != 2 || _stream.avail_out == 0);
  757. *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  758. inflateEnd(&_stream);
  759. return output;
  760. #else
  761. (void)info;
  762. (void)input_data;
  763. (void)input_length;
  764. (void)decompress_size;
  765. (void)compress_format_version;
  766. (void)allocator;
  767. (void)windowBits;
  768. return nullptr;
  769. #endif
  770. }
// compress_format_version == 1 -- decompressed size is not included in the
// block header
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
//
// Returns true iff compression succeeded without expanding the data; on
// success *output holds the (optional) size header plus compressed payload.
inline bool BZip2_Compress(const CompressionInfo& /*info*/,
                           uint32_t compress_format_version, const char* input,
                           size_t length, ::std::string* output) {
#ifdef BZIP2
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }
  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // Varint32-encode the decompressed size at the front of *output.
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  }
  // Resize output to be the plain data length.
  // This may not be big enough if the compression actually expands data.
  output->resize(output_header_len + length);

  bz_stream _stream;
  memset(&_stream, 0, sizeof(bz_stream));

  // Block size 1 is 100K.
  // 0 is for silent.
  // 30 is the default workFactor
  int st = BZ2_bzCompressInit(&_stream, 1, 0, 30);
  if (st != BZ_OK) {
    return false;
  }

  // Compress the input, and put compressed data in output.
  _stream.next_in = (char*)input;
  _stream.avail_in = static_cast<unsigned int>(length);

  // Initialize the output size. Capping avail_out at the input length means
  // a compression that would expand the data runs out of space and fails.
  _stream.avail_out = static_cast<unsigned int>(length);
  _stream.next_out = reinterpret_cast<char*>(&(*output)[output_header_len]);

  bool compressed = false;
  st = BZ2_bzCompress(&_stream, BZ_FINISH);
  if (st == BZ_STREAM_END) {
    compressed = true;
    // Trim *output down to header + actual compressed size.
    output->resize(output->size() - _stream.avail_out);
  }
  // The only return value we really care about is BZ_STREAM_END.
  // BZ_FINISH_OK means insufficient output space. This means the compression
  // is bigger than decompressed size. Just fail the compression in that case.
  BZ2_bzCompressEnd(&_stream);
  return compressed;
#else
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
  825. // compress_format_version == 1 -- decompressed size is not included in the
  826. // block header
  827. // compress_format_version == 2 -- decompressed size is included in the block
  828. // header in varint32 format
  829. inline CacheAllocationPtr BZip2_Uncompress(
  830. const char* input_data, size_t input_length, int* decompress_size,
  831. uint32_t compress_format_version, MemoryAllocator* allocator = nullptr) {
  832. #ifdef BZIP2
  833. uint32_t output_len = 0;
  834. if (compress_format_version == 2) {
  835. if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
  836. &output_len)) {
  837. return nullptr;
  838. }
  839. } else {
  840. // Assume the decompressed data size will 5x of compressed size, but round
  841. // to the next page size
  842. size_t proposed_output_len = ((input_length * 5) & (~(4096 - 1))) + 4096;
  843. output_len = static_cast<uint32_t>(
  844. std::min(proposed_output_len,
  845. static_cast<size_t>(std::numeric_limits<uint32_t>::max())));
  846. }
  847. bz_stream _stream;
  848. memset(&_stream, 0, sizeof(bz_stream));
  849. int st = BZ2_bzDecompressInit(&_stream, 0, 0);
  850. if (st != BZ_OK) {
  851. return nullptr;
  852. }
  853. _stream.next_in = (char*)input_data;
  854. _stream.avail_in = static_cast<unsigned int>(input_length);
  855. auto output = AllocateBlock(output_len, allocator);
  856. _stream.next_out = (char*)output.get();
  857. _stream.avail_out = static_cast<unsigned int>(output_len);
  858. bool done = false;
  859. while (!done) {
  860. st = BZ2_bzDecompress(&_stream);
  861. switch (st) {
  862. case BZ_STREAM_END:
  863. done = true;
  864. break;
  865. case BZ_OK: {
  866. // No output space. Increase the output space by 20%.
  867. // We should never run out of output space if
  868. // compress_format_version == 2
  869. assert(compress_format_version != 2);
  870. uint32_t old_sz = output_len;
  871. output_len = output_len * 1.2;
  872. auto tmp = AllocateBlock(output_len, allocator);
  873. memcpy(tmp.get(), output.get(), old_sz);
  874. output = std::move(tmp);
  875. // Set more output.
  876. _stream.next_out = (char*)(output.get() + old_sz);
  877. _stream.avail_out = static_cast<unsigned int>(output_len - old_sz);
  878. break;
  879. }
  880. default:
  881. BZ2_bzDecompressEnd(&_stream);
  882. return nullptr;
  883. }
  884. }
  885. // If we encoded decompressed block size, we should have no bytes left
  886. assert(compress_format_version != 2 || _stream.avail_out == 0);
  887. *decompress_size = static_cast<int>(output_len - _stream.avail_out);
  888. BZ2_bzDecompressEnd(&_stream);
  889. return output;
  890. #else
  891. (void)input_data;
  892. (void)input_length;
  893. (void)decompress_size;
  894. (void)compress_format_version;
  895. (void)allocator;
  896. return nullptr;
  897. #endif
  898. }
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes database non-portable)
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
// @param compression_dict Data for presetting the compression library's
// dictionary.
//
// Returns true on success; *output then holds the size header followed by
// the LZ4-compressed payload.
inline bool LZ4_Compress(const CompressionInfo& info,
                         uint32_t compress_format_version, const char* input,
                         size_t length, ::std::string* output) {
#ifdef LZ4
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    output_header_len = 8;
    output->resize(output_header_len);
    char* p = const_cast<char*>(output->c_str());
    // NOTE(review): copies sizeof(size_t) bytes into an 8-byte header; on a
    // 32-bit build the remaining bytes stay zero from the resize() above.
    memcpy(p, &length, sizeof(length));
  }

  // Reserve worst-case space; trimmed to the real size after compression.
  int compress_bound = LZ4_compressBound(static_cast<int>(length));
  output->resize(static_cast<size_t>(output_header_len + compress_bound));

  int outlen;
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  LZ4_stream_t* stream = LZ4_createStream();
  Slice compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    LZ4_loadDict(stream, compression_dict.data(),
                 static_cast<int>(compression_dict.size()));
  }
#if LZ4_VERSION_NUMBER >= 10700  // r129+
  outlen =
      LZ4_compress_fast_continue(stream, input, &(*output)[output_header_len],
                                 static_cast<int>(length), compress_bound, 1);
#else  // up to r128
  outlen = LZ4_compress_limitedOutput_continue(
      stream, input, &(*output)[output_header_len], static_cast<int>(length),
      compress_bound);
#endif
  LZ4_freeStream(stream);
#else   // up to r123
  outlen = LZ4_compress_limitedOutput(input, &(*output)[output_header_len],
                                      static_cast<int>(length), compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10400
  if (outlen == 0) {
    // LZ4 reports failure (e.g. bound too small) as 0 bytes written.
    return false;
  }
  output->resize(static_cast<size_t>(output_header_len + outlen));
  return true;
#else  // LZ4
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes database non-portable)
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
// @param compression_dict Data for presetting the compression library's
// dictionary.
//
// Returns an allocated buffer holding the decompressed bytes (byte count is
// reported through *decompress_size), or nullptr on malformed input.
inline CacheAllocationPtr LZ4_Uncompress(const UncompressionInfo& info,
                                         const char* input_data,
                                         size_t input_length,
                                         int* decompress_size,
                                         uint32_t compress_format_version,
                                         MemoryAllocator* allocator = nullptr) {
#ifdef LZ4
  uint32_t output_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
                                              &output_len)) {
      return nullptr;
    }
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    if (input_length < 8) {
      return nullptr;
    }
    // Reads sizeof(uint32_t) of the 8 header bytes; matches what
    // LZ4_Compress wrote only on a matching-endianness build (hence the
    // "non-portable" note above).
    memcpy(&output_len, input_data, sizeof(output_len));
    input_length -= 8;
    input_data += 8;
  }

  auto output = AllocateBlock(output_len, allocator);
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  LZ4_streamDecode_t* stream = LZ4_createStreamDecode();
  const Slice& compression_dict = info.dict().GetRawDict();
  if (compression_dict.size()) {
    LZ4_setStreamDecode(stream, compression_dict.data(),
                        static_cast<int>(compression_dict.size()));
  }
  *decompress_size = LZ4_decompress_safe_continue(
      stream, input_data, output.get(), static_cast<int>(input_length),
      static_cast<int>(output_len));
  LZ4_freeStreamDecode(stream);
#else  // up to r123
  *decompress_size = LZ4_decompress_safe(input_data, output.get(),
                                         static_cast<int>(input_length),
                                         static_cast<int>(output_len));
#endif  // LZ4_VERSION_NUMBER >= 10400
  if (*decompress_size < 0) {
    // Negative return means corrupt or truncated input.
    return nullptr;
  }
  assert(*decompress_size == static_cast<int>(output_len));
  return output;
#else  // LZ4
  (void)info;
  (void)input_data;
  (void)input_length;
  (void)decompress_size;
  (void)compress_format_version;
  (void)allocator;
  return nullptr;
#endif
}
// compress_format_version == 1 -- decompressed size is included in the
// block header using memcpy, which makes database non-portable)
// compress_format_version == 2 -- decompressed size is included in the block
// header in varint32 format
// @param compression_dict Data for presetting the compression library's
// dictionary.
//
// High-compression (HC) variant of LZ4_Compress: same output layout, with
// the compression level taken from info.options().level.
inline bool LZ4HC_Compress(const CompressionInfo& info,
                           uint32_t compress_format_version, const char* input,
                           size_t length, ::std::string* output) {
#ifdef LZ4
  if (length > std::numeric_limits<uint32_t>::max()) {
    // Can't compress more than 4GB
    return false;
  }

  size_t output_header_len = 0;
  if (compress_format_version == 2) {
    // new encoding, using varint32 to store size information
    output_header_len = compression::PutDecompressedSizeInfo(
        output, static_cast<uint32_t>(length));
  } else {
    // legacy encoding, which is not really portable (depends on big/little
    // endianness)
    output_header_len = 8;
    output->resize(output_header_len);
    char* p = const_cast<char*>(output->c_str());
    memcpy(p, &length, sizeof(length));
  }

  // Reserve worst-case space; trimmed to the real size after compression.
  int compress_bound = LZ4_compressBound(static_cast<int>(length));
  output->resize(static_cast<size_t>(output_header_len + compress_bound));

  int outlen;
  int level;
  if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
    level = 0;  // lz4hc.h says any value < 1 will be sanitized to default
  } else {
    level = info.options().level;
  }
#if LZ4_VERSION_NUMBER >= 10400  // r124+
  LZ4_streamHC_t* stream = LZ4_createStreamHC();
  LZ4_resetStreamHC(stream, level);
  Slice compression_dict = info.dict().GetRawDict();
  // Unlike LZ4_Compress, the dictionary is loaded unconditionally; with an
  // empty dict this passes (nullptr, 0) -- presumably a no-op for
  // LZ4_loadDictHC (TODO confirm against the lz4 docs).
  const char* compression_dict_data =
      compression_dict.size() > 0 ? compression_dict.data() : nullptr;
  size_t compression_dict_size = compression_dict.size();
  LZ4_loadDictHC(stream, compression_dict_data,
                 static_cast<int>(compression_dict_size));
#if LZ4_VERSION_NUMBER >= 10700  // r129+
  outlen =
      LZ4_compress_HC_continue(stream, input, &(*output)[output_header_len],
                               static_cast<int>(length), compress_bound);
#else   // r124-r128
  outlen = LZ4_compressHC_limitedOutput_continue(
      stream, input, &(*output)[output_header_len], static_cast<int>(length),
      compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10700
  LZ4_freeStreamHC(stream);
#elif LZ4_VERSION_MAJOR  // r113-r123
  outlen = LZ4_compressHC2_limitedOutput(input, &(*output)[output_header_len],
                                         static_cast<int>(length),
                                         compress_bound, level);
#else   // up to r112
  outlen =
      LZ4_compressHC_limitedOutput(input, &(*output)[output_header_len],
                                   static_cast<int>(length), compress_bound);
#endif  // LZ4_VERSION_NUMBER >= 10400
  if (outlen == 0) {
    // 0 bytes written signals failure.
    return false;
  }
  output->resize(static_cast<size_t>(output_header_len + outlen));
  return true;
#else  // LZ4
  (void)info;
  (void)compress_format_version;
  (void)input;
  (void)length;
  (void)output;
  return false;
#endif
}
#ifdef XPRESS
// Thin wrapper over the Windows XPRESS codec in the port layer.
// Returns whatever port::xpress::Compress reports (true on success).
inline bool XPRESS_Compress(const char* input, size_t length,
                            std::string* output) {
  return port::xpress::Compress(input, length, output);
}
#else
// XPRESS support not compiled in: compression always reports failure.
inline bool XPRESS_Compress(const char* /*input*/, size_t /*length*/,
                            std::string* /*output*/) {
  return false;
}
#endif
#ifdef XPRESS
// Thin wrapper over the Windows XPRESS decoder in the port layer.
// NOTE(review): returns a raw char* buffer produced by
// port::xpress::Decompress -- ownership presumably passes to the caller;
// confirm the matching deallocation convention in the port layer.
inline char* XPRESS_Uncompress(const char* input_data, size_t input_length,
                               int* decompress_size) {
  return port::xpress::Decompress(input_data, input_length, decompress_size);
}
#else
// XPRESS support not compiled in: decompression always fails (nullptr).
inline char* XPRESS_Uncompress(const char* /*input_data*/,
                               size_t /*input_length*/,
                               int* /*decompress_size*/) {
  return nullptr;
}
#endif
  1127. inline bool ZSTD_Compress(const CompressionInfo& info, const char* input,
  1128. size_t length, ::std::string* output) {
  1129. #ifdef ZSTD
  1130. if (length > std::numeric_limits<uint32_t>::max()) {
  1131. // Can't compress more than 4GB
  1132. return false;
  1133. }
  1134. size_t output_header_len = compression::PutDecompressedSizeInfo(
  1135. output, static_cast<uint32_t>(length));
  1136. size_t compressBound = ZSTD_compressBound(length);
  1137. output->resize(static_cast<size_t>(output_header_len + compressBound));
  1138. size_t outlen = 0;
  1139. int level;
  1140. if (info.options().level == CompressionOptions::kDefaultCompressionLevel) {
  1141. // 3 is the value of ZSTD_CLEVEL_DEFAULT (not exposed publicly), see
  1142. // https://github.com/facebook/zstd/issues/1148
  1143. level = 3;
  1144. } else {
  1145. level = info.options().level;
  1146. }
  1147. #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
  1148. ZSTD_CCtx* context = info.context().ZSTDPreallocCtx();
  1149. assert(context != nullptr);
  1150. #if ZSTD_VERSION_NUMBER >= 700 // v0.7.0+
  1151. if (info.dict().GetDigestedZstdCDict() != nullptr) {
  1152. outlen = ZSTD_compress_usingCDict(context, &(*output)[output_header_len],
  1153. compressBound, input, length,
  1154. info.dict().GetDigestedZstdCDict());
  1155. }
  1156. #endif // ZSTD_VERSION_NUMBER >= 700
  1157. if (outlen == 0) {
  1158. outlen = ZSTD_compress_usingDict(context, &(*output)[output_header_len],
  1159. compressBound, input, length,
  1160. info.dict().GetRawDict().data(),
  1161. info.dict().GetRawDict().size(), level);
  1162. }
  1163. #else // up to v0.4.x
  1164. outlen = ZSTD_compress(&(*output)[output_header_len], compressBound, input,
  1165. length, level);
  1166. #endif // ZSTD_VERSION_NUMBER >= 500
  1167. if (outlen == 0) {
  1168. return false;
  1169. }
  1170. output->resize(output_header_len + outlen);
  1171. return true;
  1172. #else // ZSTD
  1173. (void)info;
  1174. (void)input;
  1175. (void)length;
  1176. (void)output;
  1177. return false;
  1178. #endif
  1179. }
  1180. // @param compression_dict Data for presetting the compression library's
  1181. // dictionary.
  1182. inline CacheAllocationPtr ZSTD_Uncompress(
  1183. const UncompressionInfo& info, const char* input_data, size_t input_length,
  1184. int* decompress_size, MemoryAllocator* allocator = nullptr) {
  1185. #ifdef ZSTD
  1186. uint32_t output_len = 0;
  1187. if (!compression::GetDecompressedSizeInfo(&input_data, &input_length,
  1188. &output_len)) {
  1189. return nullptr;
  1190. }
  1191. auto output = AllocateBlock(output_len, allocator);
  1192. size_t actual_output_length = 0;
  1193. #if ZSTD_VERSION_NUMBER >= 500 // v0.5.0+
  1194. ZSTD_DCtx* context = info.context().GetZSTDContext();
  1195. assert(context != nullptr);
  1196. #ifdef ROCKSDB_ZSTD_DDICT
  1197. if (info.dict().GetDigestedZstdDDict() != nullptr) {
  1198. actual_output_length = ZSTD_decompress_usingDDict(
  1199. context, output.get(), output_len, input_data, input_length,
  1200. info.dict().GetDigestedZstdDDict());
  1201. }
  1202. #endif // ROCKSDB_ZSTD_DDICT
  1203. if (actual_output_length == 0) {
  1204. actual_output_length = ZSTD_decompress_usingDict(
  1205. context, output.get(), output_len, input_data, input_length,
  1206. info.dict().GetRawDict().data(), info.dict().GetRawDict().size());
  1207. }
  1208. #else // up to v0.4.x
  1209. (void)info;
  1210. actual_output_length =
  1211. ZSTD_decompress(output.get(), output_len, input_data, input_length);
  1212. #endif // ZSTD_VERSION_NUMBER >= 500
  1213. assert(actual_output_length == output_len);
  1214. *decompress_size = static_cast<int>(actual_output_length);
  1215. return output;
  1216. #else // ZSTD
  1217. (void)info;
  1218. (void)input_data;
  1219. (void)input_length;
  1220. (void)decompress_size;
  1221. (void)allocator;
  1222. return nullptr;
  1223. #endif
  1224. }
// Whether the linked ZSTD library supports dictionary training (ZDICT API).
inline bool ZSTD_TrainDictionarySupported() {
#ifdef ZSTD
  // Dictionary trainer is available since v0.6.1 for static linking, but not
  // available for dynamic linking until v1.1.3. For now we enable the feature
  // in v1.1.3+ only.
  return (ZSTD_versionNumber() >= 10103);
#else
  return false;
#endif
}
  1235. inline std::string ZSTD_TrainDictionary(const std::string& samples,
  1236. const std::vector<size_t>& sample_lens,
  1237. size_t max_dict_bytes) {
  1238. // Dictionary trainer is available since v0.6.1 for static linking, but not
  1239. // available for dynamic linking until v1.1.3. For now we enable the feature
  1240. // in v1.1.3+ only.
  1241. #if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+
  1242. assert(samples.empty() == sample_lens.empty());
  1243. if (samples.empty()) {
  1244. return "";
  1245. }
  1246. std::string dict_data(max_dict_bytes, '\0');
  1247. size_t dict_len = ZDICT_trainFromBuffer(
  1248. &dict_data[0], max_dict_bytes, &samples[0], &sample_lens[0],
  1249. static_cast<unsigned>(sample_lens.size()));
  1250. if (ZDICT_isError(dict_len)) {
  1251. return "";
  1252. }
  1253. assert(dict_len <= max_dict_bytes);
  1254. dict_data.resize(dict_len);
  1255. return dict_data;
  1256. #else // up to v1.1.2
  1257. assert(false);
  1258. (void)samples;
  1259. (void)sample_lens;
  1260. (void)max_dict_bytes;
  1261. return "";
  1262. #endif // ZSTD_VERSION_NUMBER >= 10103
  1263. }
  1264. inline std::string ZSTD_TrainDictionary(const std::string& samples,
  1265. size_t sample_len_shift,
  1266. size_t max_dict_bytes) {
  1267. // Dictionary trainer is available since v0.6.1, but ZSTD was marked stable
  1268. // only since v0.8.0. For now we enable the feature in stable versions only.
  1269. #if ZSTD_VERSION_NUMBER >= 10103 // v1.1.3+
  1270. // skips potential partial sample at the end of "samples"
  1271. size_t num_samples = samples.size() >> sample_len_shift;
  1272. std::vector<size_t> sample_lens(num_samples, size_t(1) << sample_len_shift);
  1273. return ZSTD_TrainDictionary(samples, sample_lens, max_dict_bytes);
  1274. #else // up to v1.1.2
  1275. assert(false);
  1276. (void)samples;
  1277. (void)sample_len_shift;
  1278. (void)max_dict_bytes;
  1279. return "";
  1280. #endif // ZSTD_VERSION_NUMBER >= 10103
  1281. }
  1282. } // namespace ROCKSDB_NAMESPACE