persistent_cache_tier.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. // Copyright (c) 2013, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. #pragma once
  7. #ifndef ROCKSDB_LITE
  8. #include <limits>
  9. #include <list>
  10. #include <map>
  11. #include <string>
  12. #include <vector>
  13. #include "monitoring/histogram.h"
  14. #include "rocksdb/env.h"
  15. #include "rocksdb/persistent_cache.h"
  16. #include "rocksdb/status.h"
  17. // Persistent Cache
  18. //
  19. // Persistent cache is tiered key-value cache that can use persistent medium. It
  20. // is a generic design and can leverage any storage medium -- disk/SSD/NVM/RAM.
  21. // The code has been kept generic but significant benchmark/design/development
  22. // time has been spent to make sure the cache performs appropriately for
  23. // respective storage medium.
  24. // The file defines
  25. // PersistentCacheTier : Implementation that handles individual cache tier
  26. // PersistentTieredCache : Implementation that handles all tiers as a logical
  27. // unit
  28. //
  29. // PersistentTieredCache architecture:
  30. // +--------------------------+ PersistentCacheTier that handles multiple tiers
  31. // | +----------------+       |
  32. // | | RAM            | PersistentCacheTier that handles RAM (VolatileCacheImpl)
  33. // | +----------------+       |
  34. // |   | next                 |
  35. // |   v                      |
  36. // | +----------------+       |
  37. // | | NVM            | PersistentCacheTier implementation that handles NVM
  38. // | +----------------+ (BlockCacheImpl)
  39. // |   | next                 |
  40. // |   V                      |
  41. // | +----------------+       |
  42. // | | LE-SSD         | PersistentCacheTier implementation that handles LE-SSD
  43. // | +----------------+ (BlockCacheImpl)
  44. // |   |                      |
  45. // |   V                      |
  46. // |  null                    |
  47. // +--------------------------+
  48. //               |
  49. //               V
  50. //              null
  51. namespace ROCKSDB_NAMESPACE {
  52. // Persistent Cache Config
  53. //
  54. // This struct captures all the options that are used to configure persistent
  55. // cache. Some of the terminologies used in naming the options are
  56. //
  57. // dispatch size :
  58. // This is the size in which IO is dispatched to the device
  59. //
  60. // write buffer size :
  61. // This is the size of an individual write buffer size. Write buffers are
  62. // grouped to form buffered file.
  63. //
  64. // cache size :
  65. // This is the logical maximum for the cache size
  66. //
  67. // qdepth :
  68. // This is the max number of IOs that can be issued to the device in parallel
  69. //
  70. // pipelining :
  71. // The writer code path follows pipelined architecture, which means the
  72. // operations are handed off from one stage to another
  73. //
  74. // pipelining backlog size :
  75. // With the pipelined architecture, there can always be backlogging of ops in
  76. // pipeline queues. This is the maximum backlog size after which ops are dropped
  77. // from queue
  78. struct PersistentCacheConfig {
  79. explicit PersistentCacheConfig(
  80. Env* const _env, const std::string& _path, const uint64_t _cache_size,
  81. const std::shared_ptr<Logger>& _log,
  82. const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) {
  83. env = _env;
  84. path = _path;
  85. log = _log;
  86. cache_size = _cache_size;
  87. writer_dispatch_size = write_buffer_size = _write_buffer_size;
  88. }
  89. //
  90. // Validate the settings. Our intentions are to catch erroneous settings ahead
  91. // of time instead going violating invariants or causing dead locks.
  92. //
  93. Status ValidateSettings() const {
  94. // (1) check pre-conditions for variables
  95. if (!env || path.empty()) {
  96. return Status::InvalidArgument("empty or null args");
  97. }
  98. // (2) assert size related invariants
  99. // - cache size cannot be less than cache file size
  100. // - individual write buffer size cannot be greater than cache file size
  101. // - total write buffer size cannot be less than 2X cache file size
  102. if (cache_size < cache_file_size || write_buffer_size >= cache_file_size ||
  103. write_buffer_size * write_buffer_count() < 2 * cache_file_size) {
  104. return Status::InvalidArgument("invalid cache size");
  105. }
  106. // (2) check writer settings
  107. // - Queue depth cannot be 0
  108. // - writer_dispatch_size cannot be greater than writer_buffer_size
  109. // - dispatch size and buffer size need to be aligned
  110. if (!writer_qdepth || writer_dispatch_size > write_buffer_size ||
  111. write_buffer_size % writer_dispatch_size) {
  112. return Status::InvalidArgument("invalid writer settings");
  113. }
  114. return Status::OK();
  115. }
  116. //
  117. // Env abstraction to use for systmer level operations
  118. //
  119. Env* env;
  120. //
  121. // Path for the block cache where blocks are persisted
  122. //
  123. std::string path;
  124. //
  125. // Log handle for logging messages
  126. //
  127. std::shared_ptr<Logger> log;
  128. //
  129. // Enable direct IO for reading
  130. //
  131. bool enable_direct_reads = true;
  132. //
  133. // Enable direct IO for writing
  134. //
  135. bool enable_direct_writes = false;
  136. //
  137. // Logical cache size
  138. //
  139. uint64_t cache_size = std::numeric_limits<uint64_t>::max();
  140. // cache-file-size
  141. //
  142. // Cache consists of multiples of small files. This parameter defines the
  143. // size of an individual cache file
  144. //
  145. // default: 1M
  146. uint32_t cache_file_size = 100ULL * 1024 * 1024;
  147. // writer-qdepth
  148. //
  149. // The writers can issues IO to the devices in parallel. This parameter
  150. // controls the max number if IOs that can issues in parallel to the block
  151. // device
  152. //
  153. // default :1
  154. uint32_t writer_qdepth = 1;
  155. // pipeline-writes
  156. //
  157. // The write optionally follow pipelined architecture. This helps
  158. // avoid regression in the eviction code path of the primary tier. This
  159. // parameter defines if pipelining is enabled or disabled
  160. //
  161. // default: true
  162. bool pipeline_writes = true;
  163. // max-write-pipeline-backlog-size
  164. //
  165. // Max pipeline buffer size. This is the maximum backlog we can accumulate
  166. // while waiting for writes. After the limit, new ops will be dropped.
  167. //
  168. // Default: 1GiB
  169. uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024;
  170. // write-buffer-size
  171. //
  172. // This is the size in which buffer slabs are allocated.
  173. //
  174. // Default: 1M
  175. uint32_t write_buffer_size = 1ULL * 1024 * 1024;
  176. // write-buffer-count
  177. //
  178. // This is the total number of buffer slabs. This is calculated as a factor of
  179. // file size in order to avoid dead lock.
  180. size_t write_buffer_count() const {
  181. assert(write_buffer_size);
  182. return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size /
  183. write_buffer_size);
  184. }
  185. // writer-dispatch-size
  186. //
  187. // The writer thread will dispatch the IO at the specified IO size
  188. //
  189. // default: 1M
  190. uint64_t writer_dispatch_size = 1ULL * 1024 * 1024;
  191. // is_compressed
  192. //
  193. // This option determines if the cache will run in compressed mode or
  194. // uncompressed mode
  195. bool is_compressed = true;
  196. PersistentCacheConfig MakePersistentCacheConfig(
  197. const std::string& path, const uint64_t size,
  198. const std::shared_ptr<Logger>& log);
  199. std::string ToString() const;
  200. };
  201. // Persistent Cache Tier
  202. //
  203. // This is a logical abstraction that defines a tier of the persistent cache.
  204. // Tiers can be stacked over one another. PersistentCache provides the basic
  205. // definition for accessing/storing in the cache. PersistentCacheTier extends
  206. // the interface to enable management and stacking of tiers.
  207. class PersistentCacheTier : public PersistentCache {
  208. public:
  209. typedef std::shared_ptr<PersistentCacheTier> Tier;
  210. virtual ~PersistentCacheTier() {}
  211. // Open the persistent cache tier
  212. virtual Status Open();
  213. // Close the persistent cache tier
  214. virtual Status Close();
  215. // Reserve space up to 'size' bytes
  216. virtual bool Reserve(const size_t size);
  217. // Erase a key from the cache
  218. virtual bool Erase(const Slice& key);
  219. // Print stats to string recursively
  220. virtual std::string PrintStats();
  221. virtual PersistentCache::StatsType Stats() override;
  222. // Insert to page cache
  223. virtual Status Insert(const Slice& page_key, const char* data,
  224. const size_t size) override = 0;
  225. // Lookup page cache by page identifier
  226. virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
  227. size_t* size) override = 0;
  228. // Does it store compressed data ?
  229. virtual bool IsCompressed() override = 0;
  230. virtual std::string GetPrintableOptions() const override = 0;
  231. // Return a reference to next tier
  232. virtual Tier& next_tier() { return next_tier_; }
  233. // Set the value for next tier
  234. virtual void set_next_tier(const Tier& tier) {
  235. assert(!next_tier_);
  236. next_tier_ = tier;
  237. }
  238. virtual void TEST_Flush() {
  239. if (next_tier_) {
  240. next_tier_->TEST_Flush();
  241. }
  242. }
  243. private:
  244. Tier next_tier_; // next tier
  245. };
  246. // PersistentTieredCache
  247. //
  248. // Abstraction that helps you construct tiers of persistent caches as a
  249. // unified cache. The tier(s) of cache will act as a single tier for management
  250. // ease and support PersistentCache methods for accessing data.
  251. class PersistentTieredCache : public PersistentCacheTier {
  252. public:
  253. virtual ~PersistentTieredCache();
  254. Status Open() override;
  255. Status Close() override;
  256. bool Erase(const Slice& key) override;
  257. std::string PrintStats() override;
  258. PersistentCache::StatsType Stats() override;
  259. Status Insert(const Slice& page_key, const char* data,
  260. const size_t size) override;
  261. Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
  262. size_t* size) override;
  263. bool IsCompressed() override;
  264. std::string GetPrintableOptions() const override {
  265. return "PersistentTieredCache";
  266. }
  267. void AddTier(const Tier& tier);
  268. Tier& next_tier() override {
  269. auto it = tiers_.end();
  270. return (*it)->next_tier();
  271. }
  272. void set_next_tier(const Tier& tier) override {
  273. auto it = tiers_.end();
  274. (*it)->set_next_tier(tier);
  275. }
  276. void TEST_Flush() override {
  277. assert(!tiers_.empty());
  278. tiers_.front()->TEST_Flush();
  279. PersistentCacheTier::TEST_Flush();
  280. }
  281. protected:
  282. std::list<Tier> tiers_; // list of tiers top-down
  283. };
  284. } // namespace ROCKSDB_NAMESPACE
  285. #endif