| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336 |
- // Copyright (c) 2013, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- #pragma once
- #ifndef ROCKSDB_LITE
- #include <limits>
- #include <list>
- #include <map>
- #include <string>
- #include <vector>
- #include "monitoring/histogram.h"
- #include "rocksdb/env.h"
- #include "rocksdb/persistent_cache.h"
- #include "rocksdb/status.h"
- // Persistent Cache
- //
- // Persistent cache is a tiered key-value cache that can use a persistent
- // medium. It is a generic design and can leverage any storage medium --
- // disk/SSD/NVM/RAM. The code has been kept generic, but significant
- // benchmark/design/development time has been spent to make sure the cache
- // performs appropriately for the respective storage medium.
- // The file defines
- // PersistentCacheTier : Implementation that handles individual cache tier
- // PersistentTieredCache : Implementation that handles all tiers as a logical
- // unit
- //
- // PersistentTieredCache architecture:
- // +--------------------------+ PersistentCacheTier that handles multiple tiers
- // | +----------------+ |
- // | | RAM | PersistentCacheTier that handles RAM (VolatileCacheImpl)
- // | +----------------+ |
- // | | next |
- // | v |
- // | +----------------+ |
- // | | NVM | PersistentCacheTier implementation that handles NVM
- // | +----------------+ (BlockCacheImpl)
- // | | next |
- // | V |
- // | +----------------+ |
- // | | LE-SSD | PersistentCacheTier implementation that handles LE-SSD
- // | +----------------+ (BlockCacheImpl)
- // | | |
- // | V |
- // | null |
- // +--------------------------+
- // |
- // V
- // null
- namespace ROCKSDB_NAMESPACE {
- // Persistent Cache Config
- //
- // This struct captures all the options that are used to configure persistent
- // cache. Some of the terminologies used in naming the options are
- //
- // dispatch size :
- // This is the size in which IO is dispatched to the device
- //
- // write buffer size :
- // This is the size of an individual write buffer. Write buffers are
- // grouped to form a buffered file.
- //
- // cache size :
- // This is the logical maximum for the cache size
- //
- // qdepth :
- // This is the max number of IOs that can be issued to the device in parallel
- //
- // pipelining :
- // The writer code path follows pipelined architecture, which means the
- // operations are handed off from one stage to another
- //
- // pipelining backlog size :
- // With the pipelined architecture, there can always be backlogging of ops in
- // pipeline queues. This is the maximum backlog size after which ops are dropped
- // from queue
- struct PersistentCacheConfig {
- explicit PersistentCacheConfig(
- Env* const _env, const std::string& _path, const uint64_t _cache_size,
- const std::shared_ptr<Logger>& _log,
- const uint32_t _write_buffer_size = 1 * 1024 * 1024 /*1MB*/) {
- env = _env;
- path = _path;
- log = _log;
- cache_size = _cache_size;
- writer_dispatch_size = write_buffer_size = _write_buffer_size;
- }
- //
- // Validate the settings. Our intentions are to catch erroneous settings ahead
- // of time instead going violating invariants or causing dead locks.
- //
- Status ValidateSettings() const {
- // (1) check pre-conditions for variables
- if (!env || path.empty()) {
- return Status::InvalidArgument("empty or null args");
- }
- // (2) assert size related invariants
- // - cache size cannot be less than cache file size
- // - individual write buffer size cannot be greater than cache file size
- // - total write buffer size cannot be less than 2X cache file size
- if (cache_size < cache_file_size || write_buffer_size >= cache_file_size ||
- write_buffer_size * write_buffer_count() < 2 * cache_file_size) {
- return Status::InvalidArgument("invalid cache size");
- }
- // (2) check writer settings
- // - Queue depth cannot be 0
- // - writer_dispatch_size cannot be greater than writer_buffer_size
- // - dispatch size and buffer size need to be aligned
- if (!writer_qdepth || writer_dispatch_size > write_buffer_size ||
- write_buffer_size % writer_dispatch_size) {
- return Status::InvalidArgument("invalid writer settings");
- }
- return Status::OK();
- }
- //
- // Env abstraction to use for systmer level operations
- //
- Env* env;
- //
- // Path for the block cache where blocks are persisted
- //
- std::string path;
- //
- // Log handle for logging messages
- //
- std::shared_ptr<Logger> log;
- //
- // Enable direct IO for reading
- //
- bool enable_direct_reads = true;
- //
- // Enable direct IO for writing
- //
- bool enable_direct_writes = false;
- //
- // Logical cache size
- //
- uint64_t cache_size = std::numeric_limits<uint64_t>::max();
- // cache-file-size
- //
- // Cache consists of multiples of small files. This parameter defines the
- // size of an individual cache file
- //
- // default: 1M
- uint32_t cache_file_size = 100ULL * 1024 * 1024;
- // writer-qdepth
- //
- // The writers can issues IO to the devices in parallel. This parameter
- // controls the max number if IOs that can issues in parallel to the block
- // device
- //
- // default :1
- uint32_t writer_qdepth = 1;
- // pipeline-writes
- //
- // The write optionally follow pipelined architecture. This helps
- // avoid regression in the eviction code path of the primary tier. This
- // parameter defines if pipelining is enabled or disabled
- //
- // default: true
- bool pipeline_writes = true;
- // max-write-pipeline-backlog-size
- //
- // Max pipeline buffer size. This is the maximum backlog we can accumulate
- // while waiting for writes. After the limit, new ops will be dropped.
- //
- // Default: 1GiB
- uint64_t max_write_pipeline_backlog_size = 1ULL * 1024 * 1024 * 1024;
- // write-buffer-size
- //
- // This is the size in which buffer slabs are allocated.
- //
- // Default: 1M
- uint32_t write_buffer_size = 1ULL * 1024 * 1024;
- // write-buffer-count
- //
- // This is the total number of buffer slabs. This is calculated as a factor of
- // file size in order to avoid dead lock.
- size_t write_buffer_count() const {
- assert(write_buffer_size);
- return static_cast<size_t>((writer_qdepth + 1.2) * cache_file_size /
- write_buffer_size);
- }
- // writer-dispatch-size
- //
- // The writer thread will dispatch the IO at the specified IO size
- //
- // default: 1M
- uint64_t writer_dispatch_size = 1ULL * 1024 * 1024;
- // is_compressed
- //
- // This option determines if the cache will run in compressed mode or
- // uncompressed mode
- bool is_compressed = true;
- PersistentCacheConfig MakePersistentCacheConfig(
- const std::string& path, const uint64_t size,
- const std::shared_ptr<Logger>& log);
- std::string ToString() const;
- };
- // Persistent Cache Tier
- //
- // This is a logical abstraction that defines a tier of the persistent cache.
- // Tiers can be stacked over one another. PersistentCache provides the basic
- // definition for accessing/storing in the cache. PersistentCacheTier extends
- // the interface to enable management and stacking of tiers.
- class PersistentCacheTier : public PersistentCache {
- public:
- typedef std::shared_ptr<PersistentCacheTier> Tier;
- virtual ~PersistentCacheTier() {}
- // Open the persistent cache tier
- virtual Status Open();
- // Close the persistent cache tier
- virtual Status Close();
- // Reserve space up to 'size' bytes
- virtual bool Reserve(const size_t size);
- // Erase a key from the cache
- virtual bool Erase(const Slice& key);
- // Print stats to string recursively
- virtual std::string PrintStats();
- virtual PersistentCache::StatsType Stats() override;
- // Insert to page cache
- virtual Status Insert(const Slice& page_key, const char* data,
- const size_t size) override = 0;
- // Lookup page cache by page identifier
- virtual Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
- size_t* size) override = 0;
- // Does it store compressed data ?
- virtual bool IsCompressed() override = 0;
- virtual std::string GetPrintableOptions() const override = 0;
- // Return a reference to next tier
- virtual Tier& next_tier() { return next_tier_; }
- // Set the value for next tier
- virtual void set_next_tier(const Tier& tier) {
- assert(!next_tier_);
- next_tier_ = tier;
- }
- virtual void TEST_Flush() {
- if (next_tier_) {
- next_tier_->TEST_Flush();
- }
- }
- private:
- Tier next_tier_; // next tier
- };
- // PersistentTieredCache
- //
- // Abstraction that helps you construct tiers of persistent caches as a
- // unified cache. The tier(s) of cache will act as a single tier for ease of
- // management and support PersistentCache methods for accessing data.
- class PersistentTieredCache : public PersistentCacheTier {
- public:
- virtual ~PersistentTieredCache();
- Status Open() override;
- Status Close() override;
- bool Erase(const Slice& key) override;
- std::string PrintStats() override;
- PersistentCache::StatsType Stats() override;
- Status Insert(const Slice& page_key, const char* data,
- const size_t size) override;
- Status Lookup(const Slice& page_key, std::unique_ptr<char[]>* data,
- size_t* size) override;
- bool IsCompressed() override;
- std::string GetPrintableOptions() const override {
- return "PersistentTieredCache";
- }
- void AddTier(const Tier& tier);
- Tier& next_tier() override {
- auto it = tiers_.end();
- return (*it)->next_tier();
- }
- void set_next_tier(const Tier& tier) override {
- auto it = tiers_.end();
- (*it)->set_next_tier(tier);
- }
- void TEST_Flush() override {
- assert(!tiers_.empty());
- tiers_.front()->TEST_Flush();
- PersistentCacheTier::TEST_Flush();
- }
- protected:
- std::list<Tier> tiers_; // list of tiers top-down
- };
- } // namespace ROCKSDB_NAMESPACE
- #endif
|