clock_cache.cc

// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "cache/clock_cache.h"

#ifndef SUPPORT_CLOCK_CACHE

namespace ROCKSDB_NAMESPACE {

std::shared_ptr<Cache> NewClockCache(
    size_t /*capacity*/, int /*num_shard_bits*/, bool /*strict_capacity_limit*/,
    CacheMetadataChargePolicy /*metadata_charge_policy*/) {
  // Clock cache not supported.
  return nullptr;
}

}  // namespace ROCKSDB_NAMESPACE

#else

#include <assert.h>
#include <string.h>

#include <atomic>
#include <deque>

// "tbb/concurrent_hash_map.h" requires RTTI if exceptions are enabled.
// Disable it so users can choose to disable RTTI.
#ifndef ROCKSDB_USE_RTTI
#define TBB_USE_EXCEPTIONS 0
#endif
#include "tbb/concurrent_hash_map.h"

#include "cache/sharded_cache.h"
#include "port/malloc.h"
#include "port/port.h"
#include "util/autovector.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

namespace {

// An implementation of the Cache interface based on the CLOCK algorithm, with
// better concurrent performance than LRUCache. The idea of the CLOCK algorithm
// is to maintain all cache entries in a circular list, with an iterator
// (the "head") pointing to the last examined entry. Eviction starts from the
// current head. Each entry is given a second chance before eviction: it is
// skipped if it has been accessed since it was last examined. In contrast to
// LRU, no modification of the internal data structure (except for flipping
// the usage bit) needs to be done upon lookup. This gives us an opportunity
// to implement a cache with better concurrency.
//
// Each cache entry is represented by a cache handle, and all the handles
// are arranged in a circular list, as described above. Upon erase of an
// entry, we never remove the handle. Instead, the handle is put into a
// recycle bin to be reused. This avoids memory deallocation, which is hard
// to deal with in a concurrent environment.
//
// The cache also maintains a concurrent hash map for lookup. Any concurrent
// hash map implementation should do the work. We currently use
// tbb::concurrent_hash_map because it supports concurrent erase.
//
// Each cache handle has the following flags and counters, which are squeezed
// into a single atomic integer, to make sure the handle is always in a
// consistent state:
//
//   * In-cache bit: whether the entry is referenced by the cache itself. If
//     an entry is in cache, its key is also available in the hash map.
//   * Usage bit: whether the entry has been accessed by a user since it was
//     last examined for eviction. Can be reset by eviction.
//   * Reference count: number of references held by users.
//
// An entry can be referenced only when it is in cache. An entry can be
// evicted only when it is in cache, has not been used since it was last
// examined, and its reference count is zero.
//
// The following figure shows a possible layout of the cache. Boxes represent
// cache handles, and the numbers in each box are the in-cache bit, usage bit
// and reference count, respectively.
//
//    hash map:
//      +-------+--------+
//      |  key  | handle |
//      +-------+--------+
//      | "foo" |    5   |-------------------------------------+
//      +-------+--------+                                     |
//      | "bar" |    2   |--+                                  |
//      +-------+--------+  |                                  |
//                          |                                  |
//                     head |                                  |
//                       |  |                                  |
//    circular list:     |  |                                  |
//         +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//         |(0,0,0)|---|(1,1,0)|---|(0,0,0)|---|(0,1,3)|---|(1,0,0)|---|  ...
//         +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//             |                       |
//             +-------+   +-----------+
//                     |   |
//                   +---+---+
//    recycle bin:   | 1 | 3 |
//                   +---+---+
//
// Suppose we try to insert "baz" into the cache at this point and the cache
// is full. The cache will first look for entries to evict, starting from
// where head points to (the second entry). It resets the usage bit of the
// second entry, skips the third and fourth entries since they are not in
// cache, and finally evicts the fifth entry ("foo"). It then looks in the
// recycle bin for an available handle, grabs handle 3, and inserts the key
// into that handle. The following figure shows the resulting layout.
//
//    hash map:
//      +-------+--------+
//      |  key  | handle |
//      +-------+--------+
//      | "baz" |    3   |-------------+
//      +-------+--------+             |
//      | "bar" |    2   |--+          |
//      +-------+--------+  |          |
//                          |          |
//                          |          | head
//                          |          |
//    circular list:        |          |
//         +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//         |(0,0,0)|---|(1,0,0)|---|(1,0,0)|---|(0,1,3)|---|(0,0,0)|---|  ...
//         +-------+   +-------+   +-------+   +-------+   +-------+   +-------
//             |                                                |
//             +-------+   +------------------------------------+
//                     |   |
//                   +---+---+
//    recycle bin:   | 1 | 5 |
//                   +---+---+
//
// A global mutex guards the circular list, the head, and the recycle bin.
// We additionally require that modifying the hash map needs to hold the
// mutex. As such, modifying the cache (e.g. Insert() and Erase()) requires
// holding the mutex. Lookup() only accesses the hash map and the flags
// associated with each handle, and doesn't require explicit locking.
// Release() has to acquire the mutex only when it releases the last
// reference to the entry and the entry has been erased from the cache
// explicitly. A future improvement could be to remove the mutex completely.
//
// Benchmark:
// We run readrandom db_bench on a test DB of size 13GB, with the size of
// each level:
//
//    Level    Files   Size(MB)
//    --------------------------
//      L0         1       0.01
//      L1        18      17.32
//      L2       230     182.94
//      L3      1186    1833.63
//      L4      4602    8140.30
//
// We test with both 32 and 16 read threads, with 2GB cache size (the whole
// DB doesn't fit in) and 64GB cache size (the whole DB can fit in cache),
// and whether to put index and filter blocks in block cache. The benchmark
// runs with RocksDB 4.10. We got the following result:
//
//    Threads Cache     Cache          ClockCache                  LRUCache
//            Size  Index/Filter Throughput(MB/s)   Hit   Throughput(MB/s)   Hit
//    32      2GB       yes              466.7    85.9%        433.7       86.5%
//    32      2GB       no               529.9    72.7%        532.7       73.9%
//    32      64GB      yes              649.9    99.9%        507.9       99.9%
//    32      64GB      no               740.4    99.9%        662.8       99.9%
//    16      2GB       yes              278.4    85.9%        283.4       86.5%
//    16      2GB       no               318.6    72.7%        335.8       73.9%
//    16      64GB      yes              391.9    99.9%        353.3       99.9%
//    16      64GB      no               433.8    99.8%        419.4       99.8%

// Cache entry metadata.
struct CacheHandle {
  Slice key;
  uint32_t hash;
  void* value;
  size_t charge;
  void (*deleter)(const Slice&, void* value);

  // Flags and counters associated with the cache handle:
  //   lowest bit: in-cache bit
  //   second lowest bit: usage bit
  //   the rest of the bits: reference count
  // The handle is unused when flags equals 0. The thread that decreases the
  // count to 0 is responsible for putting the handle back into recycle_ and
  // cleaning up memory.
  std::atomic<uint32_t> flags;

  CacheHandle() = default;

  CacheHandle(const CacheHandle& a) { *this = a; }

  CacheHandle(const Slice& k, void* v,
              void (*del)(const Slice& key, void* value))
      : key(k), value(v), deleter(del) {}

  CacheHandle& operator=(const CacheHandle& a) {
    // Only copy members needed for deletion.
    key = a.key;
    value = a.value;
    deleter = a.deleter;
    return *this;
  }

  inline static size_t CalcTotalCharge(
      Slice key, size_t charge,
      CacheMetadataChargePolicy metadata_charge_policy) {
    size_t meta_charge = 0;
    if (metadata_charge_policy == kFullChargeCacheMetadata) {
      meta_charge += sizeof(CacheHandle);
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
      meta_charge += malloc_usable_size(
          static_cast<void*>(const_cast<char*>(key.data())));
#else
      meta_charge += key.size();
#endif
    }
    return charge + meta_charge;
  }

  inline size_t CalcTotalCharge(
      CacheMetadataChargePolicy metadata_charge_policy) {
    return CalcTotalCharge(key, charge, metadata_charge_policy);
  }
};

// Key of the hash map. We store the hash value with the key for convenience.
struct CacheKey {
  Slice key;
  uint32_t hash_value;

  CacheKey() = default;

  CacheKey(const Slice& k, uint32_t h) {
    key = k;
    hash_value = h;
  }

  static bool equal(const CacheKey& a, const CacheKey& b) {
    return a.hash_value == b.hash_value && a.key == b.key;
  }

  static size_t hash(const CacheKey& a) {
    return static_cast<size_t>(a.hash_value);
  }
};

struct CleanupContext {
  // List of values to be deleted, along with the key and deleter.
  autovector<CacheHandle> to_delete_value;

  // List of keys to be deleted.
  autovector<const char*> to_delete_key;
};
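
// The intended use of CleanupContext (an illustrative sketch, mirroring the
// pattern followed by Erase() and SetCapacity() below): mutate the cache
// while holding mutex_, collecting doomed keys/values into the context, then
// run the deleters outside the lock so that arbitrarily expensive destructors
// never execute under the mutex:
//
//   CleanupContext context;
//   {
//     MutexLock l(&mutex_);
//     ...collect entries to delete into &context...
//   }
//   Cleanup(context);  // runs deleters and frees keys; no lock held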

// A cache shard which maintains its own CLOCK cache.
class ClockCacheShard final : public CacheShard {
 public:
  // Hash map type.
  typedef tbb::concurrent_hash_map<CacheKey, CacheHandle*, CacheKey> HashTable;

  ClockCacheShard();
  ~ClockCacheShard() override;

  // Interfaces
  void SetCapacity(size_t capacity) override;
  void SetStrictCapacityLimit(bool strict_capacity_limit) override;
  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
                void (*deleter)(const Slice& key, void* value),
                Cache::Handle** handle, Cache::Priority priority) override;
  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
  // If the entry is in cache, increase its reference count and return true.
  // Return false otherwise.
  //
  // Not necessary to hold mutex_ before being called.
  bool Ref(Cache::Handle* handle) override;
  bool Release(Cache::Handle* handle, bool force_erase = false) override;
  void Erase(const Slice& key, uint32_t hash) override;
  bool EraseAndConfirm(const Slice& key, uint32_t hash,
                       CleanupContext* context);
  size_t GetUsage() const override;
  size_t GetPinnedUsage() const override;
  void EraseUnRefEntries() override;
  void ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                              bool thread_safe) override;

 private:
  static const uint32_t kInCacheBit = 1;
  static const uint32_t kUsageBit = 2;
  static const uint32_t kRefsOffset = 2;
  static const uint32_t kOneRef = 1 << kRefsOffset;
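
  // Worked example of the flag layout above (an illustrative addition, not
  // used by the implementation): an entry that is in cache, has its usage
  // bit set, and is referenced by three users stores
  //   flags = kInCacheBit | kUsageBit | (3 << kRefsOffset) = 1 + 2 + 12 = 15,
  // and CountRefs(15) == 15 >> kRefsOffset == 3.
  static_assert((kInCacheBit | kUsageBit | (3u << kRefsOffset)) == 15u,
                "illustrative flag-layout example");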

  // Helper functions to extract cache handle flags and counters.
  static bool InCache(uint32_t flags) { return flags & kInCacheBit; }
  static bool HasUsage(uint32_t flags) { return flags & kUsageBit; }
  static uint32_t CountRefs(uint32_t flags) { return flags >> kRefsOffset; }

  // Decrease the reference count of the entry. If this decreases the count
  // to 0, recycle the entry. If set_usage is true, also set the usage bit.
  //
  // Returns true if a value is erased.
  //
  // Not necessary to hold mutex_ before being called.
  bool Unref(CacheHandle* handle, bool set_usage, CleanupContext* context);

  // Unset the in-cache bit of the entry. Recycle the handle if necessary.
  //
  // Returns true if a value is erased.
  //
  // Has to hold mutex_ before being called.
  bool UnsetInCache(CacheHandle* handle, CleanupContext* context);

  // Put the handle back into the recycle_ list, and put the value associated
  // with it into the to-be-deleted list. It doesn't clean up the key, as the
  // key might be reused by another handle.
  //
  // Has to hold mutex_ before being called.
  void RecycleHandle(CacheHandle* handle, CleanupContext* context);

  // Delete keys and values in the to-be-deleted list. Call the method without
  // holding the mutex, as destructors can be expensive.
  void Cleanup(const CleanupContext& context);

  // Examine the handle for eviction. If the handle is in cache, its usage bit
  // is not set, and its reference count is 0, evict it from the cache.
  // Otherwise unset the usage bit.
  //
  // Has to hold mutex_ before being called.
  bool TryEvict(CacheHandle* value, CleanupContext* context);

  // Scan through the circular list, evicting entries until we get enough
  // capacity for a new cache entry of the specified size. Return true on
  // success, false otherwise.
  //
  // Has to hold mutex_ before being called.
  bool EvictFromCache(size_t charge, CleanupContext* context);

  CacheHandle* Insert(const Slice& key, uint32_t hash, void* value,
                      size_t charge,
                      void (*deleter)(const Slice& key, void* value),
                      bool hold_reference, CleanupContext* context);

  // Guards list_, head_, and recycle_. In addition, updating table_ also has
  // to hold the mutex, to avoid the cache being in an inconsistent state.
  mutable port::Mutex mutex_;

  // The circular list of cache handles. Initially the list is empty. Once a
  // handle is needed by an insertion, and no more handles are available in
  // the recycle bin, one more handle is appended to the end.
  //
  // We use std::deque for the circular list because we want to make sure
  // pointers to handles are valid throughout the life-cycle of the cache
  // (in contrast to std::vector), and to be able to grow the list (in
  // contrast to statically allocated arrays).
  std::deque<CacheHandle> list_;

  // Index of the next handle in the circular list to be examined for
  // eviction.
  size_t head_;

  // Recycle bin of cache handles.
  autovector<CacheHandle*> recycle_;

  // Maximum cache size.
  std::atomic<size_t> capacity_;

  // Current total size of the cache.
  std::atomic<size_t> usage_;

  // Total un-released cache size.
  std::atomic<size_t> pinned_usage_;

  // Whether to allow insertion into the cache when the cache is full.
  std::atomic<bool> strict_capacity_limit_;

  // Hash table (tbb::concurrent_hash_map) for lookup.
  HashTable table_;
};

ClockCacheShard::ClockCacheShard()
    : head_(0), usage_(0), pinned_usage_(0), strict_capacity_limit_(false) {}

ClockCacheShard::~ClockCacheShard() {
  for (auto& handle : list_) {
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags) || CountRefs(flags) > 0) {
      if (handle.deleter != nullptr) {
        (*handle.deleter)(handle.key, handle.value);
      }
      delete[] handle.key.data();
    }
  }
}

size_t ClockCacheShard::GetUsage() const {
  return usage_.load(std::memory_order_relaxed);
}

size_t ClockCacheShard::GetPinnedUsage() const {
  return pinned_usage_.load(std::memory_order_relaxed);
}

void ClockCacheShard::ApplyToAllCacheEntries(void (*callback)(void*, size_t),
                                             bool thread_safe) {
  if (thread_safe) {
    mutex_.Lock();
  }
  for (auto& handle : list_) {
    // Use relaxed semantics instead of acquire semantics since we are either
    // holding the mutex, or don't have a thread-safety requirement.
    uint32_t flags = handle.flags.load(std::memory_order_relaxed);
    if (InCache(flags)) {
      callback(handle.value, handle.charge);
    }
  }
  if (thread_safe) {
    mutex_.Unlock();
  }
}

void ClockCacheShard::RecycleHandle(CacheHandle* handle,
                                    CleanupContext* context) {
  mutex_.AssertHeld();
  assert(!InCache(handle->flags) && CountRefs(handle->flags) == 0);
  context->to_delete_key.push_back(handle->key.data());
  context->to_delete_value.emplace_back(*handle);
  size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
  handle->key.clear();
  handle->value = nullptr;
  handle->deleter = nullptr;
  recycle_.push_back(handle);
  usage_.fetch_sub(total_charge, std::memory_order_relaxed);
}

void ClockCacheShard::Cleanup(const CleanupContext& context) {
  for (const CacheHandle& handle : context.to_delete_value) {
    if (handle.deleter) {
      (*handle.deleter)(handle.key, handle.value);
    }
  }
  for (const char* key : context.to_delete_key) {
    delete[] key;
  }
}

bool ClockCacheShard::Ref(Cache::Handle* h) {
  auto handle = reinterpret_cast<CacheHandle*>(h);
  // CAS loop to increase the reference count.
  uint32_t flags = handle->flags.load(std::memory_order_relaxed);
  while (InCache(flags)) {
    // Use acquire semantics on success, as further operations on the cache
    // entry have to be ordered after the reference count is increased.
    if (handle->flags.compare_exchange_weak(flags, flags + kOneRef,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
      if (CountRefs(flags) == 0) {
        // No reference count before the operation.
        size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
        pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
      }
      return true;
    }
  }
  return false;
}

bool ClockCacheShard::Unref(CacheHandle* handle, bool set_usage,
                            CleanupContext* context) {
  if (set_usage) {
    handle->flags.fetch_or(kUsageBit, std::memory_order_relaxed);
  }
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the reference count is decreased, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags = handle->flags.fetch_sub(kOneRef, std::memory_order_acq_rel);
  assert(CountRefs(flags) > 0);
  if (CountRefs(flags) == 1) {
    // This is the last reference.
    size_t total_charge = handle->CalcTotalCharge(metadata_charge_policy_);
    pinned_usage_.fetch_sub(total_charge, std::memory_order_relaxed);
    // Cleanup if it is the last reference.
    if (!InCache(flags)) {
      MutexLock l(&mutex_);
      RecycleHandle(handle, context);
    }
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::UnsetInCache(CacheHandle* handle,
                                   CleanupContext* context) {
  mutex_.AssertHeld();
  // Use acquire-release semantics as previous operations on the cache entry
  // have to be ordered before the in-cache bit is cleared, and potential
  // cleanup of the entry has to be ordered after.
  uint32_t flags =
      handle->flags.fetch_and(~kInCacheBit, std::memory_order_acq_rel);
  // Cleanup if it is the last reference.
  if (InCache(flags) && CountRefs(flags) == 0) {
    RecycleHandle(handle, context);
  }
  return context->to_delete_value.size();
}

bool ClockCacheShard::TryEvict(CacheHandle* handle, CleanupContext* context) {
  mutex_.AssertHeld();
  uint32_t flags = kInCacheBit;
  if (handle->flags.compare_exchange_strong(flags, 0,
                                            std::memory_order_acquire,
                                            std::memory_order_relaxed)) {
    bool erased __attribute__((__unused__)) =
        table_.erase(CacheKey(handle->key, handle->hash));
    assert(erased);
    RecycleHandle(handle, context);
    return true;
  }
  handle->flags.fetch_and(~kUsageBit, std::memory_order_relaxed);
  return false;
}

bool ClockCacheShard::EvictFromCache(size_t charge, CleanupContext* context) {
  size_t usage = usage_.load(std::memory_order_relaxed);
  size_t capacity = capacity_.load(std::memory_order_relaxed);
  if (usage == 0) {
    return charge <= capacity;
  }
  size_t new_head = head_;
  bool second_iteration = false;
  while (usage + charge > capacity) {
    assert(new_head < list_.size());
    if (TryEvict(&list_[new_head], context)) {
      usage = usage_.load(std::memory_order_relaxed);
    }
    new_head = (new_head + 1 >= list_.size()) ? 0 : new_head + 1;
    if (new_head == head_) {
      if (second_iteration) {
        return false;
      } else {
        second_iteration = true;
      }
    }
  }
  head_ = new_head;
  return true;
}
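
// Sketch of a single EvictFromCache() sweep on the example layout from the
// file header (illustrative only): with head at the second handle, TryEvict()
// clears that handle's usage bit, has no effect on handles whose in-cache bit
// is 0, and recycles the first handle whose flags equal kInCacheBit (in
// cache, unused, unreferenced). If a full lap around the list plus one more
// lap still cannot free enough space, the function gives up and returns
// false.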

void ClockCacheShard::SetCapacity(size_t capacity) {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    capacity_.store(capacity, std::memory_order_relaxed);
    EvictFromCache(0, &context);
  }
  Cleanup(context);
}

void ClockCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
  strict_capacity_limit_.store(strict_capacity_limit,
                               std::memory_order_relaxed);
}

CacheHandle* ClockCacheShard::Insert(
    const Slice& key, uint32_t hash, void* value, size_t charge,
    void (*deleter)(const Slice& key, void* value), bool hold_reference,
    CleanupContext* context) {
  size_t total_charge =
      CacheHandle::CalcTotalCharge(key, charge, metadata_charge_policy_);
  MutexLock l(&mutex_);
  bool success = EvictFromCache(total_charge, context);
  bool strict = strict_capacity_limit_.load(std::memory_order_relaxed);
  if (!success && (strict || !hold_reference)) {
    context->to_delete_key.push_back(key.data());
    if (!hold_reference) {
      context->to_delete_value.emplace_back(key, value, deleter);
    }
    return nullptr;
  }
  // Grab an available handle from the recycle bin. If the recycle bin is
  // empty, create and append a new handle to the end of the circular list.
  CacheHandle* handle = nullptr;
  if (!recycle_.empty()) {
    handle = recycle_.back();
    recycle_.pop_back();
  } else {
    list_.emplace_back();
    handle = &list_.back();
  }
  // Fill the handle.
  handle->key = key;
  handle->hash = hash;
  handle->value = value;
  handle->charge = charge;
  handle->deleter = deleter;
  uint32_t flags = hold_reference ? kInCacheBit + kOneRef : kInCacheBit;
  handle->flags.store(flags, std::memory_order_relaxed);
  HashTable::accessor accessor;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* existing_handle = accessor->second;
    table_.erase(accessor);
    UnsetInCache(existing_handle, context);
  }
  table_.insert(HashTable::value_type(CacheKey(key, hash), handle));
  if (hold_reference) {
    pinned_usage_.fetch_add(total_charge, std::memory_order_relaxed);
  }
  usage_.fetch_add(total_charge, std::memory_order_relaxed);
  return handle;
}

Status ClockCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
                               size_t charge,
                               void (*deleter)(const Slice& key, void* value),
                               Cache::Handle** out_handle,
                               Cache::Priority /*priority*/) {
  CleanupContext context;
  HashTable::accessor accessor;
  char* key_data = new char[key.size()];
  memcpy(key_data, key.data(), key.size());
  Slice key_copy(key_data, key.size());
  CacheHandle* handle = Insert(key_copy, hash, value, charge, deleter,
                               out_handle != nullptr, &context);
  Status s;
  if (out_handle != nullptr) {
    if (handle == nullptr) {
      s = Status::Incomplete("Insert failed due to the cache being full.");
    } else {
      *out_handle = reinterpret_cast<Cache::Handle*>(handle);
    }
  }
  Cleanup(context);
  return s;
}

Cache::Handle* ClockCacheShard::Lookup(const Slice& key, uint32_t hash) {
  HashTable::const_accessor accessor;
  if (!table_.find(accessor, CacheKey(key, hash))) {
    return nullptr;
  }
  CacheHandle* handle = accessor->second;
  accessor.release();
  // Ref() could fail if another thread sneaks in and evicts/erases the cache
  // entry before we are able to hold a reference.
  if (!Ref(reinterpret_cast<Cache::Handle*>(handle))) {
    return nullptr;
  }
  // Double check the key, since the handle may now represent another key if
  // other threads sneak in, evict/erase the entry, and reuse the handle for
  // another cache entry.
  if (hash != handle->hash || key != handle->key) {
    CleanupContext context;
    Unref(handle, false, &context);
    // It is possible that Unref() deletes the entry, so we need to cleanup.
    Cleanup(context);
    return nullptr;
  }
  return reinterpret_cast<Cache::Handle*>(handle);
}
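
// Typical caller-side sequence against a shard (an illustrative sketch; real
// callers go through the public Cache/ShardedCache interface, which routes to
// the shard selected by the key's hash):
//
//   Cache::Handle* h = shard->Lookup(key, hash);
//   if (h != nullptr) {
//     // The handle pins the entry; it cannot be recycled until released.
//     void* value = reinterpret_cast<CacheHandle*>(h)->value;
//     ...use value...
//     shard->Release(h);
//   }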

bool ClockCacheShard::Release(Cache::Handle* h, bool force_erase) {
  CleanupContext context;
  CacheHandle* handle = reinterpret_cast<CacheHandle*>(h);
  bool erased = Unref(handle, true, &context);
  if (force_erase && !erased) {
    erased = EraseAndConfirm(handle->key, handle->hash, &context);
  }
  Cleanup(context);
  return erased;
}

void ClockCacheShard::Erase(const Slice& key, uint32_t hash) {
  CleanupContext context;
  EraseAndConfirm(key, hash, &context);
  Cleanup(context);
}

bool ClockCacheShard::EraseAndConfirm(const Slice& key, uint32_t hash,
                                      CleanupContext* context) {
  MutexLock l(&mutex_);
  HashTable::accessor accessor;
  bool erased = false;
  if (table_.find(accessor, CacheKey(key, hash))) {
    CacheHandle* handle = accessor->second;
    table_.erase(accessor);
    erased = UnsetInCache(handle, context);
  }
  return erased;
}

void ClockCacheShard::EraseUnRefEntries() {
  CleanupContext context;
  {
    MutexLock l(&mutex_);
    table_.clear();
    for (auto& handle : list_) {
      UnsetInCache(&handle, &context);
    }
  }
  Cleanup(context);
}

class ClockCache final : public ShardedCache {
 public:
  ClockCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
             CacheMetadataChargePolicy metadata_charge_policy)
      : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
    int num_shards = 1 << num_shard_bits;
    shards_ = new ClockCacheShard[num_shards];
    for (int i = 0; i < num_shards; i++) {
      shards_[i].set_metadata_charge_policy(metadata_charge_policy);
    }
    SetCapacity(capacity);
    SetStrictCapacityLimit(strict_capacity_limit);
  }

  ~ClockCache() override { delete[] shards_; }

  const char* Name() const override { return "ClockCache"; }

  CacheShard* GetShard(int shard) override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  const CacheShard* GetShard(int shard) const override {
    return reinterpret_cast<CacheShard*>(&shards_[shard]);
  }

  void* Value(Handle* handle) override {
    return reinterpret_cast<const CacheHandle*>(handle)->value;
  }

  size_t GetCharge(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->charge;
  }

  uint32_t GetHash(Handle* handle) const override {
    return reinterpret_cast<const CacheHandle*>(handle)->hash;
  }

  void DisownData() override { shards_ = nullptr; }

 private:
  ClockCacheShard* shards_;
};

}  // end anonymous namespace

std::shared_ptr<Cache> NewClockCache(
    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
    CacheMetadataChargePolicy metadata_charge_policy) {
  if (num_shard_bits < 0) {
    num_shard_bits = GetDefaultCacheShardBits(capacity);
  }
  return std::make_shared<ClockCache>(
      capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
}
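
// Example of wiring this cache into RocksDB as a block cache (an illustrative
// sketch; the capacity and shard-bit values below are arbitrary choices):
//
//   std::shared_ptr<Cache> cache =
//       NewClockCache(2UL << 30 /* 2GB */, 6 /* num_shard_bits */,
//                     false /* strict_capacity_limit */,
//                     kDefaultCacheMetadataChargePolicy);
//   if (cache == nullptr) {
//     // Built without SUPPORT_CLOCK_CACHE (e.g. no TBB); fall back to
//     // NewLRUCache().
//   }
//   BlockBasedTableOptions table_options;
//   table_options.block_cache = cache;
//   Options options;
//   options.table_factory.reset(NewBlockBasedTableFactory(table_options));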

}  // namespace ROCKSDB_NAMESPACE

#endif  // SUPPORT_CLOCK_CACHE