// hash_table.h
//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
#pragma once

#include <assert.h>

#include <list>
#include <vector>

#ifdef OS_LINUX
#include <sys/mman.h>
#endif

#include "rocksdb/env.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {
// HashTable<T, Hash, Equal>
//
// Traditional implementations of hash tables with synchronization built on
// top don't perform very well in multi-core scenarios. This is an
// implementation designed for multi-core scenarios with high lock contention.
//
//          |<-------- alpha ------------->|
//
//            Buckets                 Collision list
//          ---- +----+    +---+---+--- ...... ---+---+---+
//         /     |    |--->|   |   |              |   |   |
//        /      +----+    +---+---+--- ...... ---+---+---+
//       /       |    |
// Locks/        +----+
// +--+/         .    .
// |  |          .    .
// +--+          .    .
// |  |          .    .
// +--+          .    .
// |  |          .    .
// +--+          .    .
//     \         +----+
//      \        |    |
//       \       +----+
//        \      |    |
//         \---- +----+
//
// The lock contention is spread over an array of locks. This helps improve
// concurrent access. The spine is designed for a certain capacity and load
// factor. When the capacity planning is done correctly we can expect
// O(load_factor = 1) insert, access and remove time.
//
// Micro benchmark on debug build gives about .5 Million/sec rate of insert,
// erase and lookup in parallel (total of about 1.5 Million ops/sec). If the
// blocks were of 4K, the hash table can support a virtual throughput of
// 6 GB/s.
//
// T      Object type (contains both key and value)
// Hash   Function that returns an hash from type T
// Equal  Returns if two objects are equal
//        (We need explicit equal for pointer type)
//
  57. template <class T, class Hash, class Equal>
  58. class HashTable {
  59. public:
  60. explicit HashTable(const size_t capacity = 1024 * 1024,
  61. const float load_factor = 2.0, const uint32_t nlocks = 256)
  62. : nbuckets_(
  63. static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
  64. nlocks_(nlocks) {
  65. // pre-conditions
  66. assert(capacity);
  67. assert(load_factor);
  68. assert(nbuckets_);
  69. assert(nlocks_);
  70. buckets_.reset(new Bucket[nbuckets_]);
  71. #ifdef OS_LINUX
  72. mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
  73. #endif
  74. // initialize locks
  75. locks_.reset(new port::RWMutex[nlocks_]);
  76. #ifdef OS_LINUX
  77. mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
  78. #endif
  79. // post-conditions
  80. assert(buckets_);
  81. assert(locks_);
  82. }
  83. virtual ~HashTable() { AssertEmptyBuckets(); }
  84. //
  85. // Insert given record to hash table
  86. //
  87. bool Insert(const T& t) {
  88. const uint64_t h = Hash()(t);
  89. const uint32_t bucket_idx = h % nbuckets_;
  90. const uint32_t lock_idx = bucket_idx % nlocks_;
  91. WriteLock _(&locks_[lock_idx]);
  92. auto& bucket = buckets_[bucket_idx];
  93. return Insert(&bucket, t);
  94. }
  95. // Lookup hash table
  96. //
  97. // Please note that read lock should be held by the caller. This is because
  98. // the caller owns the data, and should hold the read lock as long as he
  99. // operates on the data.
  100. bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
  101. const uint64_t h = Hash()(t);
  102. const uint32_t bucket_idx = h % nbuckets_;
  103. const uint32_t lock_idx = bucket_idx % nlocks_;
  104. port::RWMutex& lock = locks_[lock_idx];
  105. lock.ReadLock();
  106. auto& bucket = buckets_[bucket_idx];
  107. if (Find(&bucket, t, ret)) {
  108. *ret_lock = &lock;
  109. return true;
  110. }
  111. lock.ReadUnlock();
  112. return false;
  113. }
  114. //
  115. // Erase a given key from the hash table
  116. //
  117. bool Erase(const T& t, T* ret) {
  118. const uint64_t h = Hash()(t);
  119. const uint32_t bucket_idx = h % nbuckets_;
  120. const uint32_t lock_idx = bucket_idx % nlocks_;
  121. WriteLock _(&locks_[lock_idx]);
  122. auto& bucket = buckets_[bucket_idx];
  123. return Erase(&bucket, t, ret);
  124. }
  125. // Fetch the mutex associated with a key
  126. // This call is used to hold the lock for a given data for extended period of
  127. // time.
  128. port::RWMutex* GetMutex(const T& t) {
  129. const uint64_t h = Hash()(t);
  130. const uint32_t bucket_idx = h % nbuckets_;
  131. const uint32_t lock_idx = bucket_idx % nlocks_;
  132. return &locks_[lock_idx];
  133. }
  134. void Clear(void (*fn)(T)) {
  135. for (uint32_t i = 0; i < nbuckets_; ++i) {
  136. const uint32_t lock_idx = i % nlocks_;
  137. WriteLock _(&locks_[lock_idx]);
  138. for (auto& t : buckets_[i].list_) {
  139. (*fn)(t);
  140. }
  141. buckets_[i].list_.clear();
  142. }
  143. }
  144. protected:
  145. // Models bucket of keys that hash to the same bucket number
  146. struct Bucket {
  147. std::list<T> list_;
  148. };
  149. // Substitute for std::find with custom comparator operator
  150. typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
  151. for (auto it = list->begin(); it != list->end(); ++it) {
  152. if (Equal()(*it, t)) {
  153. return it;
  154. }
  155. }
  156. return list->end();
  157. }
  158. bool Insert(Bucket* bucket, const T& t) {
  159. // Check if the key already exists
  160. auto it = Find(&bucket->list_, t);
  161. if (it != bucket->list_.end()) {
  162. return false;
  163. }
  164. // insert to bucket
  165. bucket->list_.push_back(t);
  166. return true;
  167. }
  168. bool Find(Bucket* bucket, const T& t, T* ret) {
  169. auto it = Find(&bucket->list_, t);
  170. if (it != bucket->list_.end()) {
  171. if (ret) {
  172. *ret = *it;
  173. }
  174. return true;
  175. }
  176. return false;
  177. }
  178. bool Erase(Bucket* bucket, const T& t, T* ret) {
  179. auto it = Find(&bucket->list_, t);
  180. if (it != bucket->list_.end()) {
  181. if (ret) {
  182. *ret = *it;
  183. }
  184. bucket->list_.erase(it);
  185. return true;
  186. }
  187. return false;
  188. }
  189. // assert that all buckets are empty
  190. void AssertEmptyBuckets() {
  191. #ifndef NDEBUG
  192. for (size_t i = 0; i < nbuckets_; ++i) {
  193. WriteLock _(&locks_[i % nlocks_]);
  194. assert(buckets_[i].list_.empty());
  195. }
  196. #endif
  197. }
  198. const uint32_t nbuckets_; // No. of buckets in the spine
  199. std::unique_ptr<Bucket[]> buckets_; // Spine of the hash buckets
  200. const uint32_t nlocks_; // No. of locks
  201. std::unique_ptr<port::RWMutex[]> locks_; // Granular locks
  202. };
}  // namespace ROCKSDB_NAMESPACE