//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
#pragma once

#ifndef ROCKSDB_LITE

#include <assert.h>

#include <list>
#include <vector>

#ifdef OS_LINUX
#include <sys/mman.h>
#endif

#include "rocksdb/env.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

// HashTable<T, Hash, Equal>
//
// Traditional implementations of a hash table with synchronization built on
// top don't perform well in multi-core scenarios. This implementation is
// designed for multi-core scenarios with high lock contention.
//
//                         |<-------- alpha ------------->|
//               Buckets                 Collision list
//          ---- +----+    +---+---+--- ...... ---+---+---+
//         /     |    |--->|   |   |              |   |   |
//        /      +----+    +---+---+--- ...... ---+---+---+
//       /       |    |
// Locks/        +----+
// +--+/         .  .
// |  |          .  .
// +--+          .  .
// |  |          .  .
// +--+          .  .
// |  |          .  .
// +--+          .  .
//      \        +----+
//       \       |    |
//        \      +----+
//         \     |    |
//          \----+----+
//
// Lock contention is spread over an array of locks, which improves concurrent
// access. The spine is sized for a given capacity and load factor; when
// capacity planning is done correctly we can expect O(load_factor) = O(1)
// insert, access and remove time.
//
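// For example, with the defaults below (capacity = 1M entries,
// load_factor = 2.0, nlocks = 256): the spine has 1M / 2 = 512K buckets,
// each lock stripes across 512K / 256 = 2K buckets, and a full table
// averages ~2 entries per collision list.
//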
// A micro benchmark on a debug build gives a rate of about 0.5 million
// ops/sec each for insert, erase and lookup running in parallel (about
// 1.5 million ops/sec in total). If the cached blocks were 4 KB each, the
// hash table could support a virtual throughput of 6 GB/s.
//
// T      Object type (contains both key and value)
// Hash   Function that returns a hash from type T
// Equal  Returns whether two objects are equal
//        (we need an explicit Equal for pointer types)
//
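// Example instantiation (a sketch; Blob, BlobHash and BlobEqual are
// hypothetical types, not defined in this file):
//
//   struct Blob {
//     uint64_t key;
//     std::string data;
//   };
//
//   struct BlobHash {
//     uint64_t operator()(const Blob& b) const { return b.key; }
//   };
//
//   struct BlobEqual {
//     bool operator()(const Blob& lhs, const Blob& rhs) const {
//       return lhs.key == rhs.key;
//     }
//   };
//
//   HashTable<Blob, BlobHash, BlobEqual> table;
//   table.Insert(Blob{/*key=*/1, /*data=*/"value"});
//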
template <class T, class Hash, class Equal>
class HashTable {
 public:
  explicit HashTable(const size_t capacity = 1024 * 1024,
                     const float load_factor = 2.0, const uint32_t nlocks = 256)
      : nbuckets_(
            static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
        nlocks_(nlocks) {
    // pre-conditions
    assert(capacity);
    assert(load_factor);
    assert(nbuckets_);
    assert(nlocks_);

    buckets_.reset(new Bucket[nbuckets_]);
#ifdef OS_LINUX
    mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
#endif

    // initialize locks
    locks_.reset(new port::RWMutex[nlocks_]);
#ifdef OS_LINUX
    mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
#endif

    // post-conditions
    assert(buckets_);
    assert(locks_);
  }

  virtual ~HashTable() { AssertEmptyBuckets(); }

  //
  // Insert given record to hash table
  //
  bool Insert(const T& t) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    WriteLock _(&locks_[lock_idx]);
    auto& bucket = buckets_[bucket_idx];
    return Insert(&bucket, t);
  }

  //
  // Lookup hash table
  //
  // Note: on success, the read lock protecting the entry is returned to the
  // caller still held, via ret_lock. The caller owns the data and must keep
  // holding the read lock for as long as it operates on the data, then
  // release it with ReadUnlock().
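  //
  // Usage sketch (`key` is a probe object of type T carrying the key fields):
  //
  //   T value;
  //   port::RWMutex* lock = nullptr;
  //   if (table.Find(key, &value, &lock)) {
  //     // ... read `value` while the lock is held ...
  //     lock->ReadUnlock();
  //   }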
  bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    port::RWMutex& lock = locks_[lock_idx];
    lock.ReadLock();

    auto& bucket = buckets_[bucket_idx];
    if (Find(&bucket, t, ret)) {
      *ret_lock = &lock;
      return true;
    }

    lock.ReadUnlock();
    return false;
  }

  //
  // Erase a given key from the hash table
  //
  bool Erase(const T& t, T* ret) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    WriteLock _(&locks_[lock_idx]);
    auto& bucket = buckets_[bucket_idx];
    return Erase(&bucket, t, ret);
  }

  // Fetch the mutex associated with a key
  // This call is used to hold the lock for given data for an extended period
  // of time.
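  //
  // Usage sketch (pin an entry across several operations; `probe` is a T
  // carrying the key fields):
  //
  //   port::RWMutex* lock = table.GetMutex(probe);
  //   lock->ReadLock();
  //   // ... the entry cannot be concurrently erased while the lock is held
  //   lock->ReadUnlock();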
  port::RWMutex* GetMutex(const T& t) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;
    return &locks_[lock_idx];
  }
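
  // Remove all entries, invoking fn on each one before it is removed. Usage
  // sketch (assumes the hypothetical Blob element type from the class
  // comment; a capture-less lambda converts to the function pointer):
  //
  //   table.Clear([](Blob b) { /* release any resources owned by b */ });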
  void Clear(void (*fn)(T)) {
    for (uint32_t i = 0; i < nbuckets_; ++i) {
      const uint32_t lock_idx = i % nlocks_;
      WriteLock _(&locks_[lock_idx]);
      for (auto& t : buckets_[i].list_) {
        (*fn)(t);
      }
      buckets_[i].list_.clear();
    }
  }

 protected:
  // Models bucket of keys that hash to the same bucket number
  struct Bucket {
    std::list<T> list_;
  };

  // Substitute for std::find with custom comparator operator
  typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
    for (auto it = list->begin(); it != list->end(); ++it) {
      if (Equal()(*it, t)) {
        return it;
      }
    }
    return list->end();
  }

  bool Insert(Bucket* bucket, const T& t) {
    // Check if the key already exists
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      return false;
    }

    // insert to bucket
    bucket->list_.push_back(t);
    return true;
  }

  bool Find(Bucket* bucket, const T& t, T* ret) {
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      if (ret) {
        *ret = *it;
      }
      return true;
    }
    return false;
  }

  bool Erase(Bucket* bucket, const T& t, T* ret) {
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      if (ret) {
        *ret = *it;
      }

      bucket->list_.erase(it);
      return true;
    }
    return false;
  }

  // assert that all buckets are empty
  void AssertEmptyBuckets() {
#ifndef NDEBUG
    for (size_t i = 0; i < nbuckets_; ++i) {
      WriteLock _(&locks_[i % nlocks_]);
      assert(buckets_[i].list_.empty());
    }
#endif
  }

  const uint32_t nbuckets_;                 // No. of buckets in the spine
  std::unique_ptr<Bucket[]> buckets_;       // Spine of the hash buckets
  const uint32_t nlocks_;                   // No. of locks
  std::unique_ptr<port::RWMutex[]> locks_;  // Granular locks
};

}  // namespace ROCKSDB_NAMESPACE

#endif  // ROCKSDB_LITE