//  Copyright (c) 2013, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
#pragma once
#ifndef ROCKSDB_LITE

#include <assert.h>
#include <list>
#include <vector>

#ifdef OS_LINUX
#include <sys/mman.h>
#endif

#include "rocksdb/env.h"
#include "util/mutexlock.h"

namespace ROCKSDB_NAMESPACE {

// HashTable<T, Hash, Equal>
//
// Traditional implementations of hash tables with synchronization built on
// top do not perform very well in multi-core scenarios. This is an
// implementation designed for multi-core scenarios with high lock contention.
//
//                         |<-------- alpha ------------->|
//               Buckets   Collision list
//          ---- +----+    +---+---+--- ...... ---+---+---+
//         /     |    |--->|   |   |              |   |   |
//        /      +----+    +---+---+--- ...... ---+---+---+
//       /       |    |
//   Locks/      +----+
//  +--+/        .    .
//  |  |         .    .
//  +--+         .    .
//  |  |         .    .
//  +--+         .    .
//  |  |         .    .
//  +--+         .    .
//     \         +----+
//      \        |    |
//       \       +----+
//        \      |    |
//         \---- +----+
//
// The lock contention is spread over an array of locks. This helps improve
// concurrent access. The spine is sized for a given capacity and load
// factor. When the capacity planning is done correctly (i.e. the load factor
// stays around 1), insert, access and remove run in expected O(1) time.
//
// A micro benchmark on a debug build gives a rate of about 0.5 million/sec
// each for insert, erase and lookup running in parallel (about 1.5 million
// ops/sec in total). With 4 KB blocks that corresponds to a virtual
// throughput of about 6 GB/s.
//
// T      Object type (contains both key and value)
// Hash   Function that returns a hash for an object of type T
// Equal  Function that returns true if two objects are equal
//        (an explicit Equal is needed for pointer types)
//
template <class T, class Hash, class Equal>
class HashTable {
 public:
  explicit HashTable(const size_t capacity = 1024 * 1024,
                     const float load_factor = 2.0, const uint32_t nlocks = 256)
      : nbuckets_(
            static_cast<uint32_t>(load_factor ? capacity / load_factor : 0)),
        nlocks_(nlocks) {
    // pre-conditions
    assert(capacity);
    assert(load_factor);
    assert(nbuckets_);
    assert(nlocks_);

    buckets_.reset(new Bucket[nbuckets_]);
#ifdef OS_LINUX
    mlock(buckets_.get(), nbuckets_ * sizeof(Bucket));
#endif

    // initialize locks
    locks_.reset(new port::RWMutex[nlocks_]);
#ifdef OS_LINUX
    mlock(locks_.get(), nlocks_ * sizeof(port::RWMutex));
#endif

    // post-conditions
    assert(buckets_);
    assert(locks_);
  }

  virtual ~HashTable() { AssertEmptyBuckets(); }

  //
  // Insert the given record into the hash table
  //
  bool Insert(const T& t) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    WriteLock _(&locks_[lock_idx]);
    auto& bucket = buckets_[bucket_idx];
    return Insert(&bucket, t);
  }

  // Lookup the hash table for a given key
  //
  // Note: on a successful lookup the stripe read lock is handed back to the
  // caller via ret_lock. The caller owns the data and should keep holding the
  // read lock for as long as it operates on the data, then release it with
  // ReadUnlock().
  bool Find(const T& t, T* ret, port::RWMutex** ret_lock) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    port::RWMutex& lock = locks_[lock_idx];
    lock.ReadLock();

    auto& bucket = buckets_[bucket_idx];
    if (Find(&bucket, t, ret)) {
      *ret_lock = &lock;
      return true;
    }

    lock.ReadUnlock();
    return false;
  }
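
  // Illustrative lookup pattern (a sketch, reusing the hypothetical Block
  // types from the example above): on success Find() returns with the stripe
  // lock held in read mode, and the caller is responsible for releasing it
  // once done with the returned copy.
  //
  //   Block found;
  //   port::RWMutex* lock = nullptr;
  //   if (table.Find(Block{1, nullptr}, &found, &lock)) {
  //     // ... use found.data while the read lock is held ...
  //     lock->ReadUnlock();
  //   }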

  //
  // Erase a given key from the hash table
  //
  bool Erase(const T& t, T* ret) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    WriteLock _(&locks_[lock_idx]);

    auto& bucket = buckets_[bucket_idx];
    return Erase(&bucket, t, ret);
  }

  // Fetch the mutex associated with a key
  // This call is used to hold the lock on the data for a given key for an
  // extended period of time.
  port::RWMutex* GetMutex(const T& t) {
    const uint64_t h = Hash()(t);
    const uint32_t bucket_idx = h % nbuckets_;
    const uint32_t lock_idx = bucket_idx % nlocks_;

    return &locks_[lock_idx];
  }
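
  // Illustrative sketch of holding a stripe lock for a longer operation
  // (again using the hypothetical Block type); the caller locks and unlocks
  // the returned mutex explicitly.
  //
  //   port::RWMutex* m = table.GetMutex(Block{1, nullptr});
  //   m->ReadLock();
  //   // ... long-running read of the data associated with key 1 ...
  //   m->ReadUnlock();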

  // Apply fn to every element and then remove all elements, taking the
  // corresponding stripe write lock for each bucket
  void Clear(void (*fn)(T)) {
    for (uint32_t i = 0; i < nbuckets_; ++i) {
      const uint32_t lock_idx = i % nlocks_;
      WriteLock _(&locks_[lock_idx]);
      for (auto& t : buckets_[i].list_) {
        (*fn)(t);
      }
      buckets_[i].list_.clear();
    }
  }
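
  // Illustrative teardown sketch: the destructor asserts in debug builds that
  // all buckets are empty, so a caller would typically drain the table first,
  // e.g. by passing a callback that releases per-element resources (Block is
  // the hypothetical example type from above; assuming data was allocated
  // with malloc(), and relying on the implicit conversion of a captureless
  // lambda to the required function pointer).
  //
  //   table.Clear([](Block b) { free(b.data); });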

 protected:
  // Models a bucket of keys that hash to the same bucket number
  struct Bucket {
    std::list<T> list_;
  };

  // Substitute for std::find with a custom comparator
  typename std::list<T>::iterator Find(std::list<T>* list, const T& t) {
    for (auto it = list->begin(); it != list->end(); ++it) {
      if (Equal()(*it, t)) {
        return it;
      }
    }
    return list->end();
  }

  bool Insert(Bucket* bucket, const T& t) {
    // Check if the key already exists
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      return false;
    }

    // insert into the bucket
    bucket->list_.push_back(t);
    return true;
  }

  bool Find(Bucket* bucket, const T& t, T* ret) {
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      if (ret) {
        *ret = *it;
      }
      return true;
    }
    return false;
  }

  bool Erase(Bucket* bucket, const T& t, T* ret) {
    auto it = Find(&bucket->list_, t);
    if (it != bucket->list_.end()) {
      if (ret) {
        *ret = *it;
      }

      bucket->list_.erase(it);
      return true;
    }
    return false;
  }

  // Assert that all buckets are empty (debug builds only)
  void AssertEmptyBuckets() {
#ifndef NDEBUG
    for (size_t i = 0; i < nbuckets_; ++i) {
      WriteLock _(&locks_[i % nlocks_]);
      assert(buckets_[i].list_.empty());
    }
#endif
  }

  const uint32_t nbuckets_;                 // No. of buckets in the spine
  std::unique_ptr<Bucket[]> buckets_;       // Spine of the hash buckets
  const uint32_t nlocks_;                   // No. of locks
  std::unique_ptr<port::RWMutex[]> locks_;  // Granular locks
};

}  // namespace ROCKSDB_NAMESPACE

#endif  // ROCKSDB_LITE