// hash_skiplist_rep.cc
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. #include <atomic>
  7. #include "db/memtable.h"
  8. #include "memory/arena.h"
  9. #include "memtable/skiplist.h"
  10. #include "port/port.h"
  11. #include "rocksdb/memtablerep.h"
  12. #include "rocksdb/slice.h"
  13. #include "rocksdb/slice_transform.h"
  14. #include "rocksdb/utilities/options_type.h"
  15. #include "util/murmurhash.h"
  16. namespace ROCKSDB_NAMESPACE {
  17. namespace {
// A MemTableRep that hashes each key's transformed (prefixed) user key into a
// fixed-size array of buckets, where every bucket is an independent skip
// list.  Point lookups and prefix scans touch a single bucket; a total-order
// iterator must merge all buckets (see GetIterator in the implementation).
class HashSkipListRep : public MemTableRep {
 public:
  HashSkipListRep(const MemTableRep::KeyComparator& compare,
                  Allocator* allocator, const SliceTransform* transform,
                  size_t bucket_size, int32_t skiplist_height,
                  int32_t skiplist_branching_factor);

  // Inserts the entry referenced by handle into the bucket selected by the
  // transformed user key.  NOTE(review): no internal locking here — assumes
  // writes are externally synchronized, as usual for memtable reps; confirm
  // against callers.
  void Insert(KeyHandle handle) override;

  // Returns true iff an equal entry exists in the key's bucket.
  bool Contains(const char* key) const override;

  size_t ApproximateMemoryUsage() override;

  // Invokes callback_func on entries >= k within k's bucket until the
  // callback returns false or the bucket is exhausted.
  void Get(const LookupKey& k, void* callback_args,
           bool (*callback_func)(void* arg, const char* entry)) override;

  ~HashSkipListRep() override;

  // Total-order iterator; expensive — copies every bucket's entries into a
  // freshly allocated skip list.
  MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override;

  // Prefix iterator that re-binds to the appropriate bucket on each Seek.
  MemTableRep::Iterator* GetDynamicPrefixIterator(
      Arena* arena = nullptr) override;

 private:
  friend class DynamicIterator;

  // Each bucket is a skip list over memtable-encoded entries.
  using Bucket = SkipList<const char*, const MemTableRep::KeyComparator&>;

  size_t bucket_size_;

  const int32_t skiplist_height_;
  const int32_t skiplist_branching_factor_;

  // Maps slices (which are transformed user keys) to buckets of keys sharing
  // the same transform.  Entries start as nullptr and are lazily created;
  // GetInitializedBucket publishes them with a release store.
  std::atomic<Bucket*>* buckets_;

  // The user-supplied transform whose domain is the user keys.
  const SliceTransform* transform_;

  const MemTableRep::KeyComparator& compare_;
  // immutable after construction
  Allocator* const allocator_;

  // Bucket index for a transformed key.
  inline size_t GetHash(const Slice& slice) const {
    return MurmurHash(slice.data(), static_cast<int>(slice.size()), 0) %
           bucket_size_;
  }
  // Acquire load pairs with the release store in GetInitializedBucket, so a
  // reader that observes the pointer also observes the constructed bucket.
  inline Bucket* GetBucket(size_t i) const {
    return buckets_[i].load(std::memory_order_acquire);
  }
  inline Bucket* GetBucket(const Slice& slice) const {
    return GetBucket(GetHash(slice));
  }
  // Get a bucket from buckets_. If the bucket hasn't been initialized yet,
  // initialize it before returning.
  Bucket* GetInitializedBucket(const Slice& transformed);

  // Iterates over one bucket (skip list).  May optionally own the list, in
  // which case the destructor deletes it.
  class Iterator : public MemTableRep::Iterator {
   public:
    // list may be nullptr, producing a permanently-invalid iterator until
    // Reset() supplies a real bucket (see DynamicIterator).
    explicit Iterator(Bucket* list, bool own_list = true,
                      Arena* arena = nullptr)
        : list_(list), iter_(list), own_list_(own_list), arena_(arena) {}

    ~Iterator() override {
      // if we own the list, we should also delete it
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
    }

    // Returns true iff the iterator is positioned at a valid node.
    bool Valid() const override { return list_ != nullptr && iter_.Valid(); }

    // Returns the key at the current position.
    // REQUIRES: Valid()
    const char* key() const override {
      assert(Valid());
      return iter_.key();
    }

    // Advances to the next position.
    // REQUIRES: Valid()
    void Next() override {
      assert(Valid());
      iter_.Next();
    }

    // Advances to the previous position.
    // REQUIRES: Valid()
    void Prev() override {
      assert(Valid());
      iter_.Prev();
    }

    // Advance to the first entry with a key >= target
    void Seek(const Slice& internal_key, const char* memtable_key) override {
      if (list_ != nullptr) {
        // Prefer the caller-provided pre-encoded key; otherwise encode the
        // internal key into tmp_.
        const char* encoded_key = (memtable_key != nullptr)
                                      ? memtable_key
                                      : EncodeKey(&tmp_, internal_key);
        iter_.Seek(encoded_key);
      }
    }

    // Retreat to the last entry with a key <= target
    void SeekForPrev(const Slice& /*internal_key*/,
                     const char* /*memtable_key*/) override {
      // not supported
      assert(false);
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      if (list_ != nullptr) {
        iter_.SeekToFirst();
      }
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      if (list_ != nullptr) {
        iter_.SeekToLast();
      }
    }

   protected:
    // Re-target the iterator at a different bucket (possibly nullptr).  Any
    // previously owned list is deleted; the new list is never owned.
    void Reset(Bucket* list) {
      if (own_list_) {
        assert(list_ != nullptr);
        delete list_;
      }
      list_ = list;
      iter_.SetList(list);
      own_list_ = false;
    }

   private:
    // if list_ is nullptr, we should NEVER call any methods on iter_
    // if list_ is nullptr, this Iterator is not Valid()
    Bucket* list_;
    Bucket::Iterator iter_;
    // here we track if we own list_. If we own it, we are also
    // responsible for it's cleaning. This is a poor man's std::shared_ptr
    bool own_list_;
    // Owns the arena backing an owned list_ (used by GetIterator), so the
    // arena outlives the merged skip list.
    std::unique_ptr<Arena> arena_;
    std::string tmp_;  // For passing to EncodeKey
  };

  // Prefix iterator: binds to the bucket of the sought key's prefix on each
  // Seek.  Never owns the bucket it points at.
  class DynamicIterator : public HashSkipListRep::Iterator {
   public:
    explicit DynamicIterator(const HashSkipListRep& memtable_rep)
        : HashSkipListRep::Iterator(nullptr, false),
          memtable_rep_(memtable_rep) {}

    // Advance to the first entry with a key >= target
    void Seek(const Slice& k, const char* memtable_key) override {
      // Re-bind to the bucket for this key's prefix, then seek inside it.
      auto transformed = memtable_rep_.transform_->Transform(ExtractUserKey(k));
      Reset(memtable_rep_.GetBucket(transformed));
      HashSkipListRep::Iterator::Seek(k, memtable_key);
    }

    // Position at the first entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToFirst() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }

    // Position at the last entry in collection.
    // Final state of iterator is Valid() iff collection is not empty.
    void SeekToLast() override {
      // Prefix iterator does not support total order.
      // We simply set the iterator to invalid state
      Reset(nullptr);
    }

   private:
    // the underlying memtable
    const HashSkipListRep& memtable_rep_;
  };

  class EmptyIterator : public MemTableRep::Iterator {
    // This is used when there wasn't a bucket. It is cheaper than
    // instantiating an empty bucket over which to iterate.
   public:
    EmptyIterator() = default;
    bool Valid() const override { return false; }
    const char* key() const override {
      assert(false);
      return nullptr;
    }
    void Next() override {}
    void Prev() override {}
    void Seek(const Slice& /*internal_key*/,
              const char* /*memtable_key*/) override {}
    void SeekForPrev(const Slice& /*internal_key*/,
                     const char* /*memtable_key*/) override {}
    void SeekToFirst() override {}
    void SeekToLast() override {}

   private:
  };
};
  192. HashSkipListRep::HashSkipListRep(const MemTableRep::KeyComparator& compare,
  193. Allocator* allocator,
  194. const SliceTransform* transform,
  195. size_t bucket_size, int32_t skiplist_height,
  196. int32_t skiplist_branching_factor)
  197. : MemTableRep(allocator),
  198. bucket_size_(bucket_size),
  199. skiplist_height_(skiplist_height),
  200. skiplist_branching_factor_(skiplist_branching_factor),
  201. transform_(transform),
  202. compare_(compare),
  203. allocator_(allocator) {
  204. auto mem =
  205. allocator->AllocateAligned(sizeof(std::atomic<void*>) * bucket_size);
  206. buckets_ = new (mem) std::atomic<Bucket*>[bucket_size];
  207. for (size_t i = 0; i < bucket_size_; ++i) {
  208. buckets_[i].store(nullptr, std::memory_order_relaxed);
  209. }
  210. }
// Buckets and their skip-list nodes were allocated from the memtable's
// allocator (arena), whose owner frees them; nothing to release here.
HashSkipListRep::~HashSkipListRep() = default;
// Returns the bucket for `transformed`, lazily constructing it on first use.
// The new skip list is built in allocator memory, then published with a
// release store that pairs with the acquire load in GetBucket(), so readers
// that see the pointer also see a fully constructed list.
// NOTE(review): there is no compare-and-swap here — concurrent callers could
// each build a bucket and one insert set would be lost; this assumes writes
// are externally synchronized (the usual memtable-rep contract) — confirm
// against callers.
HashSkipListRep::Bucket* HashSkipListRep::GetInitializedBucket(
    const Slice& transformed) {
  size_t hash = GetHash(transformed);
  auto bucket = GetBucket(hash);
  if (bucket == nullptr) {
    auto addr = allocator_->AllocateAligned(sizeof(Bucket));
    bucket = new (addr) Bucket(compare_, allocator_, skiplist_height_,
                               skiplist_branching_factor_);
    buckets_[hash].store(bucket, std::memory_order_release);
  }
  return bucket;
}
  224. void HashSkipListRep::Insert(KeyHandle handle) {
  225. auto* key = static_cast<char*>(handle);
  226. assert(!Contains(key));
  227. auto transformed = transform_->Transform(UserKey(key));
  228. auto bucket = GetInitializedBucket(transformed);
  229. bucket->Insert(key);
  230. }
  231. bool HashSkipListRep::Contains(const char* key) const {
  232. auto transformed = transform_->Transform(UserKey(key));
  233. auto bucket = GetBucket(transformed);
  234. if (bucket == nullptr) {
  235. return false;
  236. }
  237. return bucket->Contains(key);
  238. }
// Reports no extra memory: everything this rep allocates comes from the
// memtable allocator, which is already accounted for by the memtable itself.
size_t HashSkipListRep::ApproximateMemoryUsage() { return 0; }
  240. void HashSkipListRep::Get(const LookupKey& k, void* callback_args,
  241. bool (*callback_func)(void* arg, const char* entry)) {
  242. auto transformed = transform_->Transform(k.user_key());
  243. auto bucket = GetBucket(transformed);
  244. if (bucket != nullptr) {
  245. Bucket::Iterator iter(bucket);
  246. for (iter.Seek(k.memtable_key().data());
  247. iter.Valid() && callback_func(callback_args, iter.key());
  248. iter.Next()) {
  249. }
  250. }
  251. }
  252. MemTableRep::Iterator* HashSkipListRep::GetIterator(Arena* arena) {
  253. // allocate a new arena of similar size to the one currently in use
  254. Arena* new_arena = new Arena(allocator_->BlockSize());
  255. auto list = new Bucket(compare_, new_arena);
  256. for (size_t i = 0; i < bucket_size_; ++i) {
  257. auto bucket = GetBucket(i);
  258. if (bucket != nullptr) {
  259. Bucket::Iterator itr(bucket);
  260. for (itr.SeekToFirst(); itr.Valid(); itr.Next()) {
  261. list->Insert(itr.key());
  262. }
  263. }
  264. }
  265. if (arena == nullptr) {
  266. return new Iterator(list, true, new_arena);
  267. } else {
  268. auto mem = arena->AllocateAligned(sizeof(Iterator));
  269. return new (mem) Iterator(list, true, new_arena);
  270. }
  271. }
  272. MemTableRep::Iterator* HashSkipListRep::GetDynamicPrefixIterator(Arena* arena) {
  273. if (arena == nullptr) {
  274. return new DynamicIterator(*this);
  275. } else {
  276. auto mem = arena->AllocateAligned(sizeof(DynamicIterator));
  277. return new (mem) DynamicIterator(*this);
  278. }
  279. }
// Configurable knobs for the factory.  Field layout matters: it is consumed
// via offsetof() by hash_skiplist_info below — do not reorder fields.
struct HashSkipListRepOptions {
  static const char* kName() { return "HashSkipListRepFactoryOptions"; }
  // Number of hash buckets (each bucket is a skip list).
  size_t bucket_count;
  // Maximum height of each per-bucket skip list.
  int32_t skiplist_height;
  // Branching factor of each per-bucket skip list.
  int32_t skiplist_branching_factor;
};
// Option-string name -> struct field mapping used by RegisterOptions() so
// the factory can be configured/serialized through the options framework.
// Note the external name "branching_factor" maps to the
// skiplist_branching_factor field.
static std::unordered_map<std::string, OptionTypeInfo> hash_skiplist_info = {
    {"bucket_count",
     {offsetof(struct HashSkipListRepOptions, bucket_count), OptionType::kSizeT,
      OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
    {"skiplist_height",
     {offsetof(struct HashSkipListRepOptions, skiplist_height),
      OptionType::kInt32T, OptionVerificationType::kNormal,
      OptionTypeFlags::kNone}},
    {"branching_factor",
     {offsetof(struct HashSkipListRepOptions, skiplist_branching_factor),
      OptionType::kInt32T, OptionVerificationType::kNormal,
      OptionTypeFlags::kNone}},
};
// Factory producing HashSkipListRep memtables.  Stores the configuration in
// options_ and registers it with the options framework so it can be set via
// option strings (names defined in hash_skiplist_info).
class HashSkipListRepFactory : public MemTableRepFactory {
 public:
  explicit HashSkipListRepFactory(size_t bucket_count, int32_t skiplist_height,
                                  int32_t skiplist_branching_factor) {
    options_.bucket_count = bucket_count;
    options_.skiplist_height = skiplist_height;
    options_.skiplist_branching_factor = skiplist_branching_factor;
    RegisterOptions(&options_, &hash_skiplist_info);
  }

  using MemTableRepFactory::CreateMemTableRep;
  MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator& compare,
                                 Allocator* allocator,
                                 const SliceTransform* transform,
                                 Logger* logger) override;

  static const char* kClassName() { return "HashSkipListRepFactory"; }
  // Short alias accepted by the options parser.
  static const char* kNickName() { return "prefix_hash"; }
  const char* Name() const override { return kClassName(); }
  const char* NickName() const override { return kNickName(); }

 private:
  HashSkipListRepOptions options_;
};
  320. } // namespace
  321. MemTableRep* HashSkipListRepFactory::CreateMemTableRep(
  322. const MemTableRep::KeyComparator& compare, Allocator* allocator,
  323. const SliceTransform* transform, Logger* /*logger*/) {
  324. return new HashSkipListRep(compare, allocator, transform,
  325. options_.bucket_count, options_.skiplist_height,
  326. options_.skiplist_branching_factor);
  327. }
  328. MemTableRepFactory* NewHashSkipListRepFactory(
  329. size_t bucket_count, int32_t skiplist_height,
  330. int32_t skiplist_branching_factor) {
  331. return new HashSkipListRepFactory(bucket_count, skiplist_height,
  332. skiplist_branching_factor);
  333. }
  334. } // namespace ROCKSDB_NAMESPACE