skiplist.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Thread safety
  11. // -------------
  12. //
  13. // Writes require external synchronization, most likely a mutex.
  14. // Reads require a guarantee that the SkipList will not be destroyed
  15. // while the read is in progress. Apart from that, reads progress
  16. // without any internal locking or synchronization.
  17. //
  18. // Invariants:
  19. //
  20. // (1) Allocated nodes are never deleted until the SkipList is
  21. // destroyed. This is trivially guaranteed by the code since we
  22. // never delete any skip list nodes.
  23. //
  24. // (2) The contents of a Node except for the next/prev pointers are
  25. // immutable after the Node has been linked into the SkipList.
  26. // Only Insert() modifies the list, and it is careful to initialize
  27. // a node and use release-stores to publish the nodes in one or
  28. // more lists.
  29. //
  30. // ... prev vs. next pointer ordering ...
  31. //
  32. #pragma once
  33. #include <assert.h>
  34. #include <stdlib.h>
  35. #include <atomic>
  36. #include "memory/allocator.h"
  37. #include "port/port.h"
  38. #include "util/random.h"
  39. namespace ROCKSDB_NAMESPACE {
  40. template<typename Key, class Comparator>
  41. class SkipList {
  42. private:
  43. struct Node;
  44. public:
  45. // Create a new SkipList object that will use "cmp" for comparing keys,
  46. // and will allocate memory using "*allocator". Objects allocated in the
  47. // allocator must remain allocated for the lifetime of the skiplist object.
  48. explicit SkipList(Comparator cmp, Allocator* allocator,
  49. int32_t max_height = 12, int32_t branching_factor = 4);
  50. // No copying allowed
  51. SkipList(const SkipList&) = delete;
  52. void operator=(const SkipList&) = delete;
  53. // Insert key into the list.
  54. // REQUIRES: nothing that compares equal to key is currently in the list.
  55. void Insert(const Key& key);
  56. // Returns true iff an entry that compares equal to key is in the list.
  57. bool Contains(const Key& key) const;
  58. // Return estimated number of entries smaller than `key`.
  59. uint64_t EstimateCount(const Key& key) const;
  60. // Iteration over the contents of a skip list
  61. class Iterator {
  62. public:
  63. // Initialize an iterator over the specified list.
  64. // The returned iterator is not valid.
  65. explicit Iterator(const SkipList* list);
  66. // Change the underlying skiplist used for this iterator
  67. // This enables us not changing the iterator without deallocating
  68. // an old one and then allocating a new one
  69. void SetList(const SkipList* list);
  70. // Returns true iff the iterator is positioned at a valid node.
  71. bool Valid() const;
  72. // Returns the key at the current position.
  73. // REQUIRES: Valid()
  74. const Key& key() const;
  75. // Advances to the next position.
  76. // REQUIRES: Valid()
  77. void Next();
  78. // Advances to the previous position.
  79. // REQUIRES: Valid()
  80. void Prev();
  81. // Advance to the first entry with a key >= target
  82. void Seek(const Key& target);
  83. // Retreat to the last entry with a key <= target
  84. void SeekForPrev(const Key& target);
  85. // Position at the first entry in list.
  86. // Final state of iterator is Valid() iff list is not empty.
  87. void SeekToFirst();
  88. // Position at the last entry in list.
  89. // Final state of iterator is Valid() iff list is not empty.
  90. void SeekToLast();
  91. private:
  92. const SkipList* list_;
  93. Node* node_;
  94. // Intentionally copyable
  95. };
  96. private:
  97. const uint16_t kMaxHeight_;
  98. const uint16_t kBranching_;
  99. const uint32_t kScaledInverseBranching_;
  100. // Immutable after construction
  101. Comparator const compare_;
  102. Allocator* const allocator_; // Allocator used for allocations of nodes
  103. Node* const head_;
  104. // Modified only by Insert(). Read racily by readers, but stale
  105. // values are ok.
  106. std::atomic<int> max_height_; // Height of the entire list
  107. // Used for optimizing sequential insert patterns. Tricky. prev_[i] for
  108. // i up to max_height_ is the predecessor of prev_[0] and prev_height_
  109. // is the height of prev_[0]. prev_[0] can only be equal to head before
  110. // insertion, in which case max_height_ and prev_height_ are 1.
  111. Node** prev_;
  112. int32_t prev_height_;
  113. inline int GetMaxHeight() const {
  114. return max_height_.load(std::memory_order_relaxed);
  115. }
  116. Node* NewNode(const Key& key, int height);
  117. int RandomHeight();
  118. bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
  119. bool LessThan(const Key& a, const Key& b) const {
  120. return (compare_(a, b) < 0);
  121. }
  122. // Return true if key is greater than the data stored in "n"
  123. bool KeyIsAfterNode(const Key& key, Node* n) const;
  124. // Returns the earliest node with a key >= key.
  125. // Return nullptr if there is no such node.
  126. Node* FindGreaterOrEqual(const Key& key) const;
  127. // Return the latest node with a key < key.
  128. // Return head_ if there is no such node.
  129. // Fills prev[level] with pointer to previous node at "level" for every
  130. // level in [0..max_height_-1], if prev is non-null.
  131. Node* FindLessThan(const Key& key, Node** prev = nullptr) const;
  132. // Return the last node in the list.
  133. // Return head_ if list is empty.
  134. Node* FindLast() const;
  135. };
  136. // Implementation details follow
  137. template<typename Key, class Comparator>
  138. struct SkipList<Key, Comparator>::Node {
  139. explicit Node(const Key& k) : key(k) { }
  140. Key const key;
  141. // Accessors/mutators for links. Wrapped in methods so we can
  142. // add the appropriate barriers as necessary.
  143. Node* Next(int n) {
  144. assert(n >= 0);
  145. // Use an 'acquire load' so that we observe a fully initialized
  146. // version of the returned Node.
  147. return (next_[n].load(std::memory_order_acquire));
  148. }
  149. void SetNext(int n, Node* x) {
  150. assert(n >= 0);
  151. // Use a 'release store' so that anybody who reads through this
  152. // pointer observes a fully initialized version of the inserted node.
  153. next_[n].store(x, std::memory_order_release);
  154. }
  155. // No-barrier variants that can be safely used in a few locations.
  156. Node* NoBarrier_Next(int n) {
  157. assert(n >= 0);
  158. return next_[n].load(std::memory_order_relaxed);
  159. }
  160. void NoBarrier_SetNext(int n, Node* x) {
  161. assert(n >= 0);
  162. next_[n].store(x, std::memory_order_relaxed);
  163. }
  164. private:
  165. // Array of length equal to the node height. next_[0] is lowest level link.
  166. std::atomic<Node*> next_[1];
  167. };
  168. template<typename Key, class Comparator>
  169. typename SkipList<Key, Comparator>::Node*
  170. SkipList<Key, Comparator>::NewNode(const Key& key, int height) {
  171. char* mem = allocator_->AllocateAligned(
  172. sizeof(Node) + sizeof(std::atomic<Node*>) * (height - 1));
  173. return new (mem) Node(key);
  174. }
  175. template<typename Key, class Comparator>
  176. inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list) {
  177. SetList(list);
  178. }
  179. template<typename Key, class Comparator>
  180. inline void SkipList<Key, Comparator>::Iterator::SetList(const SkipList* list) {
  181. list_ = list;
  182. node_ = nullptr;
  183. }
  184. template<typename Key, class Comparator>
  185. inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
  186. return node_ != nullptr;
  187. }
  188. template<typename Key, class Comparator>
  189. inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
  190. assert(Valid());
  191. return node_->key;
  192. }
  193. template<typename Key, class Comparator>
  194. inline void SkipList<Key, Comparator>::Iterator::Next() {
  195. assert(Valid());
  196. node_ = node_->Next(0);
  197. }
  198. template<typename Key, class Comparator>
  199. inline void SkipList<Key, Comparator>::Iterator::Prev() {
  200. // Instead of using explicit "prev" links, we just search for the
  201. // last node that falls before key.
  202. assert(Valid());
  203. node_ = list_->FindLessThan(node_->key);
  204. if (node_ == list_->head_) {
  205. node_ = nullptr;
  206. }
  207. }
  208. template<typename Key, class Comparator>
  209. inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
  210. node_ = list_->FindGreaterOrEqual(target);
  211. }
  212. template <typename Key, class Comparator>
  213. inline void SkipList<Key, Comparator>::Iterator::SeekForPrev(
  214. const Key& target) {
  215. Seek(target);
  216. if (!Valid()) {
  217. SeekToLast();
  218. }
  219. while (Valid() && list_->LessThan(target, key())) {
  220. Prev();
  221. }
  222. }
  223. template <typename Key, class Comparator>
  224. inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
  225. node_ = list_->head_->Next(0);
  226. }
  227. template<typename Key, class Comparator>
  228. inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
  229. node_ = list_->FindLast();
  230. if (node_ == list_->head_) {
  231. node_ = nullptr;
  232. }
  233. }
  234. template<typename Key, class Comparator>
  235. int SkipList<Key, Comparator>::RandomHeight() {
  236. auto rnd = Random::GetTLSInstance();
  237. // Increase height with probability 1 in kBranching
  238. int height = 1;
  239. while (height < kMaxHeight_ && rnd->Next() < kScaledInverseBranching_) {
  240. height++;
  241. }
  242. assert(height > 0);
  243. assert(height <= kMaxHeight_);
  244. return height;
  245. }
  246. template<typename Key, class Comparator>
  247. bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  248. // nullptr n is considered infinite
  249. return (n != nullptr) && (compare_(n->key, key) < 0);
  250. }
  251. template<typename Key, class Comparator>
  252. typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::
  253. FindGreaterOrEqual(const Key& key) const {
  254. // Note: It looks like we could reduce duplication by implementing
  255. // this function as FindLessThan(key)->Next(0), but we wouldn't be able
  256. // to exit early on equality and the result wouldn't even be correct.
  257. // A concurrent insert might occur after FindLessThan(key) but before
  258. // we get a chance to call Next(0).
  259. Node* x = head_;
  260. int level = GetMaxHeight() - 1;
  261. Node* last_bigger = nullptr;
  262. while (true) {
  263. assert(x != nullptr);
  264. Node* next = x->Next(level);
  265. // Make sure the lists are sorted
  266. assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x));
  267. // Make sure we haven't overshot during our search
  268. assert(x == head_ || KeyIsAfterNode(key, x));
  269. int cmp = (next == nullptr || next == last_bigger)
  270. ? 1 : compare_(next->key, key);
  271. if (cmp == 0 || (cmp > 0 && level == 0)) {
  272. return next;
  273. } else if (cmp < 0) {
  274. // Keep searching in this list
  275. x = next;
  276. } else {
  277. // Switch to next list, reuse compare_() result
  278. last_bigger = next;
  279. level--;
  280. }
  281. }
  282. }
  283. template<typename Key, class Comparator>
  284. typename SkipList<Key, Comparator>::Node*
  285. SkipList<Key, Comparator>::FindLessThan(const Key& key, Node** prev) const {
  286. Node* x = head_;
  287. int level = GetMaxHeight() - 1;
  288. // KeyIsAfter(key, last_not_after) is definitely false
  289. Node* last_not_after = nullptr;
  290. while (true) {
  291. assert(x != nullptr);
  292. Node* next = x->Next(level);
  293. assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x));
  294. assert(x == head_ || KeyIsAfterNode(key, x));
  295. if (next != last_not_after && KeyIsAfterNode(key, next)) {
  296. // Keep searching in this list
  297. x = next;
  298. } else {
  299. if (prev != nullptr) {
  300. prev[level] = x;
  301. }
  302. if (level == 0) {
  303. return x;
  304. } else {
  305. // Switch to next list, reuse KeyIUsAfterNode() result
  306. last_not_after = next;
  307. level--;
  308. }
  309. }
  310. }
  311. }
  312. template<typename Key, class Comparator>
  313. typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
  314. const {
  315. Node* x = head_;
  316. int level = GetMaxHeight() - 1;
  317. while (true) {
  318. Node* next = x->Next(level);
  319. if (next == nullptr) {
  320. if (level == 0) {
  321. return x;
  322. } else {
  323. // Switch to next list
  324. level--;
  325. }
  326. } else {
  327. x = next;
  328. }
  329. }
  330. }
  331. template <typename Key, class Comparator>
  332. uint64_t SkipList<Key, Comparator>::EstimateCount(const Key& key) const {
  333. uint64_t count = 0;
  334. Node* x = head_;
  335. int level = GetMaxHeight() - 1;
  336. while (true) {
  337. assert(x == head_ || compare_(x->key, key) < 0);
  338. Node* next = x->Next(level);
  339. if (next == nullptr || compare_(next->key, key) >= 0) {
  340. if (level == 0) {
  341. return count;
  342. } else {
  343. // Switch to next list
  344. count *= kBranching_;
  345. level--;
  346. }
  347. } else {
  348. x = next;
  349. count++;
  350. }
  351. }
  352. }
  353. template <typename Key, class Comparator>
  354. SkipList<Key, Comparator>::SkipList(const Comparator cmp, Allocator* allocator,
  355. int32_t max_height,
  356. int32_t branching_factor)
  357. : kMaxHeight_(static_cast<uint16_t>(max_height)),
  358. kBranching_(static_cast<uint16_t>(branching_factor)),
  359. kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_),
  360. compare_(cmp),
  361. allocator_(allocator),
  362. head_(NewNode(0 /* any key will do */, max_height)),
  363. max_height_(1),
  364. prev_height_(1) {
  365. assert(max_height > 0 && kMaxHeight_ == static_cast<uint32_t>(max_height));
  366. assert(branching_factor > 0 &&
  367. kBranching_ == static_cast<uint32_t>(branching_factor));
  368. assert(kScaledInverseBranching_ > 0);
  369. // Allocate the prev_ Node* array, directly from the passed-in allocator.
  370. // prev_ does not need to be freed, as its life cycle is tied up with
  371. // the allocator as a whole.
  372. prev_ = reinterpret_cast<Node**>(
  373. allocator_->AllocateAligned(sizeof(Node*) * kMaxHeight_));
  374. for (int i = 0; i < kMaxHeight_; i++) {
  375. head_->SetNext(i, nullptr);
  376. prev_[i] = head_;
  377. }
  378. }
  379. template<typename Key, class Comparator>
  380. void SkipList<Key, Comparator>::Insert(const Key& key) {
  381. // fast path for sequential insertion
  382. if (!KeyIsAfterNode(key, prev_[0]->NoBarrier_Next(0)) &&
  383. (prev_[0] == head_ || KeyIsAfterNode(key, prev_[0]))) {
  384. assert(prev_[0] != head_ || (prev_height_ == 1 && GetMaxHeight() == 1));
  385. // Outside of this method prev_[1..max_height_] is the predecessor
  386. // of prev_[0], and prev_height_ refers to prev_[0]. Inside Insert
  387. // prev_[0..max_height - 1] is the predecessor of key. Switch from
  388. // the external state to the internal
  389. for (int i = 1; i < prev_height_; i++) {
  390. prev_[i] = prev_[0];
  391. }
  392. } else {
  393. // TODO(opt): we could use a NoBarrier predecessor search as an
  394. // optimization for architectures where memory_order_acquire needs
  395. // a synchronization instruction. Doesn't matter on x86
  396. FindLessThan(key, prev_);
  397. }
  398. // Our data structure does not allow duplicate insertion
  399. assert(prev_[0]->Next(0) == nullptr || !Equal(key, prev_[0]->Next(0)->key));
  400. int height = RandomHeight();
  401. if (height > GetMaxHeight()) {
  402. for (int i = GetMaxHeight(); i < height; i++) {
  403. prev_[i] = head_;
  404. }
  405. //fprintf(stderr, "Change height from %d to %d\n", max_height_, height);
  406. // It is ok to mutate max_height_ without any synchronization
  407. // with concurrent readers. A concurrent reader that observes
  408. // the new value of max_height_ will see either the old value of
  409. // new level pointers from head_ (nullptr), or a new value set in
  410. // the loop below. In the former case the reader will
  411. // immediately drop to the next level since nullptr sorts after all
  412. // keys. In the latter case the reader will use the new node.
  413. max_height_.store(height, std::memory_order_relaxed);
  414. }
  415. Node* x = NewNode(key, height);
  416. for (int i = 0; i < height; i++) {
  417. // NoBarrier_SetNext() suffices since we will add a barrier when
  418. // we publish a pointer to "x" in prev[i].
  419. x->NoBarrier_SetNext(i, prev_[i]->NoBarrier_Next(i));
  420. prev_[i]->SetNext(i, x);
  421. }
  422. prev_[0] = x;
  423. prev_height_ = height;
  424. }
  425. template<typename Key, class Comparator>
  426. bool SkipList<Key, Comparator>::Contains(const Key& key) const {
  427. Node* x = FindGreaterOrEqual(key);
  428. if (x != nullptr && Equal(key, x->key)) {
  429. return true;
  430. } else {
  431. return false;
  432. }
  433. }
  434. } // namespace ROCKSDB_NAMESPACE