skiplist.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. //
  10. // Thread safety
  11. // -------------
  12. //
  13. // Writes require external synchronization, most likely a mutex.
  14. // Reads require a guarantee that the SkipList will not be destroyed
  15. // while the read is in progress. Apart from that, reads progress
  16. // without any internal locking or synchronization.
  17. //
  18. // Invariants:
  19. //
  20. // (1) Allocated nodes are never deleted until the SkipList is
  21. // destroyed. This is trivially guaranteed by the code since we
  22. // never delete any skip list nodes.
  23. //
  24. // (2) The contents of a Node except for the next/prev pointers are
  25. // immutable after the Node has been linked into the SkipList.
  26. // Only Insert() modifies the list, and it is careful to initialize
  27. // a node and use release-stores to publish the nodes in one or
  28. // more lists.
  29. //
  30. // ... prev vs. next pointer ordering ...
  31. //
  32. #pragma once
  33. #include <assert.h>
  34. #include <stdlib.h>
  35. #include "memory/allocator.h"
  36. #include "port/port.h"
  37. #include "util/atomic.h"
  38. #include "util/random.h"
  39. namespace ROCKSDB_NAMESPACE {
  40. template <typename Key, class Comparator>
  41. class SkipList {
  42. private:
  43. struct Node;
  44. public:
  45. // Create a new SkipList object that will use "cmp" for comparing keys,
  46. // and will allocate memory using "*allocator". Objects allocated in the
  47. // allocator must remain allocated for the lifetime of the skiplist object.
  48. explicit SkipList(Comparator cmp, Allocator* allocator,
  49. int32_t max_height = 12, int32_t branching_factor = 4);
  50. // No copying allowed
  51. SkipList(const SkipList&) = delete;
  52. void operator=(const SkipList&) = delete;
  53. // Insert key into the list.
  54. // REQUIRES: nothing that compares equal to key is currently in the list.
  55. void Insert(const Key& key);
  56. // Returns true iff an entry that compares equal to key is in the list.
  57. bool Contains(const Key& key) const;
  58. // Return estimated number of entries from `start_ikey` to `end_ikey`.
  59. uint64_t ApproximateNumEntries(const Slice& start_ikey,
  60. const Slice& end_ikey) const;
  61. // Iteration over the contents of a skip list
  62. class Iterator {
  63. public:
  64. // Initialize an iterator over the specified list.
  65. // The returned iterator is not valid.
  66. explicit Iterator(const SkipList* list);
  67. // Change the underlying skiplist used for this iterator
  68. // This enables us not changing the iterator without deallocating
  69. // an old one and then allocating a new one
  70. void SetList(const SkipList* list);
  71. // Returns true iff the iterator is positioned at a valid node.
  72. bool Valid() const;
  73. // Returns the key at the current position.
  74. // REQUIRES: Valid()
  75. const Key& key() const;
  76. // Advances to the next position.
  77. // REQUIRES: Valid()
  78. void Next();
  79. // Advances to the previous position.
  80. // REQUIRES: Valid()
  81. void Prev();
  82. // Advance to the first entry with a key >= target
  83. void Seek(const Key& target);
  84. // Retreat to the last entry with a key <= target
  85. void SeekForPrev(const Key& target);
  86. // Position at the first entry in list.
  87. // Final state of iterator is Valid() iff list is not empty.
  88. void SeekToFirst();
  89. // Position at the last entry in list.
  90. // Final state of iterator is Valid() iff list is not empty.
  91. void SeekToLast();
  92. private:
  93. const SkipList* list_;
  94. Node* node_;
  95. // Intentionally copyable
  96. };
  97. private:
  98. const uint16_t kMaxHeight_;
  99. const uint16_t kBranching_;
  100. const uint32_t kScaledInverseBranching_;
  101. // Immutable after construction
  102. Comparator const compare_;
  103. Allocator* const allocator_; // Allocator used for allocations of nodes
  104. Node* const head_;
  105. // Modified only by Insert(). Read racily by readers, but stale
  106. // values are ok.
  107. RelaxedAtomic<int> max_height_; // Height of the entire list
  108. // Used for optimizing sequential insert patterns. Tricky. prev_[i] for
  109. // i up to max_height_ is the predecessor of prev_[0] and prev_height_
  110. // is the height of prev_[0]. prev_[0] can only be equal to head before
  111. // insertion, in which case max_height_ and prev_height_ are 1.
  112. int32_t prev_height_;
  113. Node** prev_;
  114. inline int GetMaxHeight() const { return max_height_.LoadRelaxed(); }
  115. Node* NewNode(const Key& key, int height);
  116. int RandomHeight();
  117. bool Equal(const Key& a, const Key& b) const { return (compare_(a, b) == 0); }
  118. bool LessThan(const Key& a, const Key& b) const {
  119. return (compare_(a, b) < 0);
  120. }
  121. // Return true if key is greater than the data stored in "n"
  122. bool KeyIsAfterNode(const Key& key, Node* n) const;
  123. // Returns the earliest node with a key >= key.
  124. // Return nullptr if there is no such node.
  125. Node* FindGreaterOrEqual(const Key& key) const;
  126. // Return the latest node with a key < key.
  127. // Return head_ if there is no such node.
  128. // Fills prev[level] with pointer to previous node at "level" for every
  129. // level in [0..max_height_-1], if prev is non-null.
  130. Node* FindLessThan(const Key& key, Node** prev = nullptr) const;
  131. // Return the last node in the list.
  132. // Return head_ if list is empty.
  133. Node* FindLast() const;
  134. };
  135. // Implementation details follow
  136. template <typename Key, class Comparator>
  137. struct SkipList<Key, Comparator>::Node {
  138. explicit Node(const Key& k) : key(k) {}
  139. Key const key;
  140. // Accessors/mutators for links. Wrapped in methods so we can
  141. // add the appropriate barriers as necessary.
  142. Node* Next(int n) {
  143. assert(n >= 0);
  144. // Use an 'acquire load' so that we observe a fully initialized
  145. // version of the returned Node.
  146. return (next_[n].Load());
  147. }
  148. void SetNext(int n, Node* x) {
  149. assert(n >= 0);
  150. // Use a 'release store' so that anybody who reads through this
  151. // pointer observes a fully initialized version of the inserted node.
  152. next_[n].Store(x);
  153. }
  154. // No-barrier variants that can be safely used in a few locations.
  155. Node* NoBarrier_Next(int n) {
  156. assert(n >= 0);
  157. return next_[n].LoadRelaxed();
  158. }
  159. void NoBarrier_SetNext(int n, Node* x) {
  160. assert(n >= 0);
  161. next_[n].StoreRelaxed(x);
  162. }
  163. private:
  164. // Array of length equal to the node height. next_[0] is lowest level link.
  165. AcqRelAtomic<Node*> next_[1];
  166. };
  167. template <typename Key, class Comparator>
  168. typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::NewNode(
  169. const Key& key, int height) {
  170. char* mem = allocator_->AllocateAligned(
  171. sizeof(Node) + sizeof(AcqRelAtomic<Node*>) * (height - 1));
  172. return new (mem) Node(key);
  173. }
  174. template <typename Key, class Comparator>
  175. inline SkipList<Key, Comparator>::Iterator::Iterator(const SkipList* list) {
  176. SetList(list);
  177. }
  178. template <typename Key, class Comparator>
  179. inline void SkipList<Key, Comparator>::Iterator::SetList(const SkipList* list) {
  180. list_ = list;
  181. node_ = nullptr;
  182. }
  183. template <typename Key, class Comparator>
  184. inline bool SkipList<Key, Comparator>::Iterator::Valid() const {
  185. return node_ != nullptr;
  186. }
  187. template <typename Key, class Comparator>
  188. inline const Key& SkipList<Key, Comparator>::Iterator::key() const {
  189. assert(Valid());
  190. return node_->key;
  191. }
  192. template <typename Key, class Comparator>
  193. inline void SkipList<Key, Comparator>::Iterator::Next() {
  194. assert(Valid());
  195. node_ = node_->Next(0);
  196. }
  197. template <typename Key, class Comparator>
  198. inline void SkipList<Key, Comparator>::Iterator::Prev() {
  199. // Instead of using explicit "prev" links, we just search for the
  200. // last node that falls before key.
  201. assert(Valid());
  202. node_ = list_->FindLessThan(node_->key);
  203. if (node_ == list_->head_) {
  204. node_ = nullptr;
  205. }
  206. }
  207. template <typename Key, class Comparator>
  208. inline void SkipList<Key, Comparator>::Iterator::Seek(const Key& target) {
  209. node_ = list_->FindGreaterOrEqual(target);
  210. }
  211. template <typename Key, class Comparator>
  212. inline void SkipList<Key, Comparator>::Iterator::SeekForPrev(
  213. const Key& target) {
  214. Seek(target);
  215. if (!Valid()) {
  216. SeekToLast();
  217. }
  218. while (Valid() && list_->LessThan(target, key())) {
  219. Prev();
  220. }
  221. }
  222. template <typename Key, class Comparator>
  223. inline void SkipList<Key, Comparator>::Iterator::SeekToFirst() {
  224. node_ = list_->head_->Next(0);
  225. }
  226. template <typename Key, class Comparator>
  227. inline void SkipList<Key, Comparator>::Iterator::SeekToLast() {
  228. node_ = list_->FindLast();
  229. if (node_ == list_->head_) {
  230. node_ = nullptr;
  231. }
  232. }
  233. template <typename Key, class Comparator>
  234. int SkipList<Key, Comparator>::RandomHeight() {
  235. auto rnd = Random::GetTLSInstance();
  236. // Increase height with probability 1 in kBranching
  237. int height = 1;
  238. while (height < kMaxHeight_ && rnd->Next() < kScaledInverseBranching_) {
  239. height++;
  240. }
  241. assert(height > 0);
  242. assert(height <= kMaxHeight_);
  243. return height;
  244. }
  245. template <typename Key, class Comparator>
  246. bool SkipList<Key, Comparator>::KeyIsAfterNode(const Key& key, Node* n) const {
  247. // nullptr n is considered infinite
  248. return (n != nullptr) && (compare_(n->key, key) < 0);
  249. }
  250. template <typename Key, class Comparator>
  251. typename SkipList<Key, Comparator>::Node*
  252. SkipList<Key, Comparator>::FindGreaterOrEqual(const Key& key) const {
  253. // Note: It looks like we could reduce duplication by implementing
  254. // this function as FindLessThan(key)->Next(0), but we wouldn't be able
  255. // to exit early on equality and the result wouldn't even be correct.
  256. // A concurrent insert might occur after FindLessThan(key) but before
  257. // we get a chance to call Next(0).
  258. Node* x = head_;
  259. int level = GetMaxHeight() - 1;
  260. Node* last_bigger = nullptr;
  261. while (true) {
  262. assert(x != nullptr);
  263. Node* next = x->Next(level);
  264. // Make sure the lists are sorted
  265. assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x));
  266. // Make sure we haven't overshot during our search
  267. assert(x == head_ || KeyIsAfterNode(key, x));
  268. int cmp =
  269. (next == nullptr || next == last_bigger) ? 1 : compare_(next->key, key);
  270. if (cmp == 0 || (cmp > 0 && level == 0)) {
  271. return next;
  272. } else if (cmp < 0) {
  273. // Keep searching in this list
  274. x = next;
  275. } else {
  276. // Switch to next list, reuse compare_() result
  277. last_bigger = next;
  278. level--;
  279. }
  280. }
  281. }
  282. template <typename Key, class Comparator>
  283. typename SkipList<Key, Comparator>::Node*
  284. SkipList<Key, Comparator>::FindLessThan(const Key& key, Node** prev) const {
  285. Node* x = head_;
  286. int level = GetMaxHeight() - 1;
  287. // KeyIsAfter(key, last_not_after) is definitely false
  288. Node* last_not_after = nullptr;
  289. while (true) {
  290. assert(x != nullptr);
  291. Node* next = x->Next(level);
  292. assert(x == head_ || next == nullptr || KeyIsAfterNode(next->key, x));
  293. assert(x == head_ || KeyIsAfterNode(key, x));
  294. if (next != last_not_after && KeyIsAfterNode(key, next)) {
  295. // Keep searching in this list
  296. x = next;
  297. } else {
  298. if (prev != nullptr) {
  299. prev[level] = x;
  300. }
  301. if (level == 0) {
  302. return x;
  303. } else {
  304. // Switch to next list, reuse KeyIUsAfterNode() result
  305. last_not_after = next;
  306. level--;
  307. }
  308. }
  309. }
  310. }
  311. template <typename Key, class Comparator>
  312. typename SkipList<Key, Comparator>::Node* SkipList<Key, Comparator>::FindLast()
  313. const {
  314. Node* x = head_;
  315. int level = GetMaxHeight() - 1;
  316. while (true) {
  317. Node* next = x->Next(level);
  318. if (next == nullptr) {
  319. if (level == 0) {
  320. return x;
  321. } else {
  322. // Switch to next list
  323. level--;
  324. }
  325. } else {
  326. x = next;
  327. }
  328. }
  329. }
  330. template <typename Key, class Comparator>
  331. uint64_t SkipList<Key, Comparator>::ApproximateNumEntries(
  332. const Slice& start_ikey, const Slice& end_ikey) const {
  333. // See InlineSkipList<Comparator>::ApproximateNumEntries() (copy-paste)
  334. Node* lb = head_;
  335. Node* ub = nullptr;
  336. uint64_t count = 0;
  337. for (int level = GetMaxHeight() - 1; level >= 0; level--) {
  338. auto sufficient_samples = static_cast<uint64_t>(level) * kBranching_ + 10U;
  339. if (count >= sufficient_samples) {
  340. // No more counting; apply powers of kBranching and avoid floating point
  341. count *= kBranching_;
  342. continue;
  343. }
  344. count = 0;
  345. Node* next;
  346. // Get a more precise lower bound (for start key)
  347. for (;;) {
  348. next = lb->Next(level);
  349. if (next == ub) {
  350. break;
  351. }
  352. assert(next != nullptr);
  353. if (compare_(next->Key(), start_ikey) >= 0) {
  354. break;
  355. }
  356. lb = next;
  357. }
  358. // Count entries on this level until upper bound (for end key)
  359. for (;;) {
  360. if (next == ub) {
  361. break;
  362. }
  363. assert(next != nullptr);
  364. if (compare_(next->Key(), end_ikey) >= 0) {
  365. // Save refined upper bound to potentially save key comparison
  366. ub = next;
  367. break;
  368. }
  369. count++;
  370. next = next->Next(level);
  371. }
  372. }
  373. return count;
  374. }
  375. template <typename Key, class Comparator>
  376. SkipList<Key, Comparator>::SkipList(const Comparator cmp, Allocator* allocator,
  377. int32_t max_height,
  378. int32_t branching_factor)
  379. : kMaxHeight_(static_cast<uint16_t>(max_height)),
  380. kBranching_(static_cast<uint16_t>(branching_factor)),
  381. kScaledInverseBranching_((Random::kMaxNext + 1) / kBranching_),
  382. compare_(cmp),
  383. allocator_(allocator),
  384. head_(NewNode({} /* any key will do */, max_height)),
  385. max_height_(1),
  386. prev_height_(1) {
  387. assert(max_height > 0 && kMaxHeight_ == static_cast<uint32_t>(max_height));
  388. assert(branching_factor > 0 &&
  389. kBranching_ == static_cast<uint32_t>(branching_factor));
  390. assert(kScaledInverseBranching_ > 0);
  391. // Allocate the prev_ Node* array, directly from the passed-in allocator.
  392. // prev_ does not need to be freed, as its life cycle is tied up with
  393. // the allocator as a whole.
  394. prev_ = reinterpret_cast<Node**>(
  395. allocator_->AllocateAligned(sizeof(Node*) * kMaxHeight_));
  396. for (int i = 0; i < kMaxHeight_; i++) {
  397. head_->SetNext(i, nullptr);
  398. prev_[i] = head_;
  399. }
  400. }
// Inserts "key" into the list.
//
// Steps: (1) fill prev_[] with the predecessor of key on every level, using
// the cached position from the previous insert when key belongs directly
// after it; (2) pick a random height and, if it exceeds the current list
// height, extend prev_[] with head_ and raise max_height_; (3) allocate the
// node and splice it in level by level; (4) remember the new node in
// prev_[0] / prev_height_ for the next call.
//
// REQUIRES: nothing that compares equal to key is currently in the list.
// REQUIRES: external synchronization against other writers.
template <typename Key, class Comparator>
void SkipList<Key, Comparator>::Insert(const Key& key) {
  // fast path for sequential insertion: key sorts after the cached node
  // prev_[0] (or the cache is still head_) and not after prev_[0]'s current
  // successor, so prev_[0] is the level-0 predecessor of key.
  if (!KeyIsAfterNode(key, prev_[0]->NoBarrier_Next(0)) &&
      (prev_[0] == head_ || KeyIsAfterNode(key, prev_[0]))) {
    assert(prev_[0] != head_ || (prev_height_ == 1 && GetMaxHeight() == 1));

    // Outside of this method prev_[1..max_height_] is the predecessor
    // of prev_[0], and prev_height_ refers to prev_[0].  Inside Insert
    // prev_[0..max_height - 1] is the predecessor of key.  Switch from
    // the external state to the internal
    for (int i = 1; i < prev_height_; i++) {
      prev_[i] = prev_[0];
    }
  } else {
    // Slow path: full search that records the predecessor on every level.
    // TODO(opt): we could use a NoBarrier predecessor search as an
    // optimization for architectures where memory_order_acquire needs
    // a synchronization instruction.  Doesn't matter on x86
    FindLessThan(key, prev_);
  }

  // Our data structure does not allow duplicate insertion
  assert(prev_[0]->Next(0) == nullptr || !Equal(key, prev_[0]->Next(0)->key));

  int height = RandomHeight();
  if (height > GetMaxHeight()) {
    // The new node is taller than the list: on the new levels its
    // predecessor is the head sentinel.
    for (int i = GetMaxHeight(); i < height; i++) {
      prev_[i] = head_;
    }
    // fprintf(stderr, "Change height from %d to %d\n", max_height_, height);

    // It is ok to mutate max_height_ without any synchronization
    // with concurrent readers.  A concurrent reader that observes
    // the new value of max_height_ will see either the old value of
    // new level pointers from head_ (nullptr), or a new value set in
    // the loop below.  In the former case the reader will
    // immediately drop to the next level since nullptr sorts after all
    // keys.  In the latter case the reader will use the new node.
    max_height_.StoreRelaxed(height);
  }

  Node* x = NewNode(key, height);
  for (int i = 0; i < height; i++) {
    // NoBarrier_SetNext() suffices since we will add a barrier when
    // we publish a pointer to "x" in prev[i].
    x->NoBarrier_SetNext(i, prev_[i]->NoBarrier_Next(i));
    // Release store: publishes the fully initialized node on level i.
    prev_[i]->SetNext(i, x);
  }
  // Switch back to the external cache state: prev_[0] is the node just
  // inserted and prev_height_ is its height.
  prev_[0] = x;
  prev_height_ = height;
}
  447. template <typename Key, class Comparator>
  448. bool SkipList<Key, Comparator>::Contains(const Key& key) const {
  449. Node* x = FindGreaterOrEqual(key);
  450. if (x != nullptr && Equal(key, x->key)) {
  451. return true;
  452. } else {
  453. return false;
  454. }
  455. }
  456. } // namespace ROCKSDB_NAMESPACE