skiplistrep.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. #include <random>
  7. #include "db/memtable.h"
  8. #include "memory/arena.h"
  9. #include "memtable/inlineskiplist.h"
  10. #include "rocksdb/memtablerep.h"
  11. #include "rocksdb/utilities/options_type.h"
  12. #include "util/string_util.h"
  13. namespace ROCKSDB_NAMESPACE {
  14. namespace {
  15. class SkipListRep : public MemTableRep {
  16. InlineSkipList<const MemTableRep::KeyComparator&> skip_list_;
  17. const MemTableRep::KeyComparator& cmp_;
  18. const SliceTransform* transform_;
  19. const size_t lookahead_;
  20. friend class LookaheadIterator;
  21. public:
  22. explicit SkipListRep(const MemTableRep::KeyComparator& compare,
  23. Allocator* allocator, const SliceTransform* transform,
  24. const size_t lookahead)
  25. : MemTableRep(allocator),
  26. skip_list_(compare, allocator),
  27. cmp_(compare),
  28. transform_(transform),
  29. lookahead_(lookahead) {}
  30. KeyHandle Allocate(const size_t len, char** buf) override {
  31. *buf = skip_list_.AllocateKey(len);
  32. return static_cast<KeyHandle>(*buf);
  33. }
  34. // Insert key into the list.
  35. // REQUIRES: nothing that compares equal to key is currently in the list.
  36. void Insert(KeyHandle handle) override {
  37. skip_list_.Insert(static_cast<char*>(handle));
  38. }
  39. bool InsertKey(KeyHandle handle) override {
  40. return skip_list_.Insert(static_cast<char*>(handle));
  41. }
  42. void InsertWithHint(KeyHandle handle, void** hint) override {
  43. skip_list_.InsertWithHint(static_cast<char*>(handle), hint);
  44. }
  45. bool InsertKeyWithHint(KeyHandle handle, void** hint) override {
  46. return skip_list_.InsertWithHint(static_cast<char*>(handle), hint);
  47. }
  48. void InsertWithHintConcurrently(KeyHandle handle, void** hint) override {
  49. skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle), hint);
  50. }
  51. bool InsertKeyWithHintConcurrently(KeyHandle handle, void** hint) override {
  52. return skip_list_.InsertWithHintConcurrently(static_cast<char*>(handle),
  53. hint);
  54. }
  55. void InsertConcurrently(KeyHandle handle) override {
  56. skip_list_.InsertConcurrently(static_cast<char*>(handle));
  57. }
  58. bool InsertKeyConcurrently(KeyHandle handle) override {
  59. return skip_list_.InsertConcurrently(static_cast<char*>(handle));
  60. }
  61. // Returns true iff an entry that compares equal to key is in the list.
  62. bool Contains(const char* key) const override {
  63. return skip_list_.Contains(key);
  64. }
  65. size_t ApproximateMemoryUsage() override {
  66. // All memory is allocated through allocator; nothing to report here
  67. return 0;
  68. }
  69. void Get(const LookupKey& k, void* callback_args,
  70. bool (*callback_func)(void* arg, const char* entry)) override {
  71. SkipListRep::Iterator iter(&skip_list_);
  72. Slice dummy_slice;
  73. for (iter.Seek(dummy_slice, k.memtable_key().data());
  74. iter.Valid() && callback_func(callback_args, iter.key());
  75. iter.Next()) {
  76. }
  77. }
  78. Status GetAndValidate(const LookupKey& k, void* callback_args,
  79. bool (*callback_func)(void* arg, const char* entry),
  80. bool allow_data_in_errors, bool detect_key_out_of_order,
  81. const std::function<Status(const char*, bool)>&
  82. key_validation_callback) override {
  83. SkipListRep::Iterator iter(&skip_list_);
  84. Slice dummy_slice;
  85. Status status = iter.SeekAndValidate(
  86. dummy_slice, k.memtable_key().data(), allow_data_in_errors,
  87. detect_key_out_of_order, key_validation_callback);
  88. for (; iter.Valid() && status.ok() &&
  89. callback_func(callback_args, iter.key());
  90. status = iter.NextAndValidate(allow_data_in_errors)) {
  91. }
  92. return status;
  93. }
  94. uint64_t ApproximateNumEntries(const Slice& start_ikey,
  95. const Slice& end_ikey) override {
  96. return skip_list_.ApproximateNumEntries(start_ikey, end_ikey);
  97. }
  98. void UniqueRandomSample(const uint64_t num_entries,
  99. const uint64_t target_sample_size,
  100. std::unordered_set<const char*>* entries) override {
  101. entries->clear();
  102. // Avoid divide-by-0.
  103. assert(target_sample_size > 0);
  104. assert(num_entries > 0);
  105. // NOTE: the size of entries is not enforced to be exactly
  106. // target_sample_size at the end of this function, it might be slightly
  107. // greater or smaller.
  108. SkipListRep::Iterator iter(&skip_list_);
  109. // There are two methods to create the subset of samples (size m)
  110. // from the table containing N elements:
  111. // 1-Iterate linearly through the N memtable entries. For each entry i,
  112. // add it to the sample set with a probability
  113. // (target_sample_size - entries.size() ) / (N-i).
  114. //
  115. // 2-Pick m random elements without repetition.
  116. // We pick Option 2 when m<sqrt(N) and
  117. // Option 1 when m > sqrt(N).
  118. if (target_sample_size >
  119. static_cast<uint64_t>(std::sqrt(1.0 * num_entries))) {
  120. Random* rnd = Random::GetTLSInstance();
  121. iter.SeekToFirst();
  122. uint64_t counter = 0, num_samples_left = target_sample_size;
  123. for (; iter.Valid() && (num_samples_left > 0); iter.Next(), counter++) {
  124. // Add entry to sample set with probability
  125. // num_samples_left/(num_entries - counter).
  126. if (rnd->Next() % (num_entries - counter) < num_samples_left) {
  127. entries->insert(iter.key());
  128. num_samples_left--;
  129. }
  130. }
  131. } else {
  132. // Option 2: pick m random elements with no duplicates.
  133. // If Option 2 is picked, then target_sample_size<sqrt(N)
  134. // Using a set spares the need to check for duplicates.
  135. for (uint64_t i = 0; i < target_sample_size; i++) {
  136. // We give it 5 attempts to find a non-duplicate
  137. // With 5 attempts, the chances of returning `entries` set
  138. // of size target_sample_size is:
  139. // PROD_{i=1}^{target_sample_size-1} [1-(i/N)^5]
  140. // which is monotonically increasing with N in the worse case
  141. // of target_sample_size=sqrt(N), and is always >99.9% for N>4.
  142. // At worst, for the final pick , when m=sqrt(N) there is
  143. // a probability of p= 1/sqrt(N) chances to find a duplicate.
  144. for (uint64_t j = 0; j < 5; j++) {
  145. iter.RandomSeek();
  146. // unordered_set::insert returns pair<iterator, bool>.
  147. // The second element is true if an insert successfully happened.
  148. // If element is already in the set, this bool will be false, and
  149. // true otherwise.
  150. if ((entries->insert(iter.key())).second) {
  151. break;
  152. }
  153. }
  154. }
  155. }
  156. }
  157. ~SkipListRep() override = default;
  158. // Iteration over the contents of a skip list
  159. class Iterator : public MemTableRep::Iterator {
  160. InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_;
  161. public:
  162. // Initialize an iterator over the specified list.
  163. // The returned iterator is not valid.
  164. explicit Iterator(
  165. const InlineSkipList<const MemTableRep::KeyComparator&>* list)
  166. : iter_(list) {}
  167. ~Iterator() override = default;
  168. // Returns true iff the iterator is positioned at a valid node.
  169. bool Valid() const override { return iter_.Valid(); }
  170. // Returns the key at the current position.
  171. // REQUIRES: Valid()
  172. const char* key() const override {
  173. assert(Valid());
  174. return iter_.key();
  175. }
  176. // Advances to the next position.
  177. // REQUIRES: Valid()
  178. void Next() override {
  179. assert(Valid());
  180. iter_.Next();
  181. }
  182. // Advances to the previous position.
  183. // REQUIRES: Valid()
  184. void Prev() override {
  185. assert(Valid());
  186. iter_.Prev();
  187. }
  188. // Advance to the first entry with a key >= target
  189. void Seek(const Slice& user_key, const char* memtable_key) override {
  190. if (memtable_key != nullptr) {
  191. iter_.Seek(memtable_key);
  192. } else {
  193. iter_.Seek(EncodeKey(&tmp_, user_key));
  194. }
  195. }
  196. // Retreat to the last entry with a key <= target
  197. void SeekForPrev(const Slice& user_key, const char* memtable_key) override {
  198. if (memtable_key != nullptr) {
  199. iter_.SeekForPrev(memtable_key);
  200. } else {
  201. iter_.SeekForPrev(EncodeKey(&tmp_, user_key));
  202. }
  203. }
  204. void RandomSeek() override { iter_.RandomSeek(); }
  205. // Position at the first entry in list.
  206. // Final state of iterator is Valid() iff list is not empty.
  207. void SeekToFirst() override { iter_.SeekToFirst(); }
  208. // Position at the last entry in list.
  209. // Final state of iterator is Valid() iff list is not empty.
  210. void SeekToLast() override { iter_.SeekToLast(); }
  211. Status NextAndValidate(bool allow_data_in_errors) override {
  212. assert(Valid());
  213. return iter_.NextAndValidate(allow_data_in_errors);
  214. }
  215. Status SeekAndValidate(const Slice& user_key, const char* memtable_key,
  216. bool allow_data_in_errors,
  217. bool detect_key_out_of_order,
  218. const std::function<Status(const char*, bool)>&
  219. key_validation_callback) override {
  220. if (memtable_key != nullptr) {
  221. return iter_.SeekAndValidate(memtable_key, allow_data_in_errors,
  222. detect_key_out_of_order,
  223. key_validation_callback);
  224. } else {
  225. return iter_.SeekAndValidate(
  226. EncodeKey(&tmp_, user_key), allow_data_in_errors,
  227. detect_key_out_of_order, key_validation_callback);
  228. }
  229. }
  230. Status PrevAndValidate(bool allow_data_in_error) override {
  231. assert(Valid());
  232. return iter_.PrevAndValidate(allow_data_in_error);
  233. }
  234. protected:
  235. std::string tmp_; // For passing to EncodeKey
  236. };
  237. // Iterator over the contents of a skip list which also keeps track of the
  238. // previously visited node. In Seek(), it examines a few nodes after it
  239. // first, falling back to O(log n) search from the head of the list only if
  240. // the target key hasn't been found.
  241. class LookaheadIterator : public MemTableRep::Iterator {
  242. public:
  243. explicit LookaheadIterator(const SkipListRep& rep)
  244. : rep_(rep), iter_(&rep_.skip_list_), prev_(iter_) {}
  245. ~LookaheadIterator() override = default;
  246. bool Valid() const override { return iter_.Valid(); }
  247. const char* key() const override {
  248. assert(Valid());
  249. return iter_.key();
  250. }
  251. void Next() override {
  252. assert(Valid());
  253. bool advance_prev = true;
  254. if (prev_.Valid()) {
  255. auto k1 = rep_.UserKey(prev_.key());
  256. auto k2 = rep_.UserKey(iter_.key());
  257. if (k1.compare(k2) == 0) {
  258. // same user key, don't move prev_
  259. advance_prev = false;
  260. } else if (rep_.transform_) {
  261. // only advance prev_ if it has the same prefix as iter_
  262. auto t1 = rep_.transform_->Transform(k1);
  263. auto t2 = rep_.transform_->Transform(k2);
  264. advance_prev = t1.compare(t2) == 0;
  265. }
  266. }
  267. if (advance_prev) {
  268. prev_ = iter_;
  269. }
  270. iter_.Next();
  271. }
  272. void Prev() override {
  273. assert(Valid());
  274. iter_.Prev();
  275. prev_ = iter_;
  276. }
  277. void Seek(const Slice& internal_key, const char* memtable_key) override {
  278. const char* encoded_key = (memtable_key != nullptr)
  279. ? memtable_key
  280. : EncodeKey(&tmp_, internal_key);
  281. if (prev_.Valid() && rep_.cmp_(encoded_key, prev_.key()) >= 0) {
  282. // prev_.key() is smaller or equal to our target key; do a quick
  283. // linear search (at most lookahead_ steps) starting from prev_
  284. iter_ = prev_;
  285. size_t cur = 0;
  286. while (cur++ <= rep_.lookahead_ && iter_.Valid()) {
  287. if (rep_.cmp_(encoded_key, iter_.key()) <= 0) {
  288. return;
  289. }
  290. Next();
  291. }
  292. }
  293. iter_.Seek(encoded_key);
  294. prev_ = iter_;
  295. }
  296. void SeekForPrev(const Slice& internal_key,
  297. const char* memtable_key) override {
  298. const char* encoded_key = (memtable_key != nullptr)
  299. ? memtable_key
  300. : EncodeKey(&tmp_, internal_key);
  301. iter_.SeekForPrev(encoded_key);
  302. prev_ = iter_;
  303. }
  304. void SeekToFirst() override {
  305. iter_.SeekToFirst();
  306. prev_ = iter_;
  307. }
  308. void SeekToLast() override {
  309. iter_.SeekToLast();
  310. prev_ = iter_;
  311. }
  312. protected:
  313. std::string tmp_; // For passing to EncodeKey
  314. private:
  315. const SkipListRep& rep_;
  316. InlineSkipList<const MemTableRep::KeyComparator&>::Iterator iter_;
  317. InlineSkipList<const MemTableRep::KeyComparator&>::Iterator prev_;
  318. };
  319. MemTableRep::Iterator* GetIterator(Arena* arena = nullptr) override {
  320. if (lookahead_ > 0) {
  321. void* mem =
  322. arena ? arena->AllocateAligned(sizeof(SkipListRep::LookaheadIterator))
  323. : operator new(sizeof(SkipListRep::LookaheadIterator));
  324. return new (mem) SkipListRep::LookaheadIterator(*this);
  325. } else {
  326. void* mem = arena ? arena->AllocateAligned(sizeof(SkipListRep::Iterator))
  327. : operator new(sizeof(SkipListRep::Iterator));
  328. return new (mem) SkipListRep::Iterator(&skip_list_);
  329. }
  330. }
  331. };
  332. } // namespace
  333. static std::unordered_map<std::string, OptionTypeInfo> skiplist_factory_info = {
  334. {"lookahead",
  335. {0, OptionType::kSizeT, OptionVerificationType::kNormal,
  336. OptionTypeFlags::kDontSerialize /*Since it is part of the ID*/}},
  337. };
  338. SkipListFactory::SkipListFactory(size_t lookahead) : lookahead_(lookahead) {
  339. RegisterOptions("SkipListFactoryOptions", &lookahead_,
  340. &skiplist_factory_info);
  341. }
  342. std::string SkipListFactory::GetId() const {
  343. std::string id = Name();
  344. if (lookahead_ > 0) {
  345. id.append(":").append(std::to_string(lookahead_));
  346. }
  347. return id;
  348. }
  349. MemTableRep* SkipListFactory::CreateMemTableRep(
  350. const MemTableRep::KeyComparator& compare, Allocator* allocator,
  351. const SliceTransform* transform, Logger* /*logger*/) {
  352. return new SkipListRep(compare, allocator, transform, lookahead_);
  353. }
  354. } // namespace ROCKSDB_NAMESPACE