cuckoo_table_reader.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #ifndef ROCKSDB_LITE
  10. #include "table/cuckoo/cuckoo_table_reader.h"
  11. #include <algorithm>
  12. #include <limits>
  13. #include <string>
  14. #include <utility>
  15. #include <vector>
  16. #include "memory/arena.h"
  17. #include "rocksdb/iterator.h"
  18. #include "rocksdb/table.h"
  19. #include "table/cuckoo/cuckoo_table_factory.h"
  20. #include "table/get_context.h"
  21. #include "table/internal_iterator.h"
  22. #include "table/meta_blocks.h"
  23. #include "util/coding.h"
  24. namespace ROCKSDB_NAMESPACE {
  25. namespace {
  26. const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
  27. const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
  28. }
  29. extern const uint64_t kCuckooTableMagicNumber;
  30. CuckooTableReader::CuckooTableReader(
  31. const ImmutableCFOptions& ioptions,
  32. std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
  33. const Comparator* comparator,
  34. uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
  35. : file_(std::move(file)),
  36. is_last_level_(false),
  37. identity_as_first_hash_(false),
  38. use_module_hash_(false),
  39. num_hash_func_(0),
  40. unused_key_(""),
  41. key_length_(0),
  42. user_key_length_(0),
  43. value_length_(0),
  44. bucket_length_(0),
  45. cuckoo_block_size_(0),
  46. cuckoo_block_bytes_minus_one_(0),
  47. table_size_(0),
  48. ucomp_(comparator),
  49. get_slice_hash_(get_slice_hash) {
  50. if (!ioptions.allow_mmap_reads) {
  51. status_ = Status::InvalidArgument("File is not mmaped");
  52. }
  53. TableProperties* props = nullptr;
  54. status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber,
  55. ioptions, &props, true /* compression_type_missing */);
  56. if (!status_.ok()) {
  57. return;
  58. }
  59. table_props_.reset(props);
  60. auto& user_props = props->user_collected_properties;
  61. auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
  62. if (hash_funs == user_props.end()) {
  63. status_ = Status::Corruption("Number of hash functions not found");
  64. return;
  65. }
  66. num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
  67. auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
  68. if (unused_key == user_props.end()) {
  69. status_ = Status::Corruption("Empty bucket value not found");
  70. return;
  71. }
  72. unused_key_ = unused_key->second;
  73. key_length_ = static_cast<uint32_t>(props->fixed_key_len);
  74. auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
  75. if (user_key_len == user_props.end()) {
  76. status_ = Status::Corruption("User key length not found");
  77. return;
  78. }
  79. user_key_length_ = *reinterpret_cast<const uint32_t*>(
  80. user_key_len->second.data());
  81. auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
  82. if (value_length == user_props.end()) {
  83. status_ = Status::Corruption("Value length not found");
  84. return;
  85. }
  86. value_length_ = *reinterpret_cast<const uint32_t*>(
  87. value_length->second.data());
  88. bucket_length_ = key_length_ + value_length_;
  89. auto hash_table_size = user_props.find(
  90. CuckooTablePropertyNames::kHashTableSize);
  91. if (hash_table_size == user_props.end()) {
  92. status_ = Status::Corruption("Hash table size not found");
  93. return;
  94. }
  95. table_size_ = *reinterpret_cast<const uint64_t*>(
  96. hash_table_size->second.data());
  97. auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
  98. if (is_last_level == user_props.end()) {
  99. status_ = Status::Corruption("Is last level not found");
  100. return;
  101. }
  102. is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
  103. auto identity_as_first_hash = user_props.find(
  104. CuckooTablePropertyNames::kIdentityAsFirstHash);
  105. if (identity_as_first_hash == user_props.end()) {
  106. status_ = Status::Corruption("identity as first hash not found");
  107. return;
  108. }
  109. identity_as_first_hash_ = *reinterpret_cast<const bool*>(
  110. identity_as_first_hash->second.data());
  111. auto use_module_hash = user_props.find(
  112. CuckooTablePropertyNames::kUseModuleHash);
  113. if (use_module_hash == user_props.end()) {
  114. status_ = Status::Corruption("hash type is not found");
  115. return;
  116. }
  117. use_module_hash_ = *reinterpret_cast<const bool*>(
  118. use_module_hash->second.data());
  119. auto cuckoo_block_size = user_props.find(
  120. CuckooTablePropertyNames::kCuckooBlockSize);
  121. if (cuckoo_block_size == user_props.end()) {
  122. status_ = Status::Corruption("Cuckoo block size not found");
  123. return;
  124. }
  125. cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
  126. cuckoo_block_size->second.data());
  127. cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
  128. status_ = file_->Read(0, static_cast<size_t>(file_size), &file_data_, nullptr);
  129. }
  130. Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
  131. const Slice& key, GetContext* get_context,
  132. const SliceTransform* /* prefix_extractor */,
  133. bool /*skip_filters*/) {
  134. assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
  135. Slice user_key = ExtractUserKey(key);
  136. for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
  137. uint64_t offset = bucket_length_ * CuckooHash(
  138. user_key, hash_cnt, use_module_hash_, table_size_,
  139. identity_as_first_hash_, get_slice_hash_);
  140. const char* bucket = &file_data_.data()[offset];
  141. for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
  142. ++block_idx, bucket += bucket_length_) {
  143. if (ucomp_->Equal(Slice(unused_key_.data(), user_key.size()),
  144. Slice(bucket, user_key.size()))) {
  145. return Status::OK();
  146. }
  147. // Here, we compare only the user key part as we support only one entry
  148. // per user key and we don't support snapshot.
  149. if (ucomp_->Equal(user_key, Slice(bucket, user_key.size()))) {
  150. Slice value(bucket + key_length_, value_length_);
  151. if (is_last_level_) {
  152. // Sequence number is not stored at the last level, so we will use
  153. // kMaxSequenceNumber since it is unknown. This could cause some
  154. // transactions to fail to lock a key due to known sequence number.
  155. // However, it is expected for anyone to use a CuckooTable in a
  156. // TransactionDB.
  157. get_context->SaveValue(value, kMaxSequenceNumber);
  158. } else {
  159. Slice full_key(bucket, key_length_);
  160. ParsedInternalKey found_ikey;
  161. ParseInternalKey(full_key, &found_ikey);
  162. bool dont_care __attribute__((__unused__));
  163. get_context->SaveValue(found_ikey, value, &dont_care);
  164. }
  165. // We don't support merge operations. So, we return here.
  166. return Status::OK();
  167. }
  168. }
  169. }
  170. return Status::OK();
  171. }
  172. void CuckooTableReader::Prepare(const Slice& key) {
  173. // Prefetch the first Cuckoo Block.
  174. Slice user_key = ExtractUserKey(key);
  175. uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) +
  176. bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_,
  177. identity_as_first_hash_, nullptr);
  178. uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
  179. for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
  180. PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
  181. }
  182. }
  183. class CuckooTableIterator : public InternalIterator {
  184. public:
  185. explicit CuckooTableIterator(CuckooTableReader* reader);
  186. // No copying allowed
  187. CuckooTableIterator(const CuckooTableIterator&) = delete;
  188. void operator=(const Iterator&) = delete;
  189. ~CuckooTableIterator() override {}
  190. bool Valid() const override;
  191. void SeekToFirst() override;
  192. void SeekToLast() override;
  193. void Seek(const Slice& target) override;
  194. void SeekForPrev(const Slice& target) override;
  195. void Next() override;
  196. void Prev() override;
  197. Slice key() const override;
  198. Slice value() const override;
  199. Status status() const override { return Status::OK(); }
  200. void InitIfNeeded();
  201. private:
  202. struct BucketComparator {
  203. BucketComparator(const Slice& file_data, const Comparator* ucomp,
  204. uint32_t bucket_len, uint32_t user_key_len,
  205. const Slice& target = Slice())
  206. : file_data_(file_data),
  207. ucomp_(ucomp),
  208. bucket_len_(bucket_len),
  209. user_key_len_(user_key_len),
  210. target_(target) {}
  211. bool operator()(const uint32_t first, const uint32_t second) const {
  212. const char* first_bucket =
  213. (first == kInvalidIndex) ? target_.data() :
  214. &file_data_.data()[first * bucket_len_];
  215. const char* second_bucket =
  216. (second == kInvalidIndex) ? target_.data() :
  217. &file_data_.data()[second * bucket_len_];
  218. return ucomp_->Compare(Slice(first_bucket, user_key_len_),
  219. Slice(second_bucket, user_key_len_)) < 0;
  220. }
  221. private:
  222. const Slice file_data_;
  223. const Comparator* ucomp_;
  224. const uint32_t bucket_len_;
  225. const uint32_t user_key_len_;
  226. const Slice target_;
  227. };
  228. const BucketComparator bucket_comparator_;
  229. void PrepareKVAtCurrIdx();
  230. CuckooTableReader* reader_;
  231. bool initialized_;
  232. // Contains a map of keys to bucket_id sorted in key order.
  233. std::vector<uint32_t> sorted_bucket_ids_;
  234. // We assume that the number of items can be stored in uint32 (4 Billion).
  235. uint32_t curr_key_idx_;
  236. Slice curr_value_;
  237. IterKey curr_key_;
  238. };
  239. CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
  240. : bucket_comparator_(reader->file_data_, reader->ucomp_,
  241. reader->bucket_length_, reader->user_key_length_),
  242. reader_(reader),
  243. initialized_(false),
  244. curr_key_idx_(kInvalidIndex) {
  245. sorted_bucket_ids_.clear();
  246. curr_value_.clear();
  247. curr_key_.Clear();
  248. }
  249. void CuckooTableIterator::InitIfNeeded() {
  250. if (initialized_) {
  251. return;
  252. }
  253. sorted_bucket_ids_.reserve(static_cast<size_t>(reader_->GetTableProperties()->num_entries));
  254. uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
  255. assert(num_buckets < kInvalidIndex);
  256. const char* bucket = reader_->file_data_.data();
  257. for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
  258. if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
  259. sorted_bucket_ids_.push_back(bucket_id);
  260. }
  261. bucket += reader_->bucket_length_;
  262. }
  263. assert(sorted_bucket_ids_.size() ==
  264. reader_->GetTableProperties()->num_entries);
  265. std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
  266. bucket_comparator_);
  267. curr_key_idx_ = kInvalidIndex;
  268. initialized_ = true;
  269. }
  270. void CuckooTableIterator::SeekToFirst() {
  271. InitIfNeeded();
  272. curr_key_idx_ = 0;
  273. PrepareKVAtCurrIdx();
  274. }
  275. void CuckooTableIterator::SeekToLast() {
  276. InitIfNeeded();
  277. curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size()) - 1;
  278. PrepareKVAtCurrIdx();
  279. }
  280. void CuckooTableIterator::Seek(const Slice& target) {
  281. InitIfNeeded();
  282. const BucketComparator seek_comparator(
  283. reader_->file_data_, reader_->ucomp_,
  284. reader_->bucket_length_, reader_->user_key_length_,
  285. ExtractUserKey(target));
  286. auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(),
  287. sorted_bucket_ids_.end(),
  288. kInvalidIndex,
  289. seek_comparator);
  290. curr_key_idx_ =
  291. static_cast<uint32_t>(std::distance(sorted_bucket_ids_.begin(), seek_it));
  292. PrepareKVAtCurrIdx();
  293. }
  294. void CuckooTableIterator::SeekForPrev(const Slice& /*target*/) {
  295. // Not supported
  296. assert(false);
  297. }
  298. bool CuckooTableIterator::Valid() const {
  299. return curr_key_idx_ < sorted_bucket_ids_.size();
  300. }
  301. void CuckooTableIterator::PrepareKVAtCurrIdx() {
  302. if (!Valid()) {
  303. curr_value_.clear();
  304. curr_key_.Clear();
  305. return;
  306. }
  307. uint32_t id = sorted_bucket_ids_[curr_key_idx_];
  308. const char* offset = reader_->file_data_.data() +
  309. id * reader_->bucket_length_;
  310. if (reader_->is_last_level_) {
  311. // Always return internal key.
  312. curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_),
  313. 0, kTypeValue);
  314. } else {
  315. curr_key_.SetInternalKey(Slice(offset, reader_->key_length_));
  316. }
  317. curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
  318. }
  319. void CuckooTableIterator::Next() {
  320. if (!Valid()) {
  321. curr_value_.clear();
  322. curr_key_.Clear();
  323. return;
  324. }
  325. ++curr_key_idx_;
  326. PrepareKVAtCurrIdx();
  327. }
  328. void CuckooTableIterator::Prev() {
  329. if (curr_key_idx_ == 0) {
  330. curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size());
  331. }
  332. if (!Valid()) {
  333. curr_value_.clear();
  334. curr_key_.Clear();
  335. return;
  336. }
  337. --curr_key_idx_;
  338. PrepareKVAtCurrIdx();
  339. }
  340. Slice CuckooTableIterator::key() const {
  341. assert(Valid());
  342. return curr_key_.GetInternalKey();
  343. }
  344. Slice CuckooTableIterator::value() const {
  345. assert(Valid());
  346. return curr_value_;
  347. }
  348. InternalIterator* CuckooTableReader::NewIterator(
  349. const ReadOptions& /*read_options*/,
  350. const SliceTransform* /* prefix_extractor */, Arena* arena,
  351. bool /*skip_filters*/, TableReaderCaller /*caller*/,
  352. size_t /*compaction_readahead_size*/) {
  353. if (!status().ok()) {
  354. return NewErrorInternalIterator<Slice>(
  355. Status::Corruption("CuckooTableReader status is not okay."), arena);
  356. }
  357. CuckooTableIterator* iter;
  358. if (arena == nullptr) {
  359. iter = new CuckooTableIterator(this);
  360. } else {
  361. auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator));
  362. iter = new (iter_mem) CuckooTableIterator(this);
  363. }
  364. return iter;
  365. }
  366. size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; }
  367. } // namespace ROCKSDB_NAMESPACE
  368. #endif