| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- //
- // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style license that can be
- // found in the LICENSE file. See the AUTHORS file for names of contributors.
- #ifndef ROCKSDB_LITE
- #include "table/cuckoo/cuckoo_table_reader.h"
- #include <algorithm>
- #include <limits>
- #include <string>
- #include <utility>
- #include <vector>
- #include "memory/arena.h"
- #include "rocksdb/iterator.h"
- #include "rocksdb/table.h"
- #include "table/cuckoo/cuckoo_table_factory.h"
- #include "table/get_context.h"
- #include "table/internal_iterator.h"
- #include "table/meta_blocks.h"
- #include "util/coding.h"
- namespace ROCKSDB_NAMESPACE {
- namespace {
- const uint64_t CACHE_LINE_MASK = ~((uint64_t)CACHE_LINE_SIZE - 1);
- const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
- }
- extern const uint64_t kCuckooTableMagicNumber;
- CuckooTableReader::CuckooTableReader(
- const ImmutableCFOptions& ioptions,
- std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
- const Comparator* comparator,
- uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
- : file_(std::move(file)),
- is_last_level_(false),
- identity_as_first_hash_(false),
- use_module_hash_(false),
- num_hash_func_(0),
- unused_key_(""),
- key_length_(0),
- user_key_length_(0),
- value_length_(0),
- bucket_length_(0),
- cuckoo_block_size_(0),
- cuckoo_block_bytes_minus_one_(0),
- table_size_(0),
- ucomp_(comparator),
- get_slice_hash_(get_slice_hash) {
- if (!ioptions.allow_mmap_reads) {
- status_ = Status::InvalidArgument("File is not mmaped");
- }
- TableProperties* props = nullptr;
- status_ = ReadTableProperties(file_.get(), file_size, kCuckooTableMagicNumber,
- ioptions, &props, true /* compression_type_missing */);
- if (!status_.ok()) {
- return;
- }
- table_props_.reset(props);
- auto& user_props = props->user_collected_properties;
- auto hash_funs = user_props.find(CuckooTablePropertyNames::kNumHashFunc);
- if (hash_funs == user_props.end()) {
- status_ = Status::Corruption("Number of hash functions not found");
- return;
- }
- num_hash_func_ = *reinterpret_cast<const uint32_t*>(hash_funs->second.data());
- auto unused_key = user_props.find(CuckooTablePropertyNames::kEmptyKey);
- if (unused_key == user_props.end()) {
- status_ = Status::Corruption("Empty bucket value not found");
- return;
- }
- unused_key_ = unused_key->second;
- key_length_ = static_cast<uint32_t>(props->fixed_key_len);
- auto user_key_len = user_props.find(CuckooTablePropertyNames::kUserKeyLength);
- if (user_key_len == user_props.end()) {
- status_ = Status::Corruption("User key length not found");
- return;
- }
- user_key_length_ = *reinterpret_cast<const uint32_t*>(
- user_key_len->second.data());
- auto value_length = user_props.find(CuckooTablePropertyNames::kValueLength);
- if (value_length == user_props.end()) {
- status_ = Status::Corruption("Value length not found");
- return;
- }
- value_length_ = *reinterpret_cast<const uint32_t*>(
- value_length->second.data());
- bucket_length_ = key_length_ + value_length_;
- auto hash_table_size = user_props.find(
- CuckooTablePropertyNames::kHashTableSize);
- if (hash_table_size == user_props.end()) {
- status_ = Status::Corruption("Hash table size not found");
- return;
- }
- table_size_ = *reinterpret_cast<const uint64_t*>(
- hash_table_size->second.data());
- auto is_last_level = user_props.find(CuckooTablePropertyNames::kIsLastLevel);
- if (is_last_level == user_props.end()) {
- status_ = Status::Corruption("Is last level not found");
- return;
- }
- is_last_level_ = *reinterpret_cast<const bool*>(is_last_level->second.data());
- auto identity_as_first_hash = user_props.find(
- CuckooTablePropertyNames::kIdentityAsFirstHash);
- if (identity_as_first_hash == user_props.end()) {
- status_ = Status::Corruption("identity as first hash not found");
- return;
- }
- identity_as_first_hash_ = *reinterpret_cast<const bool*>(
- identity_as_first_hash->second.data());
- auto use_module_hash = user_props.find(
- CuckooTablePropertyNames::kUseModuleHash);
- if (use_module_hash == user_props.end()) {
- status_ = Status::Corruption("hash type is not found");
- return;
- }
- use_module_hash_ = *reinterpret_cast<const bool*>(
- use_module_hash->second.data());
- auto cuckoo_block_size = user_props.find(
- CuckooTablePropertyNames::kCuckooBlockSize);
- if (cuckoo_block_size == user_props.end()) {
- status_ = Status::Corruption("Cuckoo block size not found");
- return;
- }
- cuckoo_block_size_ = *reinterpret_cast<const uint32_t*>(
- cuckoo_block_size->second.data());
- cuckoo_block_bytes_minus_one_ = cuckoo_block_size_ * bucket_length_ - 1;
- status_ = file_->Read(0, static_cast<size_t>(file_size), &file_data_, nullptr);
- }
- Status CuckooTableReader::Get(const ReadOptions& /*readOptions*/,
- const Slice& key, GetContext* get_context,
- const SliceTransform* /* prefix_extractor */,
- bool /*skip_filters*/) {
- assert(key.size() == key_length_ + (is_last_level_ ? 8 : 0));
- Slice user_key = ExtractUserKey(key);
- for (uint32_t hash_cnt = 0; hash_cnt < num_hash_func_; ++hash_cnt) {
- uint64_t offset = bucket_length_ * CuckooHash(
- user_key, hash_cnt, use_module_hash_, table_size_,
- identity_as_first_hash_, get_slice_hash_);
- const char* bucket = &file_data_.data()[offset];
- for (uint32_t block_idx = 0; block_idx < cuckoo_block_size_;
- ++block_idx, bucket += bucket_length_) {
- if (ucomp_->Equal(Slice(unused_key_.data(), user_key.size()),
- Slice(bucket, user_key.size()))) {
- return Status::OK();
- }
- // Here, we compare only the user key part as we support only one entry
- // per user key and we don't support snapshot.
- if (ucomp_->Equal(user_key, Slice(bucket, user_key.size()))) {
- Slice value(bucket + key_length_, value_length_);
- if (is_last_level_) {
- // Sequence number is not stored at the last level, so we will use
- // kMaxSequenceNumber since it is unknown. This could cause some
- // transactions to fail to lock a key due to known sequence number.
- // However, it is expected for anyone to use a CuckooTable in a
- // TransactionDB.
- get_context->SaveValue(value, kMaxSequenceNumber);
- } else {
- Slice full_key(bucket, key_length_);
- ParsedInternalKey found_ikey;
- ParseInternalKey(full_key, &found_ikey);
- bool dont_care __attribute__((__unused__));
- get_context->SaveValue(found_ikey, value, &dont_care);
- }
- // We don't support merge operations. So, we return here.
- return Status::OK();
- }
- }
- }
- return Status::OK();
- }
- void CuckooTableReader::Prepare(const Slice& key) {
- // Prefetch the first Cuckoo Block.
- Slice user_key = ExtractUserKey(key);
- uint64_t addr = reinterpret_cast<uint64_t>(file_data_.data()) +
- bucket_length_ * CuckooHash(user_key, 0, use_module_hash_, table_size_,
- identity_as_first_hash_, nullptr);
- uint64_t end_addr = addr + cuckoo_block_bytes_minus_one_;
- for (addr &= CACHE_LINE_MASK; addr < end_addr; addr += CACHE_LINE_SIZE) {
- PREFETCH(reinterpret_cast<const char*>(addr), 0, 3);
- }
- }
- class CuckooTableIterator : public InternalIterator {
- public:
- explicit CuckooTableIterator(CuckooTableReader* reader);
- // No copying allowed
- CuckooTableIterator(const CuckooTableIterator&) = delete;
- void operator=(const Iterator&) = delete;
- ~CuckooTableIterator() override {}
- bool Valid() const override;
- void SeekToFirst() override;
- void SeekToLast() override;
- void Seek(const Slice& target) override;
- void SeekForPrev(const Slice& target) override;
- void Next() override;
- void Prev() override;
- Slice key() const override;
- Slice value() const override;
- Status status() const override { return Status::OK(); }
- void InitIfNeeded();
- private:
- struct BucketComparator {
- BucketComparator(const Slice& file_data, const Comparator* ucomp,
- uint32_t bucket_len, uint32_t user_key_len,
- const Slice& target = Slice())
- : file_data_(file_data),
- ucomp_(ucomp),
- bucket_len_(bucket_len),
- user_key_len_(user_key_len),
- target_(target) {}
- bool operator()(const uint32_t first, const uint32_t second) const {
- const char* first_bucket =
- (first == kInvalidIndex) ? target_.data() :
- &file_data_.data()[first * bucket_len_];
- const char* second_bucket =
- (second == kInvalidIndex) ? target_.data() :
- &file_data_.data()[second * bucket_len_];
- return ucomp_->Compare(Slice(first_bucket, user_key_len_),
- Slice(second_bucket, user_key_len_)) < 0;
- }
- private:
- const Slice file_data_;
- const Comparator* ucomp_;
- const uint32_t bucket_len_;
- const uint32_t user_key_len_;
- const Slice target_;
- };
- const BucketComparator bucket_comparator_;
- void PrepareKVAtCurrIdx();
- CuckooTableReader* reader_;
- bool initialized_;
- // Contains a map of keys to bucket_id sorted in key order.
- std::vector<uint32_t> sorted_bucket_ids_;
- // We assume that the number of items can be stored in uint32 (4 Billion).
- uint32_t curr_key_idx_;
- Slice curr_value_;
- IterKey curr_key_;
- };
- CuckooTableIterator::CuckooTableIterator(CuckooTableReader* reader)
- : bucket_comparator_(reader->file_data_, reader->ucomp_,
- reader->bucket_length_, reader->user_key_length_),
- reader_(reader),
- initialized_(false),
- curr_key_idx_(kInvalidIndex) {
- sorted_bucket_ids_.clear();
- curr_value_.clear();
- curr_key_.Clear();
- }
- void CuckooTableIterator::InitIfNeeded() {
- if (initialized_) {
- return;
- }
- sorted_bucket_ids_.reserve(static_cast<size_t>(reader_->GetTableProperties()->num_entries));
- uint64_t num_buckets = reader_->table_size_ + reader_->cuckoo_block_size_ - 1;
- assert(num_buckets < kInvalidIndex);
- const char* bucket = reader_->file_data_.data();
- for (uint32_t bucket_id = 0; bucket_id < num_buckets; ++bucket_id) {
- if (Slice(bucket, reader_->key_length_) != Slice(reader_->unused_key_)) {
- sorted_bucket_ids_.push_back(bucket_id);
- }
- bucket += reader_->bucket_length_;
- }
- assert(sorted_bucket_ids_.size() ==
- reader_->GetTableProperties()->num_entries);
- std::sort(sorted_bucket_ids_.begin(), sorted_bucket_ids_.end(),
- bucket_comparator_);
- curr_key_idx_ = kInvalidIndex;
- initialized_ = true;
- }
- void CuckooTableIterator::SeekToFirst() {
- InitIfNeeded();
- curr_key_idx_ = 0;
- PrepareKVAtCurrIdx();
- }
- void CuckooTableIterator::SeekToLast() {
- InitIfNeeded();
- curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size()) - 1;
- PrepareKVAtCurrIdx();
- }
- void CuckooTableIterator::Seek(const Slice& target) {
- InitIfNeeded();
- const BucketComparator seek_comparator(
- reader_->file_data_, reader_->ucomp_,
- reader_->bucket_length_, reader_->user_key_length_,
- ExtractUserKey(target));
- auto seek_it = std::lower_bound(sorted_bucket_ids_.begin(),
- sorted_bucket_ids_.end(),
- kInvalidIndex,
- seek_comparator);
- curr_key_idx_ =
- static_cast<uint32_t>(std::distance(sorted_bucket_ids_.begin(), seek_it));
- PrepareKVAtCurrIdx();
- }
- void CuckooTableIterator::SeekForPrev(const Slice& /*target*/) {
- // Not supported
- assert(false);
- }
- bool CuckooTableIterator::Valid() const {
- return curr_key_idx_ < sorted_bucket_ids_.size();
- }
- void CuckooTableIterator::PrepareKVAtCurrIdx() {
- if (!Valid()) {
- curr_value_.clear();
- curr_key_.Clear();
- return;
- }
- uint32_t id = sorted_bucket_ids_[curr_key_idx_];
- const char* offset = reader_->file_data_.data() +
- id * reader_->bucket_length_;
- if (reader_->is_last_level_) {
- // Always return internal key.
- curr_key_.SetInternalKey(Slice(offset, reader_->user_key_length_),
- 0, kTypeValue);
- } else {
- curr_key_.SetInternalKey(Slice(offset, reader_->key_length_));
- }
- curr_value_ = Slice(offset + reader_->key_length_, reader_->value_length_);
- }
- void CuckooTableIterator::Next() {
- if (!Valid()) {
- curr_value_.clear();
- curr_key_.Clear();
- return;
- }
- ++curr_key_idx_;
- PrepareKVAtCurrIdx();
- }
- void CuckooTableIterator::Prev() {
- if (curr_key_idx_ == 0) {
- curr_key_idx_ = static_cast<uint32_t>(sorted_bucket_ids_.size());
- }
- if (!Valid()) {
- curr_value_.clear();
- curr_key_.Clear();
- return;
- }
- --curr_key_idx_;
- PrepareKVAtCurrIdx();
- }
- Slice CuckooTableIterator::key() const {
- assert(Valid());
- return curr_key_.GetInternalKey();
- }
- Slice CuckooTableIterator::value() const {
- assert(Valid());
- return curr_value_;
- }
- InternalIterator* CuckooTableReader::NewIterator(
- const ReadOptions& /*read_options*/,
- const SliceTransform* /* prefix_extractor */, Arena* arena,
- bool /*skip_filters*/, TableReaderCaller /*caller*/,
- size_t /*compaction_readahead_size*/) {
- if (!status().ok()) {
- return NewErrorInternalIterator<Slice>(
- Status::Corruption("CuckooTableReader status is not okay."), arena);
- }
- CuckooTableIterator* iter;
- if (arena == nullptr) {
- iter = new CuckooTableIterator(this);
- } else {
- auto iter_mem = arena->AllocateAligned(sizeof(CuckooTableIterator));
- iter = new (iter_mem) CuckooTableIterator(this);
- }
- return iter;
- }
- size_t CuckooTableReader::ApproximateMemoryUsage() const { return 0; }
- } // namespace ROCKSDB_NAMESPACE
- #endif
|