| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497 |
- // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
- // This source code is licensed under both the GPLv2 (found in the
- // COPYING file in the root directory) and Apache 2.0 License
- // (found in the LICENSE.Apache file in the root directory).
- #include "trace_replay/block_cache_tracer.h"
- #include <cinttypes>
- #include <cstdio>
- #include <cstdlib>
- #include "db/db_impl/db_impl.h"
- #include "db/dbformat.h"
- #include "rocksdb/slice.h"
- #include "util/coding.h"
- #include "util/hash.h"
- #include "util/string_util.h"
- namespace ROCKSDB_NAMESPACE {
- namespace {
- const unsigned int kCharSize = 1;
- bool ShouldTrace(const Slice& block_key, const TraceOptions& trace_options) {
- if (trace_options.sampling_frequency == 0 ||
- trace_options.sampling_frequency == 1) {
- return true;
- }
- // We use spatial downsampling so that we have a complete access history for a
- // block.
- return 0 == fastrange64(GetSliceNPHash64(block_key),
- trace_options.sampling_frequency);
- }
- } // namespace
- const uint64_t kMicrosInSecond = 1000 * 1000;
- const uint64_t kSecondInMinute = 60;
- const uint64_t kSecondInHour = 3600;
- const std::string BlockCacheTraceHelper::kUnknownColumnFamilyName =
- "UnknownColumnFamily";
- const uint64_t BlockCacheTraceHelper::kReservedGetId = 0;
- bool BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(
- TraceType block_type, TableReaderCaller caller) {
- return (block_type == TraceType::kBlockTraceDataBlock) &&
- IsGetOrMultiGet(caller);
- }
- bool BlockCacheTraceHelper::IsGetOrMultiGet(TableReaderCaller caller) {
- return caller == TableReaderCaller::kUserGet ||
- caller == TableReaderCaller::kUserMultiGet;
- }
- bool BlockCacheTraceHelper::IsUserAccess(TableReaderCaller caller) {
- return caller == TableReaderCaller::kUserGet ||
- caller == TableReaderCaller::kUserMultiGet ||
- caller == TableReaderCaller::kUserIterator ||
- caller == TableReaderCaller::kUserApproximateSize ||
- caller == TableReaderCaller::kUserVerifyChecksum;
- }
- std::string BlockCacheTraceHelper::ComputeRowKey(
- const BlockCacheTraceRecord& access) {
- if (!IsGetOrMultiGet(access.caller)) {
- return "";
- }
- Slice key = ExtractUserKey(access.referenced_key);
- return std::to_string(access.sst_fd_number) + "_" + key.ToString();
- }
- uint64_t BlockCacheTraceHelper::GetTableId(
- const BlockCacheTraceRecord& access) {
- if (!IsGetOrMultiGet(access.caller) || access.referenced_key.size() < 4) {
- return 0;
- }
- return static_cast<uint64_t>(DecodeFixed32(access.referenced_key.data())) + 1;
- }
- uint64_t BlockCacheTraceHelper::GetSequenceNumber(
- const BlockCacheTraceRecord& access) {
- if (!IsGetOrMultiGet(access.caller)) {
- return 0;
- }
- return access.get_from_user_specified_snapshot == Boolean::kFalse
- ? 0
- : 1 + GetInternalKeySeqno(access.referenced_key);
- }
- uint64_t BlockCacheTraceHelper::GetBlockOffsetInFile(
- const BlockCacheTraceRecord& access) {
- Slice input(access.block_key);
- uint64_t offset = 0;
- while (true) {
- uint64_t tmp = 0;
- if (GetVarint64(&input, &tmp)) {
- offset = tmp;
- } else {
- break;
- }
- }
- return offset;
- }
- BlockCacheTraceWriter::BlockCacheTraceWriter(
- Env* env, const TraceOptions& trace_options,
- std::unique_ptr<TraceWriter>&& trace_writer)
- : env_(env),
- trace_options_(trace_options),
- trace_writer_(std::move(trace_writer)) {}
- Status BlockCacheTraceWriter::WriteBlockAccess(
- const BlockCacheTraceRecord& record, const Slice& block_key,
- const Slice& cf_name, const Slice& referenced_key) {
- uint64_t trace_file_size = trace_writer_->GetFileSize();
- if (trace_file_size > trace_options_.max_trace_file_size) {
- return Status::OK();
- }
- Trace trace;
- trace.ts = record.access_timestamp;
- trace.type = record.block_type;
- PutLengthPrefixedSlice(&trace.payload, block_key);
- PutFixed64(&trace.payload, record.block_size);
- PutFixed64(&trace.payload, record.cf_id);
- PutLengthPrefixedSlice(&trace.payload, cf_name);
- PutFixed32(&trace.payload, record.level);
- PutFixed64(&trace.payload, record.sst_fd_number);
- trace.payload.push_back(record.caller);
- trace.payload.push_back(record.is_cache_hit);
- trace.payload.push_back(record.no_insert);
- if (BlockCacheTraceHelper::IsGetOrMultiGet(record.caller)) {
- PutFixed64(&trace.payload, record.get_id);
- trace.payload.push_back(record.get_from_user_specified_snapshot);
- PutLengthPrefixedSlice(&trace.payload, referenced_key);
- }
- if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(record.block_type,
- record.caller)) {
- PutFixed64(&trace.payload, record.referenced_data_size);
- PutFixed64(&trace.payload, record.num_keys_in_block);
- trace.payload.push_back(record.referenced_key_exist_in_block);
- }
- std::string encoded_trace;
- TracerHelper::EncodeTrace(trace, &encoded_trace);
- return trace_writer_->Write(encoded_trace);
- }
- Status BlockCacheTraceWriter::WriteHeader() {
- Trace trace;
- trace.ts = env_->NowMicros();
- trace.type = TraceType::kTraceBegin;
- PutLengthPrefixedSlice(&trace.payload, kTraceMagic);
- PutFixed32(&trace.payload, kMajorVersion);
- PutFixed32(&trace.payload, kMinorVersion);
- std::string encoded_trace;
- TracerHelper::EncodeTrace(trace, &encoded_trace);
- return trace_writer_->Write(encoded_trace);
- }
- BlockCacheTraceReader::BlockCacheTraceReader(
- std::unique_ptr<TraceReader>&& reader)
- : trace_reader_(std::move(reader)) {}
- Status BlockCacheTraceReader::ReadHeader(BlockCacheTraceHeader* header) {
- assert(header != nullptr);
- std::string encoded_trace;
- Status s = trace_reader_->Read(&encoded_trace);
- if (!s.ok()) {
- return s;
- }
- Trace trace;
- s = TracerHelper::DecodeTrace(encoded_trace, &trace);
- if (!s.ok()) {
- return s;
- }
- header->start_time = trace.ts;
- Slice enc_slice = Slice(trace.payload);
- Slice magnic_number;
- if (!GetLengthPrefixedSlice(&enc_slice, &magnic_number)) {
- return Status::Corruption(
- "Corrupted header in the trace file: Failed to read the magic number.");
- }
- if (magnic_number.ToString() != kTraceMagic) {
- return Status::Corruption(
- "Corrupted header in the trace file: Magic number does not match.");
- }
- if (!GetFixed32(&enc_slice, &header->rocksdb_major_version)) {
- return Status::Corruption(
- "Corrupted header in the trace file: Failed to read rocksdb major "
- "version number.");
- }
- if (!GetFixed32(&enc_slice, &header->rocksdb_minor_version)) {
- return Status::Corruption(
- "Corrupted header in the trace file: Failed to read rocksdb minor "
- "version number.");
- }
- // We should have retrieved all information in the header.
- if (!enc_slice.empty()) {
- return Status::Corruption(
- "Corrupted header in the trace file: The length of header is too "
- "long.");
- }
- return Status::OK();
- }
- Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
- assert(record);
- std::string encoded_trace;
- Status s = trace_reader_->Read(&encoded_trace);
- if (!s.ok()) {
- return s;
- }
- Trace trace;
- s = TracerHelper::DecodeTrace(encoded_trace, &trace);
- if (!s.ok()) {
- return s;
- }
- record->access_timestamp = trace.ts;
- record->block_type = trace.type;
- Slice enc_slice = Slice(trace.payload);
- Slice block_key;
- if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read block key.");
- }
- record->block_key = block_key.ToString();
- if (!GetFixed64(&enc_slice, &record->block_size)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read block size.");
- }
- if (!GetFixed64(&enc_slice, &record->cf_id)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read column family ID.");
- }
- Slice cf_name;
- if (!GetLengthPrefixedSlice(&enc_slice, &cf_name)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read column family name.");
- }
- record->cf_name = cf_name.ToString();
- if (!GetFixed32(&enc_slice, &record->level)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read level.");
- }
- if (!GetFixed64(&enc_slice, &record->sst_fd_number)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read SST file number.");
- }
- if (enc_slice.empty()) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read caller.");
- }
- record->caller = static_cast<TableReaderCaller>(enc_slice[0]);
- enc_slice.remove_prefix(kCharSize);
- if (enc_slice.empty()) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read is_cache_hit.");
- }
- record->is_cache_hit = static_cast<Boolean>(enc_slice[0]);
- enc_slice.remove_prefix(kCharSize);
- if (enc_slice.empty()) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read no_insert.");
- }
- record->no_insert = static_cast<Boolean>(enc_slice[0]);
- enc_slice.remove_prefix(kCharSize);
- if (BlockCacheTraceHelper::IsGetOrMultiGet(record->caller)) {
- if (!GetFixed64(&enc_slice, &record->get_id)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read the get id.");
- }
- if (enc_slice.empty()) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read "
- "get_from_user_specified_snapshot.");
- }
- record->get_from_user_specified_snapshot =
- static_cast<Boolean>(enc_slice[0]);
- enc_slice.remove_prefix(kCharSize);
- Slice referenced_key;
- if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read the referenced key.");
- }
- record->referenced_key = referenced_key.ToString();
- }
- if (BlockCacheTraceHelper::IsGetOrMultiGetOnDataBlock(record->block_type,
- record->caller)) {
- if (!GetFixed64(&enc_slice, &record->referenced_data_size)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read the referenced data size.");
- }
- if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read the number of keys in the "
- "block.");
- }
- if (enc_slice.empty()) {
- return Status::Incomplete(
- "Incomplete access record: Failed to read "
- "referenced_key_exist_in_block.");
- }
- record->referenced_key_exist_in_block = static_cast<Boolean>(enc_slice[0]);
- }
- return Status::OK();
- }
- BlockCacheHumanReadableTraceWriter::~BlockCacheHumanReadableTraceWriter() {
- if (human_readable_trace_file_writer_) {
- human_readable_trace_file_writer_->Flush();
- human_readable_trace_file_writer_->Close();
- }
- }
- Status BlockCacheHumanReadableTraceWriter::NewWritableFile(
- const std::string& human_readable_trace_file_path,
- ROCKSDB_NAMESPACE::Env* env) {
- if (human_readable_trace_file_path.empty()) {
- return Status::InvalidArgument(
- "The provided human_readable_trace_file_path is null.");
- }
- return env->NewWritableFile(human_readable_trace_file_path,
- &human_readable_trace_file_writer_, EnvOptions());
- }
- Status BlockCacheHumanReadableTraceWriter::WriteHumanReadableTraceRecord(
- const BlockCacheTraceRecord& access, uint64_t block_id,
- uint64_t get_key_id) {
- if (!human_readable_trace_file_writer_) {
- return Status::OK();
- }
- int ret = snprintf(
- trace_record_buffer_, sizeof(trace_record_buffer_),
- "%" PRIu64 ",%" PRIu64 ",%u,%" PRIu64 ",%" PRIu64 ",%s,%" PRIu32
- ",%" PRIu64 ",%u,%u,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%u,%u,%" PRIu64
- ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 "\n",
- access.access_timestamp, block_id, access.block_type, access.block_size,
- access.cf_id, access.cf_name.c_str(), access.level, access.sst_fd_number,
- access.caller, access.no_insert, access.get_id, get_key_id,
- access.referenced_data_size, access.is_cache_hit,
- access.referenced_key_exist_in_block, access.num_keys_in_block,
- BlockCacheTraceHelper::GetTableId(access),
- BlockCacheTraceHelper::GetSequenceNumber(access),
- static_cast<uint64_t>(access.block_key.size()),
- static_cast<uint64_t>(access.referenced_key.size()),
- BlockCacheTraceHelper::GetBlockOffsetInFile(access));
- if (ret < 0) {
- return Status::IOError("failed to format the output");
- }
- std::string printout(trace_record_buffer_);
- return human_readable_trace_file_writer_->Append(printout);
- }
- BlockCacheHumanReadableTraceReader::BlockCacheHumanReadableTraceReader(
- const std::string& trace_file_path)
- : BlockCacheTraceReader(/*trace_reader=*/nullptr) {
- human_readable_trace_reader_.open(trace_file_path, std::ifstream::in);
- }
- BlockCacheHumanReadableTraceReader::~BlockCacheHumanReadableTraceReader() {
- human_readable_trace_reader_.close();
- }
- Status BlockCacheHumanReadableTraceReader::ReadHeader(
- BlockCacheTraceHeader* /*header*/) {
- return Status::OK();
- }
- Status BlockCacheHumanReadableTraceReader::ReadAccess(
- BlockCacheTraceRecord* record) {
- std::string line;
- if (!std::getline(human_readable_trace_reader_, line)) {
- return Status::Incomplete("No more records to read.");
- }
- std::stringstream ss(line);
- std::vector<std::string> record_strs;
- while (ss.good()) {
- std::string substr;
- getline(ss, substr, ',');
- record_strs.push_back(substr);
- }
- if (record_strs.size() != 21) {
- return Status::Incomplete("Records format is wrong.");
- }
- record->access_timestamp = ParseUint64(record_strs[0]);
- uint64_t block_key = ParseUint64(record_strs[1]);
- record->block_type = static_cast<TraceType>(ParseUint64(record_strs[2]));
- record->block_size = ParseUint64(record_strs[3]);
- record->cf_id = ParseUint64(record_strs[4]);
- record->cf_name = record_strs[5];
- record->level = static_cast<uint32_t>(ParseUint64(record_strs[6]));
- record->sst_fd_number = ParseUint64(record_strs[7]);
- record->caller = static_cast<TableReaderCaller>(ParseUint64(record_strs[8]));
- record->no_insert = static_cast<Boolean>(ParseUint64(record_strs[9]));
- record->get_id = ParseUint64(record_strs[10]);
- uint64_t get_key_id = ParseUint64(record_strs[11]);
- record->referenced_data_size = ParseUint64(record_strs[12]);
- record->is_cache_hit = static_cast<Boolean>(ParseUint64(record_strs[13]));
- record->referenced_key_exist_in_block =
- static_cast<Boolean>(ParseUint64(record_strs[14]));
- record->num_keys_in_block = ParseUint64(record_strs[15]);
- uint64_t table_id = ParseUint64(record_strs[16]);
- if (table_id > 0) {
- // Decrement since valid table id in the trace file equals traced table id
- // + 1.
- table_id -= 1;
- }
- uint64_t get_sequence_number = ParseUint64(record_strs[17]);
- if (get_sequence_number > 0) {
- record->get_from_user_specified_snapshot = Boolean::kTrue;
- // Decrement since valid seq number in the trace file equals traced seq
- // number + 1.
- get_sequence_number -= 1;
- }
- uint64_t block_key_size = ParseUint64(record_strs[18]);
- uint64_t get_key_size = ParseUint64(record_strs[19]);
- uint64_t block_offset = ParseUint64(record_strs[20]);
- std::string tmp_block_key;
- PutVarint64(&tmp_block_key, block_key);
- PutVarint64(&tmp_block_key, block_offset);
- // Append 1 until the size is the same as traced block key size.
- while (record->block_key.size() < block_key_size - tmp_block_key.size()) {
- record->block_key += "1";
- }
- record->block_key += tmp_block_key;
- if (get_key_id != 0) {
- std::string tmp_get_key;
- PutFixed64(&tmp_get_key, get_key_id);
- PutFixed64(&tmp_get_key, get_sequence_number << 8);
- PutFixed32(&record->referenced_key, static_cast<uint32_t>(table_id));
- // Append 1 until the size is the same as traced key size.
- while (record->referenced_key.size() < get_key_size - tmp_get_key.size()) {
- record->referenced_key += "1";
- }
- record->referenced_key += tmp_get_key;
- }
- return Status::OK();
- }
- BlockCacheTracer::BlockCacheTracer() { writer_.store(nullptr); }
- BlockCacheTracer::~BlockCacheTracer() { EndTrace(); }
- Status BlockCacheTracer::StartTrace(
- Env* env, const TraceOptions& trace_options,
- std::unique_ptr<TraceWriter>&& trace_writer) {
- InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
- if (writer_.load()) {
- return Status::Busy();
- }
- get_id_counter_.store(1);
- trace_options_ = trace_options;
- writer_.store(
- new BlockCacheTraceWriter(env, trace_options, std::move(trace_writer)));
- return writer_.load()->WriteHeader();
- }
- void BlockCacheTracer::EndTrace() {
- InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
- if (!writer_.load()) {
- return;
- }
- delete writer_.load();
- writer_.store(nullptr);
- }
- Status BlockCacheTracer::WriteBlockAccess(const BlockCacheTraceRecord& record,
- const Slice& block_key,
- const Slice& cf_name,
- const Slice& referenced_key) {
- if (!writer_.load() || !ShouldTrace(block_key, trace_options_)) {
- return Status::OK();
- }
- InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
- if (!writer_.load()) {
- return Status::OK();
- }
- return writer_.load()->WriteBlockAccess(record, block_key, cf_name,
- referenced_key);
- }
- uint64_t BlockCacheTracer::NextGetId() {
- if (!writer_.load(std::memory_order_relaxed)) {
- return BlockCacheTraceHelper::kReservedGetId;
- }
- uint64_t prev_value = get_id_counter_.fetch_add(1);
- if (prev_value == BlockCacheTraceHelper::kReservedGetId) {
- // fetch and add again.
- return get_id_counter_.fetch_add(1);
- }
- return prev_value;
- }
- } // namespace ROCKSDB_NAMESPACE
|