plain_table_key_coding.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. #pragma once
  6. #include <array>
  7. #include "rocksdb/slice.h"
  8. #include "table/plain/plain_table_reader.h"
  9. // The file contains three helper classes of PlainTable format,
  10. // PlainTableKeyEncoder, PlainTableKeyDecoder and PlainTableFileReader.
  11. // These classes issue the lowest level of operations of PlainTable.
  12. // Actual data format of the key is documented in comments of class
  13. // PlainTableFactory.
  14. namespace ROCKSDB_NAMESPACE {
  15. class WritableFile;
  16. struct ParsedInternalKey;
  17. struct PlainTableReaderFileInfo;
  18. enum PlainTableEntryType : unsigned char;
  19. // Helper class for PlainTable format to write out a key to an output file
  20. // The class is used in PlainTableBuilder.
  21. class PlainTableKeyEncoder {
  22. public:
  23. explicit PlainTableKeyEncoder(EncodingType encoding_type,
  24. uint32_t user_key_len,
  25. const SliceTransform* prefix_extractor,
  26. size_t index_sparseness)
  27. : encoding_type_((prefix_extractor != nullptr) ? encoding_type : kPlain),
  28. fixed_user_key_len_(user_key_len),
  29. prefix_extractor_(prefix_extractor),
  30. index_sparseness_((index_sparseness > 1) ? index_sparseness : 1),
  31. key_count_for_prefix_(0) {}
  32. // key: the key to write out, in the format of internal key.
  33. // file: the output file to write out
  34. // offset: offset in the file. Needs to be updated after appending bytes
  35. // for the key
  36. // meta_bytes_buf: buffer for extra meta bytes
  37. // meta_bytes_buf_size: offset to append extra meta bytes. Will be updated
  38. // if meta_bytes_buf is updated.
  39. IOStatus AppendKey(const Slice& key, WritableFileWriter* file,
  40. uint64_t* offset, char* meta_bytes_buf,
  41. size_t* meta_bytes_buf_size);
  42. // Return actual encoding type to be picked
  43. EncodingType GetEncodingType() { return encoding_type_; }
  44. private:
  45. EncodingType encoding_type_;
  46. uint32_t fixed_user_key_len_;
  47. const SliceTransform* prefix_extractor_;
  48. const size_t index_sparseness_;
  49. size_t key_count_for_prefix_;
  50. IterKey pre_prefix_;
  51. };
  52. // The class does raw file reads for PlainTableReader.
  53. // It hides whether it is a mmap-read, or a non-mmap read.
  54. // The class is implemented in a way to favor the performance of mmap case.
  55. // The class is used by PlainTableReader.
  56. class PlainTableFileReader {
  57. public:
  58. explicit PlainTableFileReader(const PlainTableReaderFileInfo* _file_info)
  59. : file_info_(_file_info), num_buf_(0) {}
  60. ~PlainTableFileReader() {
  61. // Should fix.
  62. status_.PermitUncheckedError();
  63. }
  64. // In mmaped mode, the results point to mmaped area of the file, which
  65. // means it is always valid before closing the file.
  66. // In non-mmap mode, the results point to an internal buffer. If the caller
  67. // makes another read call, the results may not be valid. So callers should
  68. // make a copy when needed.
  69. // In order to save read calls to files, we keep two internal buffers:
  70. // the first read and the most recent read. This is efficient because it
  71. // columns these two common use cases:
  72. // (1) hash index only identify one location, we read the key to verify
  73. // the location, and read key and value if it is the right location.
  74. // (2) after hash index checking, we identify two locations (because of
  75. // hash bucket conflicts), we binary search the two location to see
  76. // which one is what we need and start to read from the location.
  77. // These two most common use cases will be covered by the two buffers
  78. // so that we don't need to re-read the same location.
  79. // Currently we keep a fixed size buffer. If a read doesn't exactly fit
  80. // the buffer, we replace the second buffer with the location user reads.
  81. //
  82. // If return false, status code is stored in status_.
  83. bool Read(uint32_t file_offset, uint32_t len, Slice* out) {
  84. if (file_info_->is_mmap_mode) {
  85. assert(file_offset + len <= file_info_->data_end_offset);
  86. *out = Slice(file_info_->file_data.data() + file_offset, len);
  87. return true;
  88. } else {
  89. return ReadNonMmap(file_offset, len, out);
  90. }
  91. }
  92. // If return false, status code is stored in status_.
  93. bool ReadNonMmap(uint32_t file_offset, uint32_t len, Slice* output);
  94. // *bytes_read = 0 means eof. false means failure and status is saved
  95. // in status_. Not directly returning Status to save copying status
  96. // object to map previous performance of mmap mode.
  97. inline bool ReadVarint32(uint32_t offset, uint32_t* output,
  98. uint32_t* bytes_read);
  99. bool ReadVarint32NonMmap(uint32_t offset, uint32_t* output,
  100. uint32_t* bytes_read);
  101. Status status() const { return status_; }
  102. const PlainTableReaderFileInfo* file_info() { return file_info_; }
  103. private:
  104. const PlainTableReaderFileInfo* file_info_;
  105. struct Buffer {
  106. Buffer() : buf_start_offset(0), buf_len(0), buf_capacity(0) {}
  107. std::unique_ptr<char[]> buf;
  108. uint32_t buf_start_offset;
  109. uint32_t buf_len;
  110. uint32_t buf_capacity;
  111. };
  112. // Keep buffers for two recent reads.
  113. std::array<std::unique_ptr<Buffer>, 2> buffers_;
  114. uint32_t num_buf_;
  115. Status status_;
  116. Slice GetFromBuffer(Buffer* buf, uint32_t file_offset, uint32_t len);
  117. };
  118. // A helper class to decode keys from input buffer
  119. // The class is used by PlainTableBuilder.
  120. class PlainTableKeyDecoder {
  121. public:
  122. explicit PlainTableKeyDecoder(const PlainTableReaderFileInfo* file_info,
  123. EncodingType encoding_type,
  124. uint32_t user_key_len,
  125. const SliceTransform* prefix_extractor)
  126. : file_reader_(file_info),
  127. encoding_type_(encoding_type),
  128. prefix_len_(0),
  129. fixed_user_key_len_(user_key_len),
  130. prefix_extractor_(prefix_extractor),
  131. in_prefix_(false) {}
  132. // Find the next key.
  133. // start: char array where the key starts.
  134. // limit: boundary of the char array
  135. // parsed_key: the output of the result key
  136. // internal_key: if not null, fill with the output of the result key in
  137. // un-parsed format
  138. // bytes_read: how many bytes read from start. Output
  139. // seekable: whether key can be read from this place. Used when building
  140. // indexes. Output.
  141. Status NextKey(uint32_t start_offset, ParsedInternalKey* parsed_key,
  142. Slice* internal_key, Slice* value, uint32_t* bytes_read,
  143. bool* seekable = nullptr);
  144. Status NextKeyNoValue(uint32_t start_offset, ParsedInternalKey* parsed_key,
  145. Slice* internal_key, uint32_t* bytes_read,
  146. bool* seekable = nullptr);
  147. PlainTableFileReader file_reader_;
  148. EncodingType encoding_type_;
  149. uint32_t prefix_len_;
  150. uint32_t fixed_user_key_len_;
  151. Slice saved_user_key_;
  152. IterKey cur_key_;
  153. const SliceTransform* prefix_extractor_;
  154. bool in_prefix_;
  155. private:
  156. Status NextPlainEncodingKey(uint32_t start_offset,
  157. ParsedInternalKey* parsed_key,
  158. Slice* internal_key, uint32_t* bytes_read,
  159. bool* seekable = nullptr);
  160. Status NextPrefixEncodingKey(uint32_t start_offset,
  161. ParsedInternalKey* parsed_key,
  162. Slice* internal_key, uint32_t* bytes_read,
  163. bool* seekable = nullptr);
  164. Status ReadInternalKey(uint32_t file_offset, uint32_t user_key_size,
  165. ParsedInternalKey* parsed_key, uint32_t* bytes_read,
  166. bool* internal_key_valid, Slice* internal_key);
  167. inline Status DecodeSize(uint32_t start_offset,
  168. PlainTableEntryType* entry_type, uint32_t* key_size,
  169. uint32_t* bytes_read);
  170. };
  171. } // namespace ROCKSDB_NAMESPACE