file_indexer.cc 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include "db/file_indexer.h"
  10. #include <algorithm>
  11. #include <functional>
  12. #include "db/version_edit.h"
  13. #include "rocksdb/comparator.h"
  14. namespace ROCKSDB_NAMESPACE {
  15. FileIndexer::FileIndexer(const Comparator* ucmp)
  16. : num_levels_(0), ucmp_(ucmp), level_rb_(nullptr) {}
  17. size_t FileIndexer::NumLevelIndex() const { return next_level_index_.size(); }
  18. size_t FileIndexer::LevelIndexSize(size_t level) const {
  19. if (level >= next_level_index_.size()) {
  20. return 0;
  21. }
  22. return next_level_index_[level].num_index;
  23. }
  24. void FileIndexer::GetNextLevelIndex(const size_t level, const size_t file_index,
  25. const int cmp_smallest,
  26. const int cmp_largest, int32_t* left_bound,
  27. int32_t* right_bound) const {
  28. assert(level > 0);
  29. // Last level, no hint
  30. if (level == num_levels_ - 1) {
  31. *left_bound = 0;
  32. *right_bound = -1;
  33. return;
  34. }
  35. assert(level < num_levels_ - 1);
  36. assert(static_cast<int32_t>(file_index) <= level_rb_[level]);
  37. const IndexUnit* index_units = next_level_index_[level].index_units;
  38. const auto& index = index_units[file_index];
  39. if (cmp_smallest < 0) {
  40. *left_bound = (level > 0 && file_index > 0)
  41. ? index_units[file_index - 1].largest_lb
  42. : 0;
  43. *right_bound = index.smallest_rb;
  44. } else if (cmp_smallest == 0) {
  45. *left_bound = index.smallest_lb;
  46. *right_bound = index.smallest_rb;
  47. } else if (cmp_smallest > 0 && cmp_largest < 0) {
  48. *left_bound = index.smallest_lb;
  49. *right_bound = index.largest_rb;
  50. } else if (cmp_largest == 0) {
  51. *left_bound = index.largest_lb;
  52. *right_bound = index.largest_rb;
  53. } else if (cmp_largest > 0) {
  54. *left_bound = index.largest_lb;
  55. *right_bound = level_rb_[level + 1];
  56. } else {
  57. assert(false);
  58. }
  59. assert(*left_bound >= 0);
  60. assert(*left_bound <= *right_bound + 1);
  61. assert(*right_bound <= level_rb_[level + 1]);
  62. }
  63. void FileIndexer::UpdateIndex(Arena* arena, const size_t num_levels,
  64. std::vector<FileMetaData*>* const files) {
  65. if (files == nullptr) {
  66. return;
  67. }
  68. if (num_levels == 0) { // uint_32 0-1 would cause bad behavior
  69. num_levels_ = num_levels;
  70. return;
  71. }
  72. assert(level_rb_ == nullptr); // level_rb_ should be init here
  73. num_levels_ = num_levels;
  74. next_level_index_.resize(num_levels);
  75. char* mem = arena->AllocateAligned(num_levels_ * sizeof(int32_t));
  76. level_rb_ = new (mem) int32_t[num_levels_];
  77. for (size_t i = 0; i < num_levels_; i++) {
  78. level_rb_[i] = -1;
  79. }
  80. // L1 - Ln-1
  81. for (size_t level = 1; level < num_levels_ - 1; ++level) {
  82. const auto& upper_files = files[level];
  83. const int32_t upper_size = static_cast<int32_t>(upper_files.size());
  84. const auto& lower_files = files[level + 1];
  85. level_rb_[level] = static_cast<int32_t>(upper_files.size()) - 1;
  86. if (upper_size == 0) {
  87. continue;
  88. }
  89. IndexLevel& index_level = next_level_index_[level];
  90. index_level.num_index = upper_size;
  91. mem = arena->AllocateAligned(upper_size * sizeof(IndexUnit));
  92. index_level.index_units = new (mem) IndexUnit[upper_size];
  93. CalculateLB(
  94. upper_files, lower_files, &index_level,
  95. [this](const FileMetaData* a, const FileMetaData* b) -> int {
  96. return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(),
  97. b->largest.user_key());
  98. },
  99. [](IndexUnit* index, int32_t f_idx) { index->smallest_lb = f_idx; });
  100. CalculateLB(
  101. upper_files, lower_files, &index_level,
  102. [this](const FileMetaData* a, const FileMetaData* b) -> int {
  103. return ucmp_->CompareWithoutTimestamp(a->largest.user_key(),
  104. b->largest.user_key());
  105. },
  106. [](IndexUnit* index, int32_t f_idx) { index->largest_lb = f_idx; });
  107. CalculateRB(
  108. upper_files, lower_files, &index_level,
  109. [this](const FileMetaData* a, const FileMetaData* b) -> int {
  110. return ucmp_->CompareWithoutTimestamp(a->smallest.user_key(),
  111. b->smallest.user_key());
  112. },
  113. [](IndexUnit* index, int32_t f_idx) { index->smallest_rb = f_idx; });
  114. CalculateRB(
  115. upper_files, lower_files, &index_level,
  116. [this](const FileMetaData* a, const FileMetaData* b) -> int {
  117. return ucmp_->CompareWithoutTimestamp(a->largest.user_key(),
  118. b->smallest.user_key());
  119. },
  120. [](IndexUnit* index, int32_t f_idx) { index->largest_rb = f_idx; });
  121. }
  122. level_rb_[num_levels_ - 1] =
  123. static_cast<int32_t>(files[num_levels_ - 1].size()) - 1;
  124. }
  125. void FileIndexer::CalculateLB(
  126. const std::vector<FileMetaData*>& upper_files,
  127. const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
  128. std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
  129. std::function<void(IndexUnit*, int32_t)> set_index) {
  130. const int32_t upper_size = static_cast<int32_t>(upper_files.size());
  131. const int32_t lower_size = static_cast<int32_t>(lower_files.size());
  132. int32_t upper_idx = 0;
  133. int32_t lower_idx = 0;
  134. IndexUnit* index = index_level->index_units;
  135. while (upper_idx < upper_size && lower_idx < lower_size) {
  136. int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
  137. if (cmp == 0) {
  138. set_index(&index[upper_idx], lower_idx);
  139. ++upper_idx;
  140. } else if (cmp > 0) {
  141. // Lower level's file (largest) is smaller, a key won't hit in that
  142. // file. Move to next lower file
  143. ++lower_idx;
  144. } else {
  145. // Lower level's file becomes larger, update the index, and
  146. // move to the next upper file
  147. set_index(&index[upper_idx], lower_idx);
  148. ++upper_idx;
  149. }
  150. }
  151. while (upper_idx < upper_size) {
  152. // Lower files are exhausted, that means the remaining upper files are
  153. // greater than any lower files. Set the index to be the lower level size.
  154. set_index(&index[upper_idx], lower_size);
  155. ++upper_idx;
  156. }
  157. }
  158. void FileIndexer::CalculateRB(
  159. const std::vector<FileMetaData*>& upper_files,
  160. const std::vector<FileMetaData*>& lower_files, IndexLevel* index_level,
  161. std::function<int(const FileMetaData*, const FileMetaData*)> cmp_op,
  162. std::function<void(IndexUnit*, int32_t)> set_index) {
  163. const int32_t upper_size = static_cast<int32_t>(upper_files.size());
  164. const int32_t lower_size = static_cast<int32_t>(lower_files.size());
  165. int32_t upper_idx = upper_size - 1;
  166. int32_t lower_idx = lower_size - 1;
  167. IndexUnit* index = index_level->index_units;
  168. while (upper_idx >= 0 && lower_idx >= 0) {
  169. int cmp = cmp_op(upper_files[upper_idx], lower_files[lower_idx]);
  170. if (cmp == 0) {
  171. set_index(&index[upper_idx], lower_idx);
  172. --upper_idx;
  173. } else if (cmp < 0) {
  174. // Lower level's file (smallest) is larger, a key won't hit in that
  175. // file. Move to next lower file.
  176. --lower_idx;
  177. } else {
  178. // Lower level's file becomes smaller, update the index, and move to
  179. // the next the upper file
  180. set_index(&index[upper_idx], lower_idx);
  181. --upper_idx;
  182. }
  183. }
  184. while (upper_idx >= 0) {
  185. // Lower files are exhausted, that means the remaining upper files are
  186. // smaller than any lower files. Set it to -1.
  187. set_index(&index[upper_idx], -1);
  188. --upper_idx;
  189. }
  190. }
  191. } // namespace ROCKSDB_NAMESPACE