// plain_table_db_test.cc
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
  5. //
  6. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
  7. // Use of this source code is governed by a BSD-style license that can be
  8. // found in the LICENSE file. See the AUTHORS file for names of contributors.
  9. #include <algorithm>
  10. #include <set>
  11. #include "db/db_impl/db_impl.h"
  12. #include "db/version_set.h"
  13. #include "db/write_batch_internal.h"
  14. #include "file/filename.h"
  15. #include "rocksdb/cache.h"
  16. #include "rocksdb/compaction_filter.h"
  17. #include "rocksdb/db.h"
  18. #include "rocksdb/env.h"
  19. #include "rocksdb/filter_policy.h"
  20. #include "rocksdb/slice_transform.h"
  21. #include "rocksdb/table.h"
  22. #include "table/meta_blocks.h"
  23. #include "table/plain/plain_table_bloom.h"
  24. #include "table/plain/plain_table_factory.h"
  25. #include "table/plain/plain_table_key_coding.h"
  26. #include "table/plain/plain_table_reader.h"
  27. #include "table/table_builder.h"
  28. #include "test_util/testharness.h"
  29. #include "test_util/testutil.h"
  30. #include "util/cast_util.h"
  31. #include "util/hash.h"
  32. #include "util/mutexlock.h"
  33. #include "util/random.h"
  34. #include "util/string_util.h"
  35. #include "utilities/merge_operators.h"
  36. namespace ROCKSDB_NAMESPACE {
  37. class PlainTableKeyDecoderTest : public testing::Test {};
  38. TEST_F(PlainTableKeyDecoderTest, ReadNonMmap) {
  39. Random rnd(301);
  40. const uint32_t kLength = 2222;
  41. std::string tmp = rnd.RandomString(kLength);
  42. Slice contents(tmp);
  43. test::StringSource* string_source =
  44. new test::StringSource(contents, 0, false);
  45. std::unique_ptr<FSRandomAccessFile> holder(string_source);
  46. std::unique_ptr<RandomAccessFileReader> file_reader(
  47. new RandomAccessFileReader(std::move(holder), "test"));
  48. std::unique_ptr<PlainTableReaderFileInfo> file_info(
  49. new PlainTableReaderFileInfo(std::move(file_reader), EnvOptions(),
  50. kLength));
  51. {
  52. PlainTableFileReader reader(file_info.get());
  53. const uint32_t kReadSize = 77;
  54. for (uint32_t pos = 0; pos < kLength; pos += kReadSize) {
  55. uint32_t read_size = std::min(kLength - pos, kReadSize);
  56. Slice out;
  57. ASSERT_TRUE(reader.Read(pos, read_size, &out));
  58. ASSERT_EQ(0, out.compare(tmp.substr(pos, read_size)));
  59. }
  60. ASSERT_LT(uint32_t(string_source->total_reads()), kLength / kReadSize / 2);
  61. }
  62. std::vector<std::vector<std::pair<uint32_t, uint32_t>>> reads = {
  63. {{600, 30}, {590, 30}, {600, 20}, {600, 40}},
  64. {{800, 20}, {100, 20}, {500, 20}, {1500, 20}, {100, 20}, {80, 20}},
  65. {{1000, 20}, {500, 20}, {1000, 50}},
  66. {{1000, 20}, {500, 20}, {500, 20}},
  67. {{1000, 20}, {500, 20}, {200, 20}, {500, 20}},
  68. {{1000, 20}, {500, 20}, {200, 20}, {1000, 50}},
  69. {{600, 500}, {610, 20}, {100, 20}},
  70. {{500, 100}, {490, 100}, {550, 50}},
  71. };
  72. std::vector<int> num_file_reads = {2, 6, 2, 2, 4, 3, 2, 2};
  73. for (size_t i = 0; i < reads.size(); i++) {
  74. string_source->set_total_reads(0);
  75. PlainTableFileReader reader(file_info.get());
  76. for (auto p : reads[i]) {
  77. Slice out;
  78. ASSERT_TRUE(reader.Read(p.first, p.second, &out));
  79. ASSERT_EQ(0, out.compare(tmp.substr(p.first, p.second)));
  80. }
  81. ASSERT_EQ(num_file_reads[i], string_source->total_reads());
  82. }
  83. }
  84. class PlainTableDBTest : public testing::Test,
  85. public testing::WithParamInterface<bool> {
  86. protected:
  87. private:
  88. std::string dbname_;
  89. Env* env_;
  90. DB* db_;
  91. bool mmap_mode_;
  92. Options last_options_;
  93. public:
  94. PlainTableDBTest() : env_(Env::Default()) {}
  95. ~PlainTableDBTest() override {
  96. delete db_;
  97. EXPECT_OK(DestroyDB(dbname_, Options()));
  98. }
  99. void SetUp() override {
  100. mmap_mode_ = GetParam();
  101. dbname_ = test::PerThreadDBPath("plain_table_db_test");
  102. EXPECT_OK(DestroyDB(dbname_, Options()));
  103. db_ = nullptr;
  104. Reopen();
  105. }
  106. // Return the current option configuration.
  107. Options CurrentOptions() {
  108. Options options;
  109. options.level_compaction_dynamic_level_bytes = false;
  110. PlainTableOptions plain_table_options;
  111. plain_table_options.user_key_len = 0;
  112. plain_table_options.bloom_bits_per_key = 2;
  113. plain_table_options.hash_table_ratio = 0.8;
  114. plain_table_options.index_sparseness = 3;
  115. plain_table_options.huge_page_tlb_size = 0;
  116. plain_table_options.encoding_type = kPrefix;
  117. plain_table_options.full_scan_mode = false;
  118. plain_table_options.store_index_in_file = false;
  119. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  120. options.memtable_factory.reset(NewHashLinkListRepFactory(4, 0, 3, true));
  121. options.prefix_extractor.reset(NewFixedPrefixTransform(8));
  122. options.allow_mmap_reads = mmap_mode_;
  123. options.allow_concurrent_memtable_write = false;
  124. options.unordered_write = false;
  125. return options;
  126. }
  127. DBImpl* dbfull() { return static_cast_with_check<DBImpl>(db_); }
  128. void Reopen(Options* options = nullptr) { ASSERT_OK(TryReopen(options)); }
  129. void Close() {
  130. delete db_;
  131. db_ = nullptr;
  132. }
  133. bool mmap_mode() const { return mmap_mode_; }
  134. void DestroyAndReopen(Options* options = nullptr) {
  135. // Destroy using last options
  136. Destroy(&last_options_);
  137. ASSERT_OK(TryReopen(options));
  138. }
  139. void Destroy(Options* options) {
  140. delete db_;
  141. db_ = nullptr;
  142. ASSERT_OK(DestroyDB(dbname_, *options));
  143. }
  144. Status PureReopen(Options* options, DB** db) {
  145. return DB::Open(*options, dbname_, db);
  146. }
  147. Status ReopenForReadOnly(Options* options) {
  148. delete db_;
  149. db_ = nullptr;
  150. return DB::OpenForReadOnly(*options, dbname_, &db_);
  151. }
  152. Status TryReopen(Options* options = nullptr) {
  153. delete db_;
  154. db_ = nullptr;
  155. Options opts;
  156. if (options != nullptr) {
  157. opts = *options;
  158. } else {
  159. opts = CurrentOptions();
  160. opts.create_if_missing = true;
  161. }
  162. last_options_ = opts;
  163. return DB::Open(opts, dbname_, &db_);
  164. }
  165. Status Put(const Slice& k, const Slice& v) {
  166. return db_->Put(WriteOptions(), k, v);
  167. }
  168. Status Delete(const std::string& k) { return db_->Delete(WriteOptions(), k); }
  169. std::string Get(const std::string& k, const Snapshot* snapshot = nullptr) {
  170. ReadOptions options;
  171. options.snapshot = snapshot;
  172. std::string result;
  173. Status s = db_->Get(options, k, &result);
  174. if (s.IsNotFound()) {
  175. result = "NOT_FOUND";
  176. } else if (!s.ok()) {
  177. result = s.ToString();
  178. }
  179. return result;
  180. }
  181. int NumTableFilesAtLevel(int level) {
  182. std::string property;
  183. EXPECT_TRUE(db_->GetProperty(
  184. "rocksdb.num-files-at-level" + std::to_string(level), &property));
  185. return atoi(property.c_str());
  186. }
  187. // Return spread of files per level
  188. std::string FilesPerLevel() {
  189. std::string result;
  190. size_t last_non_zero_offset = 0;
  191. for (int level = 0; level < db_->NumberLevels(); level++) {
  192. int f = NumTableFilesAtLevel(level);
  193. char buf[100];
  194. snprintf(buf, sizeof(buf), "%s%d", (level ? "," : ""), f);
  195. result += buf;
  196. if (f > 0) {
  197. last_non_zero_offset = result.size();
  198. }
  199. }
  200. result.resize(last_non_zero_offset);
  201. return result;
  202. }
  203. std::string IterStatus(Iterator* iter) {
  204. std::string result;
  205. if (iter->Valid()) {
  206. result = iter->key().ToString() + "->" + iter->value().ToString();
  207. } else {
  208. result = "(invalid)";
  209. }
  210. return result;
  211. }
  212. };
  213. TEST_P(PlainTableDBTest, Empty) {
  214. ASSERT_TRUE(dbfull() != nullptr);
  215. ASSERT_EQ("NOT_FOUND", Get("0000000000000foo"));
  216. }
  217. class TestPlainTableReader : public PlainTableReader {
  218. public:
  219. TestPlainTableReader(
  220. const EnvOptions& env_options, const InternalKeyComparator& icomparator,
  221. EncodingType encoding_type, uint64_t file_size, int bloom_bits_per_key,
  222. double hash_table_ratio, size_t index_sparseness,
  223. std::unique_ptr<TableProperties>&& props,
  224. std::unique_ptr<RandomAccessFileReader>&& file,
  225. const ImmutableOptions& ioptions, const SliceTransform* prefix_extractor,
  226. bool* expect_bloom_not_match, bool store_index_in_file,
  227. uint32_t column_family_id, const std::string& column_family_name)
  228. : PlainTableReader(ioptions, std::move(file), env_options, icomparator,
  229. encoding_type, file_size, props.get(),
  230. prefix_extractor),
  231. expect_bloom_not_match_(expect_bloom_not_match) {
  232. Status s = MmapDataIfNeeded();
  233. EXPECT_TRUE(s.ok());
  234. s = PopulateIndex(props.get(), bloom_bits_per_key, hash_table_ratio,
  235. index_sparseness, 2 * 1024 * 1024);
  236. EXPECT_TRUE(s.ok());
  237. EXPECT_EQ(column_family_id, static_cast<uint32_t>(props->column_family_id));
  238. EXPECT_EQ(column_family_name, props->column_family_name);
  239. if (store_index_in_file) {
  240. auto bloom_version_ptr = props->user_collected_properties.find(
  241. PlainTablePropertyNames::kBloomVersion);
  242. EXPECT_TRUE(bloom_version_ptr != props->user_collected_properties.end());
  243. EXPECT_EQ(bloom_version_ptr->second, std::string("1"));
  244. if (ioptions.bloom_locality > 0) {
  245. auto num_blocks_ptr = props->user_collected_properties.find(
  246. PlainTablePropertyNames::kNumBloomBlocks);
  247. EXPECT_TRUE(num_blocks_ptr != props->user_collected_properties.end());
  248. }
  249. }
  250. table_properties_ = std::move(props);
  251. }
  252. ~TestPlainTableReader() override = default;
  253. private:
  254. bool MatchBloom(uint32_t hash) const override {
  255. bool ret = PlainTableReader::MatchBloom(hash);
  256. if (*expect_bloom_not_match_) {
  257. EXPECT_TRUE(!ret);
  258. } else {
  259. EXPECT_TRUE(ret);
  260. }
  261. return ret;
  262. }
  263. bool* expect_bloom_not_match_;
  264. };
  265. class TestPlainTableFactory : public PlainTableFactory {
  266. public:
  267. explicit TestPlainTableFactory(bool* expect_bloom_not_match,
  268. const PlainTableOptions& options,
  269. uint32_t column_family_id,
  270. std::string column_family_name)
  271. : PlainTableFactory(options),
  272. bloom_bits_per_key_(options.bloom_bits_per_key),
  273. hash_table_ratio_(options.hash_table_ratio),
  274. index_sparseness_(options.index_sparseness),
  275. store_index_in_file_(options.store_index_in_file),
  276. expect_bloom_not_match_(expect_bloom_not_match),
  277. column_family_id_(column_family_id),
  278. column_family_name_(std::move(column_family_name)) {}
  279. using PlainTableFactory::NewTableReader;
  280. Status NewTableReader(
  281. const ReadOptions& /*ro*/, const TableReaderOptions& table_reader_options,
  282. std::unique_ptr<RandomAccessFileReader>&& file, uint64_t file_size,
  283. std::unique_ptr<TableReader>* table,
  284. bool /*prefetch_index_and_filter_in_cache*/) const override {
  285. std::unique_ptr<TableProperties> props;
  286. const ReadOptions read_options;
  287. auto s = ReadTableProperties(file.get(), file_size, kPlainTableMagicNumber,
  288. table_reader_options.ioptions, read_options,
  289. &props);
  290. EXPECT_TRUE(s.ok());
  291. if (store_index_in_file_) {
  292. BlockHandle bloom_block_handle;
  293. s = FindMetaBlockInFile(file.get(), file_size, kPlainTableMagicNumber,
  294. table_reader_options.ioptions, read_options,
  295. BloomBlockBuilder::kBloomBlock,
  296. &bloom_block_handle);
  297. EXPECT_TRUE(s.ok());
  298. BlockHandle index_block_handle;
  299. s = FindMetaBlockInFile(file.get(), file_size, kPlainTableMagicNumber,
  300. table_reader_options.ioptions, read_options,
  301. PlainTableIndexBuilder::kPlainTableIndexBlock,
  302. &index_block_handle);
  303. EXPECT_TRUE(s.ok());
  304. }
  305. auto& user_props = props->user_collected_properties;
  306. auto encoding_type_prop =
  307. user_props.find(PlainTablePropertyNames::kEncodingType);
  308. assert(encoding_type_prop != user_props.end());
  309. EncodingType encoding_type = static_cast<EncodingType>(
  310. DecodeFixed32(encoding_type_prop->second.c_str()));
  311. std::unique_ptr<PlainTableReader> new_reader(new TestPlainTableReader(
  312. table_reader_options.env_options,
  313. table_reader_options.internal_comparator, encoding_type, file_size,
  314. bloom_bits_per_key_, hash_table_ratio_, index_sparseness_,
  315. std::move(props), std::move(file), table_reader_options.ioptions,
  316. table_reader_options.prefix_extractor.get(), expect_bloom_not_match_,
  317. store_index_in_file_, column_family_id_, column_family_name_));
  318. *table = std::move(new_reader);
  319. return s;
  320. }
  321. private:
  322. int bloom_bits_per_key_;
  323. double hash_table_ratio_;
  324. size_t index_sparseness_;
  325. bool store_index_in_file_;
  326. bool* expect_bloom_not_match_;
  327. const uint32_t column_family_id_;
  328. const std::string column_family_name_;
  329. };
  330. TEST_P(PlainTableDBTest, BadOptions1) {
  331. // Build with a prefix extractor
  332. ASSERT_OK(Put("1000000000000foo", "v1"));
  333. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  334. // Bad attempt to re-open without a prefix extractor
  335. Options options = CurrentOptions();
  336. options.prefix_extractor.reset();
  337. ASSERT_EQ(
  338. "Invalid argument: Prefix extractor is missing when opening a PlainTable "
  339. "built using a prefix extractor",
  340. TryReopen(&options).ToString());
  341. // Bad attempt to re-open with different prefix extractor
  342. options.prefix_extractor.reset(NewFixedPrefixTransform(6));
  343. ASSERT_EQ(
  344. "Invalid argument: Prefix extractor given doesn't match the one used to "
  345. "build PlainTable",
  346. TryReopen(&options).ToString());
  347. // Correct prefix extractor
  348. options.prefix_extractor.reset(NewFixedPrefixTransform(8));
  349. Reopen(&options);
  350. ASSERT_EQ("v1", Get("1000000000000foo"));
  351. }
  352. TEST_P(PlainTableDBTest, BadOptions2) {
  353. Options options = CurrentOptions();
  354. options.prefix_extractor.reset();
  355. options.create_if_missing = true;
  356. DestroyAndReopen(&options);
  357. // Build without a prefix extractor
  358. // (apparently works even if hash_table_ratio > 0)
  359. ASSERT_OK(Put("1000000000000foo", "v1"));
  360. // Build without a prefix extractor, this call will fail and returns the
  361. // status for this bad attempt.
  362. ASSERT_NOK(dbfull()->TEST_FlushMemTable());
  363. // Bad attempt to re-open with hash_table_ratio > 0 and no prefix extractor
  364. Status s = TryReopen(&options);
  365. ASSERT_EQ(
  366. "Not implemented: PlainTable requires a prefix extractor enable prefix "
  367. "hash mode.",
  368. s.ToString());
  369. // OK to open with hash_table_ratio == 0 and no prefix extractor
  370. PlainTableOptions plain_table_options;
  371. plain_table_options.hash_table_ratio = 0;
  372. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  373. Reopen(&options);
  374. ASSERT_EQ("v1", Get("1000000000000foo"));
  375. // OK to open newly with a prefix_extractor and hash table; builds index
  376. // in memory.
  377. options = CurrentOptions();
  378. Reopen(&options);
  379. ASSERT_EQ("v1", Get("1000000000000foo"));
  380. }
  381. TEST_P(PlainTableDBTest, Flush) {
  382. for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
  383. huge_page_tlb_size += 2 * 1024 * 1024) {
  384. for (EncodingType encoding_type : {kPlain, kPrefix}) {
  385. for (int bloom = -1; bloom <= 117; bloom += 117) {
  386. const int bloom_bits = std::max(bloom, 0);
  387. const bool full_scan_mode = bloom < 0;
  388. for (int total_order = 0; total_order <= 1; total_order++) {
  389. for (int store_index_in_file = 0; store_index_in_file <= 1;
  390. ++store_index_in_file) {
  391. Options options = CurrentOptions();
  392. options.create_if_missing = true;
  393. // Set only one bucket to force bucket conflict.
  394. // Test index interval for the same prefix to be 1, 2 and 4
  395. if (total_order) {
  396. options.prefix_extractor.reset();
  397. PlainTableOptions plain_table_options;
  398. plain_table_options.user_key_len = 0;
  399. plain_table_options.bloom_bits_per_key = bloom_bits;
  400. plain_table_options.hash_table_ratio = 0;
  401. plain_table_options.index_sparseness = 2;
  402. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  403. plain_table_options.encoding_type = encoding_type;
  404. plain_table_options.full_scan_mode = full_scan_mode;
  405. plain_table_options.store_index_in_file = store_index_in_file;
  406. options.table_factory.reset(
  407. NewPlainTableFactory(plain_table_options));
  408. } else {
  409. PlainTableOptions plain_table_options;
  410. plain_table_options.user_key_len = 0;
  411. plain_table_options.bloom_bits_per_key = bloom_bits;
  412. plain_table_options.hash_table_ratio = 0.75;
  413. plain_table_options.index_sparseness = 16;
  414. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  415. plain_table_options.encoding_type = encoding_type;
  416. plain_table_options.full_scan_mode = full_scan_mode;
  417. plain_table_options.store_index_in_file = store_index_in_file;
  418. options.table_factory.reset(
  419. NewPlainTableFactory(plain_table_options));
  420. }
  421. DestroyAndReopen(&options);
  422. uint64_t int_num;
  423. ASSERT_TRUE(dbfull()->GetIntProperty(
  424. "rocksdb.estimate-table-readers-mem", &int_num));
  425. ASSERT_EQ(int_num, 0U);
  426. ASSERT_OK(Put("1000000000000foo", "v1"));
  427. ASSERT_OK(Put("0000000000000bar", "v2"));
  428. ASSERT_OK(Put("1000000000000foo", "v3"));
  429. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  430. ASSERT_TRUE(dbfull()->GetIntProperty(
  431. "rocksdb.estimate-table-readers-mem", &int_num));
  432. ASSERT_GT(int_num, 0U);
  433. TablePropertiesCollection ptc;
  434. ASSERT_OK(
  435. static_cast<DB*>(dbfull())->GetPropertiesOfAllTables(&ptc));
  436. ASSERT_EQ(1U, ptc.size());
  437. auto row = ptc.begin();
  438. auto tp = row->second;
  439. if (full_scan_mode) {
  440. // Does not support Get/Seek
  441. std::unique_ptr<Iterator> iter(
  442. dbfull()->NewIterator(ReadOptions()));
  443. iter->SeekToFirst();
  444. ASSERT_TRUE(iter->Valid());
  445. ASSERT_EQ("0000000000000bar", iter->key().ToString());
  446. ASSERT_EQ("v2", iter->value().ToString());
  447. iter->Next();
  448. ASSERT_TRUE(iter->Valid());
  449. ASSERT_EQ("1000000000000foo", iter->key().ToString());
  450. ASSERT_EQ("v3", iter->value().ToString());
  451. iter->Next();
  452. ASSERT_TRUE(!iter->Valid());
  453. ASSERT_TRUE(iter->status().ok());
  454. } else {
  455. if (!store_index_in_file) {
  456. ASSERT_EQ(total_order ? "4" : "12",
  457. (tp->user_collected_properties)
  458. .at("plain_table_hash_table_size"));
  459. ASSERT_EQ("0", (tp->user_collected_properties)
  460. .at("plain_table_sub_index_size"));
  461. } else {
  462. ASSERT_EQ("0", (tp->user_collected_properties)
  463. .at("plain_table_hash_table_size"));
  464. ASSERT_EQ("0", (tp->user_collected_properties)
  465. .at("plain_table_sub_index_size"));
  466. }
  467. ASSERT_EQ("v3", Get("1000000000000foo"));
  468. ASSERT_EQ("v2", Get("0000000000000bar"));
  469. }
  470. }
  471. }
  472. }
  473. }
  474. }
  475. }
  476. TEST_P(PlainTableDBTest, Flush2) {
  477. for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
  478. huge_page_tlb_size += 2 * 1024 * 1024) {
  479. for (EncodingType encoding_type : {kPlain, kPrefix}) {
  480. for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
  481. for (int total_order = 0; total_order <= 1; total_order++) {
  482. for (int store_index_in_file = 0; store_index_in_file <= 1;
  483. ++store_index_in_file) {
  484. if (encoding_type == kPrefix && total_order) {
  485. continue;
  486. }
  487. if (!bloom_bits && store_index_in_file) {
  488. continue;
  489. }
  490. if (total_order && store_index_in_file) {
  491. continue;
  492. }
  493. bool expect_bloom_not_match = false;
  494. Options options = CurrentOptions();
  495. options.create_if_missing = true;
  496. // Set only one bucket to force bucket conflict.
  497. // Test index interval for the same prefix to be 1, 2 and 4
  498. PlainTableOptions plain_table_options;
  499. if (total_order) {
  500. options.prefix_extractor = nullptr;
  501. plain_table_options.hash_table_ratio = 0;
  502. plain_table_options.index_sparseness = 2;
  503. } else {
  504. plain_table_options.hash_table_ratio = 0.75;
  505. plain_table_options.index_sparseness = 16;
  506. }
  507. plain_table_options.user_key_len = kPlainTableVariableLength;
  508. plain_table_options.bloom_bits_per_key = bloom_bits;
  509. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  510. plain_table_options.encoding_type = encoding_type;
  511. plain_table_options.store_index_in_file = store_index_in_file;
  512. options.table_factory.reset(new TestPlainTableFactory(
  513. &expect_bloom_not_match, plain_table_options,
  514. 0 /* column_family_id */, kDefaultColumnFamilyName));
  515. DestroyAndReopen(&options);
  516. ASSERT_OK(Put("0000000000000bar", "b"));
  517. ASSERT_OK(Put("1000000000000foo", "v1"));
  518. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  519. ASSERT_OK(Put("1000000000000foo", "v2"));
  520. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  521. ASSERT_EQ("v2", Get("1000000000000foo"));
  522. ASSERT_OK(Put("0000000000000eee", "v3"));
  523. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  524. ASSERT_EQ("v3", Get("0000000000000eee"));
  525. ASSERT_OK(Delete("0000000000000bar"));
  526. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  527. ASSERT_EQ("NOT_FOUND", Get("0000000000000bar"));
  528. ASSERT_OK(Put("0000000000000eee", "v5"));
  529. ASSERT_OK(Put("9000000000000eee", "v5"));
  530. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  531. ASSERT_EQ("v5", Get("0000000000000eee"));
  532. // Test Bloom Filter
  533. if (bloom_bits > 0) {
  534. // Neither key nor value should exist.
  535. expect_bloom_not_match = true;
  536. ASSERT_EQ("NOT_FOUND", Get("5_not00000000bar"));
  537. // Key doesn't exist any more but prefix exists.
  538. if (total_order) {
  539. ASSERT_EQ("NOT_FOUND", Get("1000000000000not"));
  540. ASSERT_EQ("NOT_FOUND", Get("0000000000000not"));
  541. }
  542. expect_bloom_not_match = false;
  543. }
  544. }
  545. }
  546. }
  547. }
  548. }
  549. }
  550. TEST_P(PlainTableDBTest, Immortal) {
  551. for (EncodingType encoding_type : {kPlain, kPrefix}) {
  552. Options options = CurrentOptions();
  553. options.create_if_missing = true;
  554. options.max_open_files = -1;
  555. // Set only one bucket to force bucket conflict.
  556. // Test index interval for the same prefix to be 1, 2 and 4
  557. PlainTableOptions plain_table_options;
  558. plain_table_options.hash_table_ratio = 0.75;
  559. plain_table_options.index_sparseness = 16;
  560. plain_table_options.user_key_len = kPlainTableVariableLength;
  561. plain_table_options.bloom_bits_per_key = 10;
  562. plain_table_options.encoding_type = encoding_type;
  563. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  564. DestroyAndReopen(&options);
  565. ASSERT_OK(Put("0000000000000bar", "b"));
  566. ASSERT_OK(Put("1000000000000foo", "v1"));
  567. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  568. int copied = 0;
  569. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
  570. "GetContext::SaveValue::PinSelf", [&](void* /*arg*/) { copied++; });
  571. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  572. ASSERT_EQ("b", Get("0000000000000bar"));
  573. ASSERT_EQ("v1", Get("1000000000000foo"));
  574. ASSERT_EQ(2, copied);
  575. copied = 0;
  576. Close();
  577. ASSERT_OK(ReopenForReadOnly(&options));
  578. ASSERT_EQ("b", Get("0000000000000bar"));
  579. ASSERT_EQ("v1", Get("1000000000000foo"));
  580. ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
  581. if (mmap_mode()) {
  582. ASSERT_EQ(0, copied);
  583. } else {
  584. ASSERT_EQ(2, copied);
  585. }
  586. ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  587. }
  588. }
  589. TEST_P(PlainTableDBTest, Iterator) {
  590. for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
  591. huge_page_tlb_size += 2 * 1024 * 1024) {
  592. for (EncodingType encoding_type : {kPlain, kPrefix}) {
  593. for (int bloom_bits = 0; bloom_bits <= 117; bloom_bits += 117) {
  594. for (int total_order = 0; total_order <= 1; total_order++) {
  595. if (encoding_type == kPrefix && total_order == 1) {
  596. continue;
  597. }
  598. bool expect_bloom_not_match = false;
  599. Options options = CurrentOptions();
  600. options.create_if_missing = true;
  601. // Set only one bucket to force bucket conflict.
  602. // Test index interval for the same prefix to be 1, 2 and 4
  603. if (total_order) {
  604. options.prefix_extractor = nullptr;
  605. PlainTableOptions plain_table_options;
  606. plain_table_options.user_key_len = 16;
  607. plain_table_options.bloom_bits_per_key = bloom_bits;
  608. plain_table_options.hash_table_ratio = 0;
  609. plain_table_options.index_sparseness = 2;
  610. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  611. plain_table_options.encoding_type = encoding_type;
  612. options.table_factory.reset(new TestPlainTableFactory(
  613. &expect_bloom_not_match, plain_table_options,
  614. 0 /* column_family_id */, kDefaultColumnFamilyName));
  615. } else {
  616. PlainTableOptions plain_table_options;
  617. plain_table_options.user_key_len = 16;
  618. plain_table_options.bloom_bits_per_key = bloom_bits;
  619. plain_table_options.hash_table_ratio = 0.75;
  620. plain_table_options.index_sparseness = 16;
  621. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  622. plain_table_options.encoding_type = encoding_type;
  623. options.table_factory.reset(new TestPlainTableFactory(
  624. &expect_bloom_not_match, plain_table_options,
  625. 0 /* column_family_id */, kDefaultColumnFamilyName));
  626. }
  627. DestroyAndReopen(&options);
  628. ASSERT_OK(Put("1000000000foo002", "v_2"));
  629. ASSERT_OK(Put("0000000000000bar", "random"));
  630. ASSERT_OK(Put("1000000000foo001", "v1"));
  631. ASSERT_OK(Put("3000000000000bar", "bar_v"));
  632. ASSERT_OK(Put("1000000000foo003", "v__3"));
  633. ASSERT_OK(Put("1000000000foo004", "v__4"));
  634. ASSERT_OK(Put("1000000000foo005", "v__5"));
  635. ASSERT_OK(Put("1000000000foo007", "v__7"));
  636. ASSERT_OK(Put("1000000000foo008", "v__8"));
  637. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  638. ASSERT_EQ("v1", Get("1000000000foo001"));
  639. ASSERT_EQ("v__3", Get("1000000000foo003"));
  640. Iterator* iter = dbfull()->NewIterator(ReadOptions());
  641. iter->Seek("1000000000foo000");
  642. ASSERT_TRUE(iter->Valid());
  643. ASSERT_EQ("1000000000foo001", iter->key().ToString());
  644. ASSERT_EQ("v1", iter->value().ToString());
  645. iter->Next();
  646. ASSERT_TRUE(iter->Valid());
  647. ASSERT_EQ("1000000000foo002", iter->key().ToString());
  648. ASSERT_EQ("v_2", iter->value().ToString());
  649. iter->Next();
  650. ASSERT_TRUE(iter->Valid());
  651. ASSERT_EQ("1000000000foo003", iter->key().ToString());
  652. ASSERT_EQ("v__3", iter->value().ToString());
  653. iter->Next();
  654. ASSERT_TRUE(iter->Valid());
  655. ASSERT_EQ("1000000000foo004", iter->key().ToString());
  656. ASSERT_EQ("v__4", iter->value().ToString());
  657. iter->Seek("3000000000000bar");
  658. ASSERT_TRUE(iter->Valid());
  659. ASSERT_EQ("3000000000000bar", iter->key().ToString());
  660. ASSERT_EQ("bar_v", iter->value().ToString());
  661. iter->Seek("1000000000foo000");
  662. ASSERT_TRUE(iter->Valid());
  663. ASSERT_EQ("1000000000foo001", iter->key().ToString());
  664. ASSERT_EQ("v1", iter->value().ToString());
  665. iter->Seek("1000000000foo005");
  666. ASSERT_TRUE(iter->Valid());
  667. ASSERT_EQ("1000000000foo005", iter->key().ToString());
  668. ASSERT_EQ("v__5", iter->value().ToString());
  669. iter->Seek("1000000000foo006");
  670. ASSERT_TRUE(iter->Valid());
  671. ASSERT_EQ("1000000000foo007", iter->key().ToString());
  672. ASSERT_EQ("v__7", iter->value().ToString());
  673. iter->Seek("1000000000foo008");
  674. ASSERT_TRUE(iter->Valid());
  675. ASSERT_EQ("1000000000foo008", iter->key().ToString());
  676. ASSERT_EQ("v__8", iter->value().ToString());
  677. if (total_order == 0) {
  678. iter->Seek("1000000000foo009");
  679. ASSERT_TRUE(iter->Valid());
  680. ASSERT_EQ("3000000000000bar", iter->key().ToString());
  681. }
  682. // Test Bloom Filter
  683. if (bloom_bits > 0) {
  684. if (!total_order) {
  685. // Neither key nor value should exist.
  686. expect_bloom_not_match = true;
  687. iter->Seek("2not000000000bar");
  688. ASSERT_TRUE(!iter->Valid());
  689. ASSERT_EQ("NOT_FOUND", Get("2not000000000bar"));
  690. expect_bloom_not_match = false;
  691. } else {
  692. expect_bloom_not_match = true;
  693. ASSERT_EQ("NOT_FOUND", Get("2not000000000bar"));
  694. expect_bloom_not_match = false;
  695. }
  696. }
  697. ASSERT_OK(iter->status());
  698. delete iter;
  699. }
  700. }
  701. }
  702. }
  703. }
  namespace {
  // Builds a 16-byte key filled with `filler` whose first four bytes hold the
  // four lowest decimal digits of `n`, least-significant digit first. The
  // digits are stored as raw byte values 0-9, not as the characters '0'-'9'.
  std::string NthKey(size_t n, char filler) {
    std::string key(16, filler);
    size_t remaining = n;
    for (size_t pos = 0; pos < 4; ++pos) {
      key[pos] = static_cast<char>(remaining % 10);
      remaining /= 10;
    }
    return key;
  }
  }  // anonymous namespace
  714. TEST_P(PlainTableDBTest, BloomSchema) {
  715. Options options = CurrentOptions();
  716. options.create_if_missing = true;
  717. for (int bloom_locality = 0; bloom_locality <= 1; bloom_locality++) {
  718. options.bloom_locality = bloom_locality;
  719. PlainTableOptions plain_table_options;
  720. plain_table_options.user_key_len = 16;
  721. plain_table_options.bloom_bits_per_key = 3; // high FP rate for test
  722. plain_table_options.hash_table_ratio = 0.75;
  723. plain_table_options.index_sparseness = 16;
  724. plain_table_options.huge_page_tlb_size = 0;
  725. plain_table_options.encoding_type = kPlain;
  726. bool expect_bloom_not_match = false;
  727. options.table_factory.reset(new TestPlainTableFactory(
  728. &expect_bloom_not_match, plain_table_options, 0 /* column_family_id */,
  729. kDefaultColumnFamilyName));
  730. DestroyAndReopen(&options);
  731. for (unsigned i = 0; i < 2345; ++i) {
  732. ASSERT_OK(Put(NthKey(i, 'y'), "added"));
  733. }
  734. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  735. ASSERT_EQ("added", Get(NthKey(42, 'y')));
  736. for (unsigned i = 0; i < 32; ++i) {
  737. // Known pattern of Bloom filter false positives can detect schema change
  738. // with high probability. Known FPs stuffed into bits:
  739. uint32_t pattern;
  740. if (!bloom_locality) {
  741. pattern = 1785868347UL;
  742. } else if (CACHE_LINE_SIZE == 64U) {
  743. pattern = 2421694657UL;
  744. } else if (CACHE_LINE_SIZE == 128U) {
  745. pattern = 788710956UL;
  746. } else {
  747. ASSERT_EQ(CACHE_LINE_SIZE, 256U);
  748. pattern = 163905UL;
  749. }
  750. bool expect_fp = pattern & (1UL << i);
  751. // fprintf(stderr, "expect_fp@%u: %d\n", i, (int)expect_fp);
  752. expect_bloom_not_match = !expect_fp;
  753. ASSERT_EQ("NOT_FOUND", Get(NthKey(i, 'n')));
  754. }
  755. }
  756. }
  namespace {
  // Returns a key consisting of `length` copies of the character `c`.
  std::string MakeLongKey(size_t length, char c) {
    std::string key;
    key.assign(length, c);
    return key;
  }
  }  // anonymous namespace
  762. TEST_P(PlainTableDBTest, IteratorLargeKeys) {
  763. Options options = CurrentOptions();
  764. PlainTableOptions plain_table_options;
  765. plain_table_options.user_key_len = 0;
  766. plain_table_options.bloom_bits_per_key = 0;
  767. plain_table_options.hash_table_ratio = 0;
  768. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  769. options.create_if_missing = true;
  770. options.prefix_extractor.reset();
  771. DestroyAndReopen(&options);
  772. std::string key_list[] = {MakeLongKey(30, '0'), MakeLongKey(16, '1'),
  773. MakeLongKey(32, '2'), MakeLongKey(60, '3'),
  774. MakeLongKey(90, '4'), MakeLongKey(50, '5'),
  775. MakeLongKey(26, '6')};
  776. for (size_t i = 0; i < 7; i++) {
  777. ASSERT_OK(Put(key_list[i], std::to_string(i)));
  778. }
  779. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  780. Iterator* iter = dbfull()->NewIterator(ReadOptions());
  781. iter->Seek(key_list[0]);
  782. for (size_t i = 0; i < 7; i++) {
  783. ASSERT_TRUE(iter->Valid());
  784. ASSERT_EQ(key_list[i], iter->key().ToString());
  785. ASSERT_EQ(std::to_string(i), iter->value().ToString());
  786. iter->Next();
  787. }
  788. ASSERT_TRUE(!iter->Valid());
  789. ASSERT_OK(iter->status());
  790. delete iter;
  791. }
  namespace {
  // Returns a key of exactly `length` characters: the fixed prefix "00000000"
  // followed by as many copies of `c` as are needed to reach `length`.
  //
  // When `length` is not larger than the prefix, the result is the prefix
  // truncated to `length` characters. This avoids the size_t underflow that
  // `length - 8` would otherwise produce for lengths below 8.
  std::string MakeLongKeyWithPrefix(size_t length, char c) {
    const std::string prefix = "00000000";
    if (length <= prefix.size()) {
      return prefix.substr(0, length);
    }
    return prefix + std::string(length - prefix.size(), c);
  }
  }  // anonymous namespace
  797. TEST_P(PlainTableDBTest, IteratorLargeKeysWithPrefix) {
  798. Options options = CurrentOptions();
  799. PlainTableOptions plain_table_options;
  800. plain_table_options.user_key_len = 16;
  801. plain_table_options.bloom_bits_per_key = 0;
  802. plain_table_options.hash_table_ratio = 0.8;
  803. plain_table_options.index_sparseness = 3;
  804. plain_table_options.huge_page_tlb_size = 0;
  805. plain_table_options.encoding_type = kPrefix;
  806. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  807. options.create_if_missing = true;
  808. DestroyAndReopen(&options);
  809. std::string key_list[] = {
  810. MakeLongKeyWithPrefix(30, '0'), MakeLongKeyWithPrefix(16, '1'),
  811. MakeLongKeyWithPrefix(32, '2'), MakeLongKeyWithPrefix(60, '3'),
  812. MakeLongKeyWithPrefix(90, '4'), MakeLongKeyWithPrefix(50, '5'),
  813. MakeLongKeyWithPrefix(26, '6')};
  814. for (size_t i = 0; i < 7; i++) {
  815. ASSERT_OK(Put(key_list[i], std::to_string(i)));
  816. }
  817. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  818. Iterator* iter = dbfull()->NewIterator(ReadOptions());
  819. iter->Seek(key_list[0]);
  820. for (size_t i = 0; i < 7; i++) {
  821. ASSERT_TRUE(iter->Valid());
  822. ASSERT_EQ(key_list[i], iter->key().ToString());
  823. ASSERT_EQ(std::to_string(i), iter->value().ToString());
  824. iter->Next();
  825. }
  826. ASSERT_TRUE(!iter->Valid());
  827. ASSERT_OK(iter->status());
  828. delete iter;
  829. }
  830. TEST_P(PlainTableDBTest, IteratorReverseSuffixComparator) {
  831. Options options = CurrentOptions();
  832. options.create_if_missing = true;
  833. // Set only one bucket to force bucket conflict.
  834. // Test index interval for the same prefix to be 1, 2 and 4
  835. test::SimpleSuffixReverseComparator comp;
  836. options.comparator = &comp;
  837. DestroyAndReopen(&options);
  838. ASSERT_OK(Put("1000000000foo002", "v_2"));
  839. ASSERT_OK(Put("0000000000000bar", "random"));
  840. ASSERT_OK(Put("1000000000foo001", "v1"));
  841. ASSERT_OK(Put("3000000000000bar", "bar_v"));
  842. ASSERT_OK(Put("1000000000foo003", "v__3"));
  843. ASSERT_OK(Put("1000000000foo004", "v__4"));
  844. ASSERT_OK(Put("1000000000foo005", "v__5"));
  845. ASSERT_OK(Put("1000000000foo007", "v__7"));
  846. ASSERT_OK(Put("1000000000foo008", "v__8"));
  847. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  848. ASSERT_EQ("v1", Get("1000000000foo001"));
  849. ASSERT_EQ("v__3", Get("1000000000foo003"));
  850. Iterator* iter = dbfull()->NewIterator(ReadOptions());
  851. iter->Seek("1000000000foo009");
  852. ASSERT_TRUE(iter->Valid());
  853. ASSERT_EQ("1000000000foo008", iter->key().ToString());
  854. ASSERT_EQ("v__8", iter->value().ToString());
  855. iter->Next();
  856. ASSERT_TRUE(iter->Valid());
  857. ASSERT_EQ("1000000000foo007", iter->key().ToString());
  858. ASSERT_EQ("v__7", iter->value().ToString());
  859. iter->Next();
  860. ASSERT_TRUE(iter->Valid());
  861. ASSERT_EQ("1000000000foo005", iter->key().ToString());
  862. ASSERT_EQ("v__5", iter->value().ToString());
  863. iter->Next();
  864. ASSERT_TRUE(iter->Valid());
  865. ASSERT_EQ("1000000000foo004", iter->key().ToString());
  866. ASSERT_EQ("v__4", iter->value().ToString());
  867. iter->Seek("3000000000000bar");
  868. ASSERT_TRUE(iter->Valid());
  869. ASSERT_EQ("3000000000000bar", iter->key().ToString());
  870. ASSERT_EQ("bar_v", iter->value().ToString());
  871. iter->Seek("1000000000foo005");
  872. ASSERT_TRUE(iter->Valid());
  873. ASSERT_EQ("1000000000foo005", iter->key().ToString());
  874. ASSERT_EQ("v__5", iter->value().ToString());
  875. iter->Seek("1000000000foo006");
  876. ASSERT_TRUE(iter->Valid());
  877. ASSERT_EQ("1000000000foo005", iter->key().ToString());
  878. ASSERT_EQ("v__5", iter->value().ToString());
  879. iter->Seek("1000000000foo008");
  880. ASSERT_TRUE(iter->Valid());
  881. ASSERT_EQ("1000000000foo008", iter->key().ToString());
  882. ASSERT_EQ("v__8", iter->value().ToString());
  883. iter->Seek("1000000000foo000");
  884. ASSERT_TRUE(iter->Valid());
  885. ASSERT_EQ("3000000000000bar", iter->key().ToString());
  886. delete iter;
  887. }
  888. TEST_P(PlainTableDBTest, HashBucketConflict) {
  889. for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
  890. huge_page_tlb_size += 2 * 1024 * 1024) {
  891. for (unsigned char i = 1; i <= 3; i++) {
  892. Options options = CurrentOptions();
  893. options.create_if_missing = true;
  894. // Set only one bucket to force bucket conflict.
  895. // Test index interval for the same prefix to be 1, 2 and 4
  896. PlainTableOptions plain_table_options;
  897. plain_table_options.user_key_len = 16;
  898. plain_table_options.bloom_bits_per_key = 0;
  899. plain_table_options.hash_table_ratio = 0;
  900. plain_table_options.index_sparseness = 2 ^ i;
  901. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  902. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  903. DestroyAndReopen(&options);
  904. ASSERT_OK(Put("5000000000000fo0", "v1"));
  905. ASSERT_OK(Put("5000000000000fo1", "v2"));
  906. ASSERT_OK(Put("5000000000000fo2", "v"));
  907. ASSERT_OK(Put("2000000000000fo0", "v3"));
  908. ASSERT_OK(Put("2000000000000fo1", "v4"));
  909. ASSERT_OK(Put("2000000000000fo2", "v"));
  910. ASSERT_OK(Put("2000000000000fo3", "v"));
  911. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  912. ASSERT_EQ("v1", Get("5000000000000fo0"));
  913. ASSERT_EQ("v2", Get("5000000000000fo1"));
  914. ASSERT_EQ("v3", Get("2000000000000fo0"));
  915. ASSERT_EQ("v4", Get("2000000000000fo1"));
  916. ASSERT_EQ("NOT_FOUND", Get("5000000000000bar"));
  917. ASSERT_EQ("NOT_FOUND", Get("2000000000000bar"));
  918. ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8"));
  919. ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8"));
  920. ReadOptions ro;
  921. Iterator* iter = dbfull()->NewIterator(ro);
  922. iter->Seek("5000000000000fo0");
  923. ASSERT_TRUE(iter->Valid());
  924. ASSERT_EQ("5000000000000fo0", iter->key().ToString());
  925. iter->Next();
  926. ASSERT_TRUE(iter->Valid());
  927. ASSERT_EQ("5000000000000fo1", iter->key().ToString());
  928. iter->Seek("5000000000000fo1");
  929. ASSERT_TRUE(iter->Valid());
  930. ASSERT_EQ("5000000000000fo1", iter->key().ToString());
  931. iter->Seek("2000000000000fo0");
  932. ASSERT_TRUE(iter->Valid());
  933. ASSERT_EQ("2000000000000fo0", iter->key().ToString());
  934. iter->Next();
  935. ASSERT_TRUE(iter->Valid());
  936. ASSERT_EQ("2000000000000fo1", iter->key().ToString());
  937. iter->Seek("2000000000000fo1");
  938. ASSERT_TRUE(iter->Valid());
  939. ASSERT_EQ("2000000000000fo1", iter->key().ToString());
  940. iter->Seek("2000000000000bar");
  941. ASSERT_TRUE(iter->Valid());
  942. ASSERT_EQ("2000000000000fo0", iter->key().ToString());
  943. iter->Seek("5000000000000bar");
  944. ASSERT_TRUE(iter->Valid());
  945. ASSERT_EQ("5000000000000fo0", iter->key().ToString());
  946. iter->Seek("2000000000000fo8");
  947. ASSERT_TRUE(!iter->Valid() ||
  948. options.comparator->Compare(iter->key(), "20000001") > 0);
  949. iter->Seek("5000000000000fo8");
  950. ASSERT_TRUE(!iter->Valid());
  951. iter->Seek("1000000000000fo2");
  952. ASSERT_TRUE(!iter->Valid());
  953. iter->Seek("3000000000000fo2");
  954. ASSERT_TRUE(!iter->Valid());
  955. iter->Seek("8000000000000fo2");
  956. ASSERT_TRUE(!iter->Valid());
  957. ASSERT_OK(iter->status());
  958. delete iter;
  959. }
  960. }
  961. }
  962. TEST_P(PlainTableDBTest, HashBucketConflictReverseSuffixComparator) {
  963. for (size_t huge_page_tlb_size = 0; huge_page_tlb_size <= 2 * 1024 * 1024;
  964. huge_page_tlb_size += 2 * 1024 * 1024) {
  965. for (unsigned char i = 1; i <= 3; i++) {
  966. Options options = CurrentOptions();
  967. options.create_if_missing = true;
  968. test::SimpleSuffixReverseComparator comp;
  969. options.comparator = &comp;
  970. // Set only one bucket to force bucket conflict.
  971. // Test index interval for the same prefix to be 1, 2 and 4
  972. PlainTableOptions plain_table_options;
  973. plain_table_options.user_key_len = 16;
  974. plain_table_options.bloom_bits_per_key = 0;
  975. plain_table_options.hash_table_ratio = 0;
  976. plain_table_options.index_sparseness = 2 ^ i;
  977. plain_table_options.huge_page_tlb_size = huge_page_tlb_size;
  978. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  979. DestroyAndReopen(&options);
  980. ASSERT_OK(Put("5000000000000fo0", "v1"));
  981. ASSERT_OK(Put("5000000000000fo1", "v2"));
  982. ASSERT_OK(Put("5000000000000fo2", "v"));
  983. ASSERT_OK(Put("2000000000000fo0", "v3"));
  984. ASSERT_OK(Put("2000000000000fo1", "v4"));
  985. ASSERT_OK(Put("2000000000000fo2", "v"));
  986. ASSERT_OK(Put("2000000000000fo3", "v"));
  987. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  988. ASSERT_EQ("v1", Get("5000000000000fo0"));
  989. ASSERT_EQ("v2", Get("5000000000000fo1"));
  990. ASSERT_EQ("v3", Get("2000000000000fo0"));
  991. ASSERT_EQ("v4", Get("2000000000000fo1"));
  992. ASSERT_EQ("NOT_FOUND", Get("5000000000000bar"));
  993. ASSERT_EQ("NOT_FOUND", Get("2000000000000bar"));
  994. ASSERT_EQ("NOT_FOUND", Get("5000000000000fo8"));
  995. ASSERT_EQ("NOT_FOUND", Get("2000000000000fo8"));
  996. ReadOptions ro;
  997. Iterator* iter = dbfull()->NewIterator(ro);
  998. iter->Seek("5000000000000fo1");
  999. ASSERT_TRUE(iter->Valid());
  1000. ASSERT_EQ("5000000000000fo1", iter->key().ToString());
  1001. iter->Next();
  1002. ASSERT_TRUE(iter->Valid());
  1003. ASSERT_EQ("5000000000000fo0", iter->key().ToString());
  1004. iter->Seek("5000000000000fo1");
  1005. ASSERT_TRUE(iter->Valid());
  1006. ASSERT_EQ("5000000000000fo1", iter->key().ToString());
  1007. iter->Seek("2000000000000fo1");
  1008. ASSERT_TRUE(iter->Valid());
  1009. ASSERT_EQ("2000000000000fo1", iter->key().ToString());
  1010. iter->Next();
  1011. ASSERT_TRUE(iter->Valid());
  1012. ASSERT_EQ("2000000000000fo0", iter->key().ToString());
  1013. iter->Seek("2000000000000fo1");
  1014. ASSERT_TRUE(iter->Valid());
  1015. ASSERT_EQ("2000000000000fo1", iter->key().ToString());
  1016. iter->Seek("2000000000000var");
  1017. ASSERT_TRUE(iter->Valid());
  1018. ASSERT_EQ("2000000000000fo3", iter->key().ToString());
  1019. iter->Seek("5000000000000var");
  1020. ASSERT_TRUE(iter->Valid());
  1021. ASSERT_EQ("5000000000000fo2", iter->key().ToString());
  1022. std::string seek_key = "2000000000000bar";
  1023. iter->Seek(seek_key);
  1024. ASSERT_TRUE(!iter->Valid() ||
  1025. options.prefix_extractor->Transform(iter->key()) !=
  1026. options.prefix_extractor->Transform(seek_key));
  1027. iter->Seek("1000000000000fo2");
  1028. ASSERT_TRUE(!iter->Valid());
  1029. iter->Seek("3000000000000fo2");
  1030. ASSERT_TRUE(!iter->Valid());
  1031. iter->Seek("8000000000000fo2");
  1032. ASSERT_TRUE(!iter->Valid());
  1033. ASSERT_OK(iter->status());
  1034. delete iter;
  1035. }
  1036. }
  1037. }
  1038. TEST_P(PlainTableDBTest, NonExistingKeyToNonEmptyBucket) {
  1039. Options options = CurrentOptions();
  1040. options.create_if_missing = true;
  1041. // Set only one bucket to force bucket conflict.
  1042. // Test index interval for the same prefix to be 1, 2 and 4
  1043. PlainTableOptions plain_table_options;
  1044. plain_table_options.user_key_len = 16;
  1045. plain_table_options.bloom_bits_per_key = 0;
  1046. plain_table_options.hash_table_ratio = 0;
  1047. plain_table_options.index_sparseness = 5;
  1048. options.table_factory.reset(NewPlainTableFactory(plain_table_options));
  1049. DestroyAndReopen(&options);
  1050. ASSERT_OK(Put("5000000000000fo0", "v1"));
  1051. ASSERT_OK(Put("5000000000000fo1", "v2"));
  1052. ASSERT_OK(Put("5000000000000fo2", "v3"));
  1053. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  1054. ASSERT_EQ("v1", Get("5000000000000fo0"));
  1055. ASSERT_EQ("v2", Get("5000000000000fo1"));
  1056. ASSERT_EQ("v3", Get("5000000000000fo2"));
  1057. ASSERT_EQ("NOT_FOUND", Get("8000000000000bar"));
  1058. ASSERT_EQ("NOT_FOUND", Get("1000000000000bar"));
  1059. Iterator* iter = dbfull()->NewIterator(ReadOptions());
  1060. iter->Seek("5000000000000bar");
  1061. ASSERT_TRUE(iter->Valid());
  1062. ASSERT_EQ("5000000000000fo0", iter->key().ToString());
  1063. iter->Seek("5000000000000fo8");
  1064. ASSERT_TRUE(!iter->Valid());
  1065. iter->Seek("1000000000000fo2");
  1066. ASSERT_TRUE(!iter->Valid());
  1067. iter->Seek("8000000000000fo2");
  1068. ASSERT_TRUE(!iter->Valid());
  1069. ASSERT_OK(iter->status());
  1070. delete iter;
  1071. }
  // Formats `i` as a test key: the literal "key_______" followed by the number
  // zero-padded to a minimum width of six characters.
  static std::string Key(int i) {
    char text[100];
    snprintf(text, sizeof text, "key_______%06d", i);
    std::string result(text);
    return result;
  }
  1077. TEST_P(PlainTableDBTest, CompactionTrigger) {
  1078. Options options = CurrentOptions();
  1079. options.write_buffer_size = 120 << 10; // 120KB
  1080. options.num_levels = 3;
  1081. options.level0_file_num_compaction_trigger = 3;
  1082. Reopen(&options);
  1083. Random rnd(301);
  1084. for (int num = 0; num < options.level0_file_num_compaction_trigger - 1;
  1085. num++) {
  1086. std::vector<std::string> values;
  1087. // Write 120KB (10 values, each 12K)
  1088. for (int i = 0; i < 10; i++) {
  1089. values.push_back(rnd.RandomString(12 << 10));
  1090. ASSERT_OK(Put(Key(i), values[i]));
  1091. }
  1092. ASSERT_OK(Put(Key(999), ""));
  1093. ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
  1094. ASSERT_EQ(NumTableFilesAtLevel(0), num + 1);
  1095. }
  1096. // generate one more file in level-0, and should trigger level-0 compaction
  1097. std::vector<std::string> values;
  1098. for (int i = 0; i < 12; i++) {
  1099. values.push_back(rnd.RandomString(10000));
  1100. ASSERT_OK(Put(Key(i), values[i]));
  1101. }
  1102. ASSERT_OK(Put(Key(999), ""));
  1103. ASSERT_OK(dbfull()->TEST_WaitForCompact());
  1104. ASSERT_EQ(NumTableFilesAtLevel(0), 0);
  1105. ASSERT_EQ(NumTableFilesAtLevel(1), 1);
  1106. }
  1107. TEST_P(PlainTableDBTest, AdaptiveTable) {
  1108. Options options = CurrentOptions();
  1109. options.create_if_missing = true;
  1110. options.table_factory.reset(NewPlainTableFactory());
  1111. DestroyAndReopen(&options);
  1112. ASSERT_OK(Put("1000000000000foo", "v1"));
  1113. ASSERT_OK(Put("0000000000000bar", "v2"));
  1114. ASSERT_OK(Put("1000000000000foo", "v3"));
  1115. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  1116. options.create_if_missing = false;
  1117. std::shared_ptr<TableFactory> block_based_factory(
  1118. NewBlockBasedTableFactory());
  1119. std::shared_ptr<TableFactory> plain_table_factory(NewPlainTableFactory());
  1120. std::shared_ptr<TableFactory> dummy_factory;
  1121. options.table_factory.reset(NewAdaptiveTableFactory(
  1122. block_based_factory, block_based_factory, plain_table_factory));
  1123. Reopen(&options);
  1124. ASSERT_EQ("v3", Get("1000000000000foo"));
  1125. ASSERT_EQ("v2", Get("0000000000000bar"));
  1126. ASSERT_OK(Put("2000000000000foo", "v4"));
  1127. ASSERT_OK(Put("3000000000000bar", "v5"));
  1128. ASSERT_OK(dbfull()->TEST_FlushMemTable());
  1129. ASSERT_EQ("v4", Get("2000000000000foo"));
  1130. ASSERT_EQ("v5", Get("3000000000000bar"));
  1131. Reopen(&options);
  1132. ASSERT_EQ("v3", Get("1000000000000foo"));
  1133. ASSERT_EQ("v2", Get("0000000000000bar"));
  1134. ASSERT_EQ("v4", Get("2000000000000foo"));
  1135. ASSERT_EQ("v5", Get("3000000000000bar"));
  1136. options.paranoid_checks = false;
  1137. options.table_factory.reset(NewBlockBasedTableFactory());
  1138. Reopen(&options);
  1139. ASSERT_NE("v3", Get("1000000000000foo"));
  1140. options.paranoid_checks = false;
  1141. options.table_factory.reset(NewPlainTableFactory());
  1142. Reopen(&options);
  1143. ASSERT_NE("v5", Get("3000000000000bar"));
  1144. }
  1145. INSTANTIATE_TEST_CASE_P(PlainTableDBTest, PlainTableDBTest, ::testing::Bool());
  1146. TEST_P(PlainTableDBTest, DeleteRangeNotSupported) {
  1147. for (bool use_write_batch : {false, true}) {
  1148. DestroyAndReopen();
  1149. ASSERT_OK(Put("a0001111", "1"));
  1150. ASSERT_OK(Put("b0001111", "2"));
  1151. ASSERT_OK(Put("c0001111", "3"));
  1152. if (use_write_batch) {
  1153. WriteBatch wb;
  1154. ASSERT_OK(wb.Put("d0001111", "4"));
  1155. ASSERT_OK(wb.DeleteRange("a", "b"));
  1156. ASSERT_OK(wb.Put("e0001111", "5"));
  1157. ASSERT_EQ(dbfull()->Write({}, &wb).code(), Status::Code::kNotSupported);
  1158. } else {
  1159. ASSERT_EQ(dbfull()->DeleteRange({}, "az", "bz").code(),
  1160. Status::Code::kNotSupported);
  1161. }
  1162. ASSERT_EQ(Get("a0001111"), "1");
  1163. ASSERT_EQ(Get("b0001111"), "2");
  1164. ASSERT_EQ(Get("c0001111"), "3");
  1165. ASSERT_EQ(Get("d0001111"), "NOT_FOUND"); // expect WriteBatch atomicity
  1166. ASSERT_EQ(Get("e0001111"), "NOT_FOUND");
  1167. ASSERT_EQ(Put("e0001111", "5").code(), Status::Code::kNotSupported);
  1168. ASSERT_EQ(Get("e0001111"), "NOT_FOUND");
  1169. // Even trying to flush
  1170. ASSERT_EQ(dbfull()->TEST_FlushMemTable().code(),
  1171. Status::Code::kNotSupported);
  1172. // WAL is recoverable (at least in standard configurations)
  1173. ASSERT_OK(TryReopen());
  1174. ASSERT_EQ(Get("a0001111"), "1");
  1175. ASSERT_EQ(Get("b0001111"), "2");
  1176. ASSERT_EQ(Get("c0001111"), "3");
  1177. ASSERT_EQ(Get("d0001111"), "NOT_FOUND");
  1178. ASSERT_EQ(Get("e0001111"), "NOT_FOUND");
  1179. }
  1180. }
  1181. } // namespace ROCKSDB_NAMESPACE
  1182. int main(int argc, char** argv) {
  1183. ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  1184. ::testing::InitGoogleTest(&argc, argv);
  1185. return RUN_ALL_TESTS();
  1186. }