// forward_iterator_bench.cc
  1. // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
  2. // This source code is licensed under both the GPLv2 (found in the
  3. // COPYING file in the root directory) and Apache 2.0 License
  4. // (found in the LICENSE.Apache file in the root directory).
#if !defined(GFLAGS) || defined(ROCKSDB_LITE)
#include <cstdio>
// Stub build: the benchmark requires gflags and is excluded from
// ROCKSDB_LITE; exit with an error message instead.
int main() {
  fprintf(stderr, "Please install gflags to run rocksdb tools\n");
  return 1;
}
#elif defined(OS_MACOSX) || defined(OS_WIN)
// Block forward_iterator_bench under MAC and Windows: it relies on POSIX
// semaphores (<semaphore.h>), so build a no-op main there.
int main() { return 0; }
#else
#include <semaphore.h>

#include <atomic>
#include <bitset>
#include <chrono>
#include <cinttypes>
#include <climits>
#include <condition_variable>
#include <limits>
#include <mutex>
#include <queue>
#include <random>
#include <thread>

#include "port/port.h"
#include "rocksdb/cache.h"
#include "rocksdb/db.h"
#include "rocksdb/status.h"
#include "rocksdb/table.h"
#include "test_util/testharness.h"
#include "util/gflags_compat.h"
// Hard upper bound on --shards; sizes the pending-shard bitset in Reader.
const int MAX_SHARDS = 100000;

DEFINE_int32(writers, 8, "");   // number of writer threads
DEFINE_int32(readers, 8, "");   // number of reader threads
DEFINE_int64(rate, 100000, "");      // target aggregate writes per second
DEFINE_int64(value_size, 300, "");   // bytes per written value
DEFINE_int64(shards, 1000, "");      // number of independent key shards
DEFINE_int64(memtable_size, 500000000, "");     // DB write_buffer_size (bytes)
DEFINE_int64(block_cache_size, 300000000, "");  // LRU block cache size (bytes)
DEFINE_int64(block_size, 65536, "");            // SST block size (bytes)
DEFINE_double(runtime, 300.0, "");  // benchmark duration in seconds
DEFINE_bool(cache_only_first, true, "");  // probe a kBlockCacheTier iterator
                                          // before the regular one
DEFINE_bool(iterate_upper_bound, true, "");  // bound iterators to one shard
// Global benchmark counters, updated from many threads. The 128-byte pad
// arrays push the atomics onto separate cache lines so writer-side
// (written) and reader-side (read/cache_misses) updates don't false-share.
struct Stats {
  char pad1[128] __attribute__((__unused__));
  std::atomic<uint64_t> written{0};  // total keys Put by all writers
  char pad2[128] __attribute__((__unused__));
  std::atomic<uint64_t> read{0};          // total keys read back and verified
  std::atomic<uint64_t> cache_misses{0};  // cache-only iterator Incomplete()s
  char pad3[128] __attribute__((__unused__));
} stats;
  53. struct Key {
  54. Key() {}
  55. Key(uint64_t shard_in, uint64_t seqno_in)
  56. : shard_be(htobe64(shard_in)), seqno_be(htobe64(seqno_in)) {}
  57. uint64_t shard() const { return be64toh(shard_be); }
  58. uint64_t seqno() const { return be64toh(seqno_be); }
  59. private:
  60. uint64_t shard_be;
  61. uint64_t seqno_be;
  62. } __attribute__((__packed__));
struct Reader;
struct Writer;

// Per-shard state shared between exactly one Writer and one Reader
// (assigned in main()). Padding separates the writer-updated and
// reader-updated atomics onto different cache lines.
struct ShardState {
  char pad1[128] __attribute__((__unused__));
  std::atomic<uint64_t> last_written{0};  // highest seqno Put by the writer
  Writer* writer;  // owning writer thread (set once in main())
  Reader* reader;  // owning reader thread (set once in main())
  char pad2[128] __attribute__((__unused__));
  std::atomic<uint64_t> last_read{0};  // highest seqno verified by the reader
  // Lazily created in Reader::readOnceFromShard on first use.
  std::unique_ptr<ROCKSDB_NAMESPACE::Iterator> it;            // tailing iterator
  std::unique_ptr<ROCKSDB_NAMESPACE::Iterator> it_cacheonly;  // kBlockCacheTier
  Key upper_bound;  // Key(shard, UINT64_MAX) — confines iteration to the shard
  ROCKSDB_NAMESPACE::Slice upper_bound_slice;  // view over upper_bound
  char pad3[128] __attribute__((__unused__));
};
  78. struct Reader {
  79. public:
  80. explicit Reader(std::vector<ShardState>* shard_states,
  81. ROCKSDB_NAMESPACE::DB* db)
  82. : shard_states_(shard_states), db_(db) {
  83. sem_init(&sem_, 0, 0);
  84. thread_ = port::Thread(&Reader::run, this);
  85. }
  86. void run() {
  87. while (1) {
  88. sem_wait(&sem_);
  89. if (done_.load()) {
  90. break;
  91. }
  92. uint64_t shard;
  93. {
  94. std::lock_guard<std::mutex> guard(queue_mutex_);
  95. assert(!shards_pending_queue_.empty());
  96. shard = shards_pending_queue_.front();
  97. shards_pending_queue_.pop();
  98. shards_pending_set_.reset(shard);
  99. }
  100. readOnceFromShard(shard);
  101. }
  102. }
  103. void readOnceFromShard(uint64_t shard) {
  104. ShardState& state = (*shard_states_)[shard];
  105. if (!state.it) {
  106. // Initialize iterators
  107. ROCKSDB_NAMESPACE::ReadOptions options;
  108. options.tailing = true;
  109. if (FLAGS_iterate_upper_bound) {
  110. state.upper_bound = Key(shard, std::numeric_limits<uint64_t>::max());
  111. state.upper_bound_slice = ROCKSDB_NAMESPACE::Slice(
  112. (const char*)&state.upper_bound, sizeof(state.upper_bound));
  113. options.iterate_upper_bound = &state.upper_bound_slice;
  114. }
  115. state.it.reset(db_->NewIterator(options));
  116. if (FLAGS_cache_only_first) {
  117. options.read_tier = ROCKSDB_NAMESPACE::ReadTier::kBlockCacheTier;
  118. state.it_cacheonly.reset(db_->NewIterator(options));
  119. }
  120. }
  121. const uint64_t upto = state.last_written.load();
  122. for (ROCKSDB_NAMESPACE::Iterator* it :
  123. {state.it_cacheonly.get(), state.it.get()}) {
  124. if (it == nullptr) {
  125. continue;
  126. }
  127. if (state.last_read.load() >= upto) {
  128. break;
  129. }
  130. bool need_seek = true;
  131. for (uint64_t seq = state.last_read.load() + 1; seq <= upto; ++seq) {
  132. if (need_seek) {
  133. Key from(shard, state.last_read.load() + 1);
  134. it->Seek(ROCKSDB_NAMESPACE::Slice((const char*)&from, sizeof(from)));
  135. need_seek = false;
  136. } else {
  137. it->Next();
  138. }
  139. if (it->status().IsIncomplete()) {
  140. ++::stats.cache_misses;
  141. break;
  142. }
  143. assert(it->Valid());
  144. assert(it->key().size() == sizeof(Key));
  145. Key key;
  146. memcpy(&key, it->key().data(), it->key().size());
  147. // fprintf(stderr, "Expecting (%ld, %ld) read (%ld, %ld)\n",
  148. // shard, seq, key.shard(), key.seqno());
  149. assert(key.shard() == shard);
  150. assert(key.seqno() == seq);
  151. state.last_read.store(seq);
  152. ++::stats.read;
  153. }
  154. }
  155. }
  156. void onWrite(uint64_t shard) {
  157. {
  158. std::lock_guard<std::mutex> guard(queue_mutex_);
  159. if (!shards_pending_set_.test(shard)) {
  160. shards_pending_queue_.push(shard);
  161. shards_pending_set_.set(shard);
  162. sem_post(&sem_);
  163. }
  164. }
  165. }
  166. ~Reader() {
  167. done_.store(true);
  168. sem_post(&sem_);
  169. thread_.join();
  170. }
  171. private:
  172. char pad1[128] __attribute__((__unused__));
  173. std::vector<ShardState>* shard_states_;
  174. ROCKSDB_NAMESPACE::DB* db_;
  175. ROCKSDB_NAMESPACE::port::Thread thread_;
  176. sem_t sem_;
  177. std::mutex queue_mutex_;
  178. std::bitset<MAX_SHARDS + 1> shards_pending_set_;
  179. std::queue<uint64_t> shards_pending_queue_;
  180. std::atomic<bool> done_{false};
  181. char pad2[128] __attribute__((__unused__));
  182. };
  183. struct Writer {
  184. explicit Writer(std::vector<ShardState>* shard_states,
  185. ROCKSDB_NAMESPACE::DB* db)
  186. : shard_states_(shard_states), db_(db) {}
  187. void start() { thread_ = port::Thread(&Writer::run, this); }
  188. void run() {
  189. std::queue<std::chrono::steady_clock::time_point> workq;
  190. std::chrono::steady_clock::time_point deadline(
  191. std::chrono::steady_clock::now() +
  192. std::chrono::nanoseconds((uint64_t)(1000000000 * FLAGS_runtime)));
  193. std::vector<uint64_t> my_shards;
  194. for (int i = 1; i <= FLAGS_shards; ++i) {
  195. if ((*shard_states_)[i].writer == this) {
  196. my_shards.push_back(i);
  197. }
  198. }
  199. std::mt19937 rng{std::random_device()()};
  200. std::uniform_int_distribution<int> shard_dist(
  201. 0, static_cast<int>(my_shards.size()) - 1);
  202. std::string value(FLAGS_value_size, '*');
  203. while (1) {
  204. auto now = std::chrono::steady_clock::now();
  205. if (FLAGS_runtime >= 0 && now >= deadline) {
  206. break;
  207. }
  208. if (workq.empty()) {
  209. for (int i = 0; i < FLAGS_rate; i += FLAGS_writers) {
  210. std::chrono::nanoseconds offset(1000000000LL * i / FLAGS_rate);
  211. workq.push(now + offset);
  212. }
  213. }
  214. while (!workq.empty() && workq.front() < now) {
  215. workq.pop();
  216. uint64_t shard = my_shards[shard_dist(rng)];
  217. ShardState& state = (*shard_states_)[shard];
  218. uint64_t seqno = state.last_written.load() + 1;
  219. Key key(shard, seqno);
  220. // fprintf(stderr, "Writing (%ld, %ld)\n", shard, seqno);
  221. ROCKSDB_NAMESPACE::Status status =
  222. db_->Put(ROCKSDB_NAMESPACE::WriteOptions(),
  223. ROCKSDB_NAMESPACE::Slice((const char*)&key, sizeof(key)),
  224. ROCKSDB_NAMESPACE::Slice(value));
  225. assert(status.ok());
  226. state.last_written.store(seqno);
  227. state.reader->onWrite(shard);
  228. ++::stats.written;
  229. }
  230. std::this_thread::sleep_for(std::chrono::milliseconds(1));
  231. }
  232. // fprintf(stderr, "Writer done\n");
  233. }
  234. ~Writer() { thread_.join(); }
  235. private:
  236. char pad1[128] __attribute__((__unused__));
  237. std::vector<ShardState>* shard_states_;
  238. ROCKSDB_NAMESPACE::DB* db_;
  239. ROCKSDB_NAMESPACE::port::Thread thread_;
  240. char pad2[128] __attribute__((__unused__));
  241. };
  242. struct StatsThread {
  243. explicit StatsThread(ROCKSDB_NAMESPACE::DB* db)
  244. : db_(db), thread_(&StatsThread::run, this) {}
  245. void run() {
  246. // using namespace std::chrono;
  247. auto tstart = std::chrono::steady_clock::now(), tlast = tstart;
  248. uint64_t wlast = 0, rlast = 0;
  249. while (!done_.load()) {
  250. {
  251. std::unique_lock<std::mutex> lock(cvm_);
  252. cv_.wait_for(lock, std::chrono::seconds(1));
  253. }
  254. auto now = std::chrono::steady_clock::now();
  255. double elapsed =
  256. std::chrono::duration_cast<std::chrono::duration<double> >(
  257. now - tlast).count();
  258. uint64_t w = ::stats.written.load();
  259. uint64_t r = ::stats.read.load();
  260. fprintf(stderr,
  261. "%s elapsed %4lds | written %10ld | w/s %10.0f | read %10ld | "
  262. "r/s %10.0f | cache misses %10ld\n",
  263. db_->GetEnv()->TimeToString(time(nullptr)).c_str(),
  264. std::chrono::duration_cast<std::chrono::seconds>(now - tstart)
  265. .count(),
  266. w, (w - wlast) / elapsed, r, (r - rlast) / elapsed,
  267. ::stats.cache_misses.load());
  268. wlast = w;
  269. rlast = r;
  270. tlast = now;
  271. }
  272. }
  273. ~StatsThread() {
  274. {
  275. std::lock_guard<std::mutex> guard(cvm_);
  276. done_.store(true);
  277. }
  278. cv_.notify_all();
  279. thread_.join();
  280. }
  281. private:
  282. ROCKSDB_NAMESPACE::DB* db_;
  283. std::mutex cvm_;
  284. std::condition_variable cv_;
  285. ROCKSDB_NAMESPACE::port::Thread thread_;
  286. std::atomic<bool> done_{false};
  287. };
int main(int argc, char** argv) {
  GFLAGS_NAMESPACE::ParseCommandLineFlags(&argc, &argv, true);

  std::mt19937 rng{std::random_device()()};
  ROCKSDB_NAMESPACE::Status status;
  std::string path =
      ROCKSDB_NAMESPACE::test::PerThreadDBPath("forward_iterator_test");
  fprintf(stderr, "db path is %s\n", path.c_str());
  ROCKSDB_NAMESPACE::Options options;
  options.create_if_missing = true;
  options.compression = ROCKSDB_NAMESPACE::CompressionType::kNoCompression;
  // Effectively disable compaction and its write throttling so the
  // benchmark measures iterator behavior, not compaction behavior.
  options.compaction_style =
      ROCKSDB_NAMESPACE::CompactionStyle::kCompactionStyleNone;
  options.level0_slowdown_writes_trigger = 99999;
  options.level0_stop_writes_trigger = 99999;
  options.use_direct_io_for_flush_and_compaction = true;
  options.write_buffer_size = FLAGS_memtable_size;
  ROCKSDB_NAMESPACE::BlockBasedTableOptions table_options;
  table_options.block_cache =
      ROCKSDB_NAMESPACE::NewLRUCache(FLAGS_block_cache_size);
  table_options.block_size = FLAGS_block_size;
  options.table_factory.reset(
      ROCKSDB_NAMESPACE::NewBlockBasedTableFactory(table_options));

  // Start from a clean DB on every run.
  status = ROCKSDB_NAMESPACE::DestroyDB(path, options);
  assert(status.ok());
  ROCKSDB_NAMESPACE::DB* db_raw;
  status = ROCKSDB_NAMESPACE::DB::Open(options, path, &db_raw);
  assert(status.ok());
  std::unique_ptr<ROCKSDB_NAMESPACE::DB> db(db_raw);

  // Shard 0 is unused; shards are 1..FLAGS_shards.
  std::vector<ShardState> shard_states(FLAGS_shards + 1);
  // deques (not vectors): Reader/Writer hold a running thread / mutex and
  // must not be moved once constructed, and shard_states keeps raw
  // pointers into them.
  std::deque<Reader> readers;
  while (static_cast<int>(readers.size()) < FLAGS_readers) {
    readers.emplace_back(&shard_states, db_raw);
  }
  std::deque<Writer> writers;
  while (static_cast<int>(writers.size()) < FLAGS_writers) {
    writers.emplace_back(&shard_states, db_raw);
  }

  // Each shard gets a random reader and random writer assigned to it
  for (int i = 1; i <= FLAGS_shards; ++i) {
    std::uniform_int_distribution<int> reader_dist(0, FLAGS_readers - 1);
    std::uniform_int_distribution<int> writer_dist(0, FLAGS_writers - 1);
    shard_states[i].reader = &readers[reader_dist(rng)];
    shard_states[i].writer = &writers[writer_dist(rng)];
  }

  StatsThread stats_thread(db_raw);
  for (Writer& w : writers) {
    w.start();
  }

  // Teardown order matters: join writers first (no new onWrite() calls),
  // then readers, then (implicitly) stats_thread and the DB.
  writers.clear();
  readers.clear();
}
  339. #endif // !defined(GFLAGS) || defined(ROCKSDB_LITE)