run_blob_bench.sh 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# BlobDB benchmark script
#
# REQUIRES: benchmark.sh is in the tools subdirectory
#
# After this script has run, log files are available in $OUTPUT_DIR;
# report.tsv provides high-level statistics.
#
# Must be run from the parent of the tools directory. The command line is:
#   [$env_vars] tools/run_blob_bench.sh
#
# This runs the following sequence of BlobDB performance tests:
#   phase 1) write-only - bulkload+compact, overwrite+waitforcompaction
#   phase 2) read-write - readwhilewriting, fwdrangewhilewriting
#   phase 3) read-only  - readrandom, fwdrange
#
  19. # Exit Codes
  20. EXIT_INVALID_ARGS=1
  21. EXIT_INVALID_PATH=2
  22. # Size constants
  23. K=1024
  24. M=$((1024 * K))
  25. G=$((1024 * M))
  26. T=$((1024 * G))
  27. function display_usage() {
  28. echo "usage: run_blob_bench.sh [--help]"
  29. echo ""
  30. echo "Runs the following sequence of BlobDB benchmark tests using tools/benchmark.sh:"
  31. echo -e "\tPhase 1: write-only tests: bulkload+compact, overwrite+waitforcompaction"
  32. echo -e "\tPhase 2: read-write tests: readwhilewriting, fwdrangewhilewriting"
  33. echo -e "\tPhase 3: read-only tests: readrandom, fwdrange"
  34. echo ""
  35. echo "Environment Variables:"
  36. echo -e "\tJOB_ID\t\t\t\tIdentifier for the benchmark job, will appear in the results (default: empty)"
  37. echo -e "\tDB_DIR\t\t\t\tPath for the RocksDB data directory (mandatory)"
  38. echo -e "\tWAL_DIR\t\t\t\tPath for the RocksDB WAL directory (mandatory)"
  39. echo -e "\tOUTPUT_DIR\t\t\tPath for the benchmark results (mandatory)"
  40. echo -e "\tNUM_THREADS\t\t\tNumber of threads (default: 16)"
  41. echo -e "\tCOMPRESSION_TYPE\t\tCompression type for the SST files (default: lz4)"
  42. echo -e "\tDB_SIZE\t\t\t\tRaw (uncompressed) database size (default: 1 TB)"
  43. echo -e "\tVALUE_SIZE\t\t\tValue size (default: 1 KB)"
  44. echo -e "\tNUM_KEYS\t\t\tNumber of keys (default: raw database size divided by value size)"
  45. echo -e "\tDURATION\t\t\tIndividual duration for read-write/read-only tests in seconds (default: 1800)"
  46. echo -e "\tWRITE_BUFFER_SIZE\t\tWrite buffer (memtable) size (default: 1 GB)"
  47. echo -e "\tENABLE_BLOB_FILES\t\tEnable blob files (default: 1)"
  48. echo -e "\tMIN_BLOB_SIZE\t\t\tSize threshold for storing values in blob files (default: 0)"
  49. echo -e "\tBLOB_FILE_SIZE\t\t\tBlob file size (default: same as write buffer size)"
  50. echo -e "\tBLOB_COMPRESSION_TYPE\t\tCompression type for the blob files (default: lz4)"
  51. echo -e "\tENABLE_BLOB_GC\t\t\tEnable blob garbage collection (default: 1)"
  52. echo -e "\tBLOB_GC_AGE_CUTOFF\t\tBlob garbage collection age cutoff (default: 0.25)"
  53. echo -e "\tBLOB_GC_FORCE_THRESHOLD\t\tThreshold for forcing garbage collection of the oldest blob files (default: 1.0)"
  54. echo -e "\tBLOB_COMPACTION_READAHEAD_SIZE\tBlob compaction readahead size (default: 0)"
  55. echo -e "\tBLOB_FILE_STARTING_LEVEL\t\tBlob file starting level (default: 0)"
  56. echo -e "\tUSE_BLOB_CACHE\t\t\tEnable blob cache. (default: 1)"
  57. echo -e "\tUSE_SHARED_BLOCK_AND_BLOB_CACHE\t\t\tUse the same backing cache for block cache and blob cache. (default: 1)"
  58. echo -e "\tBLOB_CACHE_SIZE\t\t\tSize of the blob cache (default: 16GB)"
  59. echo -e "\tBLOB_CACHE_NUMSHARDBITS\t\t\tNumber of shards for the blob cache is 2 ** blob_cache_numshardbits (default: 6)"
  60. echo -e "\tPREPOPULATE_BLOB_CACHE\t\t\tPre-populate hot/warm blobs in blob cache (default: 0)"
  61. echo -e "\tTARGET_FILE_SIZE_BASE\t\tTarget SST file size for compactions (default: write buffer size, scaled down if blob files are enabled)"
  62. echo -e "\tMAX_BYTES_FOR_LEVEL_BASE\tMaximum size for the base level (default: 8 * target SST file size)"
  63. }
  64. if [ $# -ge 1 ]; then
  65. display_usage
  66. if [ "$1" == "--help" ]; then
  67. exit
  68. else
  69. exit $EXIT_INVALID_ARGS
  70. fi
  71. fi
  72. if [ ! -f tools/benchmark.sh ]; then
  73. echo "tools/benchmark.sh not found"
  74. exit $EXIT_INVALID_PATH
  75. fi
  76. # shellcheck disable=SC2153
  77. if [ -z "$DB_DIR" ]; then
  78. echo "DB_DIR is not defined"
  79. exit $EXIT_INVALID_ARGS
  80. fi
  81. # shellcheck disable=SC2153
  82. if [ -z "$WAL_DIR" ]; then
  83. echo "WAL_DIR is not defined"
  84. exit $EXIT_INVALID_ARGS
  85. fi
  86. # shellcheck disable=SC2153
  87. if [ -z "$OUTPUT_DIR" ]; then
  88. echo "OUTPUT_DIR is not defined"
  89. exit $EXIT_INVALID_ARGS
  90. fi
  91. # shellcheck disable=SC2153
  92. job_id=$JOB_ID
  93. db_dir=$DB_DIR
  94. wal_dir=$WAL_DIR
  95. output_dir=$OUTPUT_DIR
  96. num_threads=${NUM_THREADS:-16}
  97. compression_type=${COMPRESSION_TYPE:-lz4}
  98. db_size=${DB_SIZE:-$((1 * T))}
  99. value_size=${VALUE_SIZE:-$((1 * K))}
  100. num_keys=${NUM_KEYS:-$((db_size / value_size))}
  101. duration=${DURATION:-1800}
  102. write_buffer_size=${WRITE_BUFFER_SIZE:-$((1 * G))}
  103. enable_blob_files=${ENABLE_BLOB_FILES:-1}
  104. min_blob_size=${MIN_BLOB_SIZE:-0}
  105. blob_file_size=${BLOB_FILE_SIZE:-$write_buffer_size}
  106. blob_compression_type=${BLOB_COMPRESSION_TYPE:-lz4}
  107. enable_blob_garbage_collection=${ENABLE_BLOB_GC:-1}
  108. blob_garbage_collection_age_cutoff=${BLOB_GC_AGE_CUTOFF:-0.25}
  109. blob_garbage_collection_force_threshold=${BLOB_GC_FORCE_THRESHOLD:-1.0}
  110. blob_compaction_readahead_size=${BLOB_COMPACTION_READAHEAD_SIZE:-0}
  111. blob_file_starting_level=${BLOB_FILE_STARTING_LEVEL:-0}
  112. use_blob_cache=${USE_BLOB_CACHE:-1}
  113. use_shared_block_and_blob_cache=${USE_SHARED_BLOCK_AND_BLOB_CACHE:-1}
  114. blob_cache_size=${BLOB_CACHE_SIZE:-$((16 * G))}
  115. blob_cache_numshardbits=${BLOB_CACHE_NUMSHARDBITS:-6}
  116. prepopulate_blob_cache=${PREPOPULATE_BLOB_CACHE:-0}
  117. if [ "$enable_blob_files" == "1" ]; then
  118. target_file_size_base=${TARGET_FILE_SIZE_BASE:-$((32 * write_buffer_size / value_size))}
  119. else
  120. target_file_size_base=${TARGET_FILE_SIZE_BASE:-$write_buffer_size}
  121. fi
  122. max_bytes_for_level_base=${MAX_BYTES_FOR_LEVEL_BASE:-$((8 * target_file_size_base))}
  123. echo "======================== Benchmark setup ========================"
  124. echo -e "Job ID:\t\t\t\t\t$job_id"
  125. echo -e "Data directory:\t\t\t\t$db_dir"
  126. echo -e "WAL directory:\t\t\t\t$wal_dir"
  127. echo -e "Output directory:\t\t\t$output_dir"
  128. echo -e "Number of threads:\t\t\t$num_threads"
  129. echo -e "Compression type for SST files:\t\t$compression_type"
  130. echo -e "Raw database size:\t\t\t$db_size"
  131. echo -e "Value size:\t\t\t\t$value_size"
  132. echo -e "Number of keys:\t\t\t\t$num_keys"
  133. echo -e "Duration of read-write/read-only tests:\t$duration"
  134. echo -e "Write buffer size:\t\t\t$write_buffer_size"
  135. echo -e "Blob files enabled:\t\t\t$enable_blob_files"
  136. echo -e "Blob size threshold:\t\t\t$min_blob_size"
  137. echo -e "Blob file size:\t\t\t\t$blob_file_size"
  138. echo -e "Compression type for blob files:\t$blob_compression_type"
  139. echo -e "Blob GC enabled:\t\t\t$enable_blob_garbage_collection"
  140. echo -e "Blob GC age cutoff:\t\t\t$blob_garbage_collection_age_cutoff"
  141. echo -e "Blob GC force threshold:\t\t$blob_garbage_collection_force_threshold"
  142. echo -e "Blob compaction readahead size:\t\t$blob_compaction_readahead_size"
  143. echo -e "Blob file starting level:\t\t$blob_file_starting_level"
  144. echo -e "Blob cache enabled:\t\t\t$use_blob_cache"
  145. echo -e "Blob cache and block cache shared:\t\t\t$use_shared_block_and_blob_cache"
  146. echo -e "Blob cache size:\t\t$blob_cache_size"
  147. echo -e "Blob cache number of shard bits:\t\t$blob_cache_numshardbits"
  148. echo -e "Blob cache prepopulated:\t\t\t$prepopulate_blob_cache"
  149. echo -e "Target SST file size:\t\t\t$target_file_size_base"
  150. echo -e "Maximum size of base level:\t\t$max_bytes_for_level_base"
  151. echo "================================================================="
  152. rm -rf "$db_dir"
  153. rm -rf "$wal_dir"
  154. rm -rf "$output_dir"
  155. ENV_VARS="\
  156. JOB_ID=$job_id \
  157. DB_DIR=$db_dir \
  158. WAL_DIR=$wal_dir \
  159. OUTPUT_DIR=$output_dir \
  160. NUM_THREADS=$num_threads \
  161. COMPRESSION_TYPE=$compression_type \
  162. VALUE_SIZE=$value_size \
  163. NUM_KEYS=$num_keys"
  164. ENV_VARS_D="$ENV_VARS DURATION=$duration"
  165. PARAMS="\
  166. --enable_blob_files=$enable_blob_files \
  167. --min_blob_size=$min_blob_size \
  168. --blob_file_size=$blob_file_size \
  169. --blob_compression_type=$blob_compression_type \
  170. --blob_file_starting_level=$blob_file_starting_level \
  171. --use_blob_cache=$use_blob_cache \
  172. --use_shared_block_and_blob_cache=$use_shared_block_and_blob_cache \
  173. --blob_cache_size=$blob_cache_size \
  174. --blob_cache_numshardbits=$blob_cache_numshardbits \
  175. --prepopulate_blob_cache=$prepopulate_blob_cache \
  176. --write_buffer_size=$write_buffer_size \
  177. --target_file_size_base=$target_file_size_base \
  178. --max_bytes_for_level_base=$max_bytes_for_level_base"
  179. PARAMS_GC="$PARAMS \
  180. --enable_blob_garbage_collection=$enable_blob_garbage_collection \
  181. --blob_garbage_collection_age_cutoff=$blob_garbage_collection_age_cutoff \
  182. --blob_garbage_collection_force_threshold=$blob_garbage_collection_force_threshold \
  183. --blob_compaction_readahead_size=$blob_compaction_readahead_size"
  184. # bulk load (using fillrandom) + compact
  185. env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh bulkload "$PARAMS"
  186. # overwrite + waitforcompaction
  187. env -u DURATION -S "$ENV_VARS" ./tools/benchmark.sh overwrite "$PARAMS_GC"
  188. # readwhilewriting
  189. env -S "$ENV_VARS_D" ./tools/benchmark.sh readwhilewriting "$PARAMS_GC"
  190. # fwdrangewhilewriting
  191. env -S "$ENV_VARS_D" ./tools/benchmark.sh fwdrangewhilewriting "$PARAMS_GC"
  192. # readrandom
  193. env -S "$ENV_VARS_D" ./tools/benchmark.sh readrandom "$PARAMS_GC"
  194. # fwdrange
  195. env -S "$ENV_VARS_D" ./tools/benchmark.sh fwdrange "$PARAMS_GC"
  196. # save logs to output directory
  197. cp "$db_dir"/LOG* "$output_dir/"