generate_random_db.sh 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. #!/usr/bin/env bash
  2. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  3. #
  4. # A shell script to load some pre generated data file to a DB using ldb tool
  5. # ./ldb needs to be avaible to be executed.
  6. #
  7. # Usage: <SCRIPT> <input_data_path> <DB Path>
  8. if [ "$#" -lt 2 ]; then
  9. echo "usage: $BASH_SOURCE <input_data_path> <DB Path>"
  10. exit 1
  11. fi
  12. input_data_dir=$1
  13. db_dir=$2
  14. rm -rf $db_dir
  15. second_gen_compression_support=
  16. mixed_compression_support=
  17. # Support for `ldb --version` is a crude under-approximation for versions
  18. # supporting dictionary compression and algorithms including zstd and lz4
  19. if ./ldb --version 2>/dev/null >/dev/null; then
  20. second_gen_compression_support=1
  21. if ./ldb load --db=$db_dir --compression_type=mixed --create_if_missing \
  22. < /dev/null 2>/dev/null >/dev/null; then
  23. mixed_compression_support=1
  24. fi
  25. rm -rf $db_dir
  26. fi
  27. echo == Loading data from $input_data_dir to $db_dir
  28. declare -a compression_opts=("no" "snappy" "zlib" "bzip2")
  29. allow_dict=0
  30. if [ "$second_gen_compression_support" == 1 ]; then
  31. if [ "$mixed_compression_support" == 1 ]; then
  32. compression_opts=("zstd" "no" "snappy" "zlib" "bzip2" "lz4" "lz4hc" "mixed")
  33. else
  34. compression_opts=("zstd" "no" "snappy" "zlib" "bzip2" "lz4" "lz4hc")
  35. fi
  36. fi
  37. set -e
  38. n=$RANDOM
  39. c_count=${#compression_opts[@]}
  40. for f in `ls -1 $input_data_dir`
  41. do
  42. # NOTE: This will typically accumulate the loaded data into a .log file which
  43. # will only be flushed to an SST file on recovery in the next iteration, so
  44. # compression settings of this iteration might only apply to data from the
  45. # previous iteration (if there was one). This has the advantage of leaving a
  46. # WAL file for testing its format compatibility (in addition to SST files
  47. # etc.)
  48. c=${compression_opts[n % c_count]}
  49. d=$((n / c_count % 2 * 12345))
  50. echo == Loading $f with compression $c dict bytes $d
  51. if [ "$second_gen_compression_support" == 1 ]; then
  52. d_arg=--compression_max_dict_bytes=$d
  53. else
  54. d_arg=""
  55. fi
  56. ./ldb load --db=$db_dir --compression_type=$c $d_arg --bloom_bits=10 \
  57. --auto_compaction=false --create_if_missing < $input_data_dir/$f
  58. let "n = n + 1"
  59. done