block_cache_pysim.sh 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #!/usr/bin/env bash
  2. # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
  3. #
  4. # A shell script to run a batch of pysims and combine individual pysim output files.
  5. #
  6. # Usage: bash block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs
  7. # trace_file_path: The file path that stores the traces.
  8. # result_dir: The directory to store pysim results. The output files from a pysim are stored in result_dir/ml
  9. # downsample_size: The downsample size used to collect the trace.
  10. # warmup_seconds: The number of seconds used for warmup.
  11. # max_jobs: The max number of concurrent pysims to run.
  12. # Install required packages to run simulations.
  13. # sudo dnf install -y numpy scipy python-matplotlib ipython python-pandas sympy python-nose atlas-devel
  14. ulimit -c 0
  15. if [ $# -ne 5 ]; then
  16. echo "Usage: ./block_cache_pysim.sh trace_file_path result_dir downsample_size warmup_seconds max_jobs"
  17. exit 0
  18. fi
  19. trace_file="$1"
  20. result_dir="$2"
  21. downsample_size="$3"
  22. warmup_seconds="$4"
  23. max_jobs="$5"
  24. max_num_accesses=100000000
  25. current_jobs=1
  26. ml_tmp_result_dir="$result_dir/ml"
  27. rm -rf "$ml_tmp_result_dir"
  28. mkdir -p "$result_dir"
  29. mkdir -p "$ml_tmp_result_dir"
  30. # Report miss ratio in the trace.
  31. current_jobs=$(ps aux | grep pysim | grep python | grep -cv grep)
  32. for cf_name in "all"
  33. do
  34. for cache_size in "1G" "2G" "4G" "8G" "16G" #"12G" "16G" "1T"
  35. do
  36. for cache_type in "opt" "lru" "pylru" "pycctbbt" "pyhb" "ts" "trace" "lru_hybrid" #"pycctblevelbt" #"lru_hybridn" "opt" #"pylru" "pylru_hybrid" "pycctbbt" "pycccfbt" "trace"
  37. do
  38. if [[ $cache_type == "trace" && $cache_size != "16G" ]]; then
  39. # We only need to collect miss ratios observed in the trace once.
  40. continue
  41. fi
  42. while [ "$current_jobs" -ge "$max_jobs" ]
  43. do
  44. sleep 10
  45. echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
  46. current_jobs=$(ps aux | grep pysim | grep python | grep -cv grep)
  47. echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
  48. done
  49. output="log-ml-$cache_type-$cache_size-$cf_name"
  50. echo "Running simulation for $cache_type, cache size $cache_size, and cf_name $cf_name. Number of running jobs: $current_jobs. "
  51. nohup python block_cache_pysim.py "$cache_type" "$cache_size" "$downsample_size" "$warmup_seconds" "$trace_file" "$ml_tmp_result_dir" "$max_num_accesses" "$cf_name" >& "$ml_tmp_result_dir/$output" &
  52. current_jobs=$((current_jobs+1))
  53. done
  54. done
  55. done
  56. # Wait for all jobs to complete.
  57. while [ $current_jobs -gt 0 ]
  58. do
  59. sleep 10
  60. echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
  61. current_jobs=$(ps aux | grep pysim | grep python | grep -cv grep)
  62. echo "Waiting jobs to complete. Number of running jobs: $current_jobs"
  63. done
  64. echo "Combine individual pysim output files"
  65. rm -rf "$result_dir/ml_*"
  66. for header in "header-" "data-"
  67. do
  68. for fn in "$ml_tmp_result_dir"/*
  69. do
  70. sum_file=""
  71. time_unit=""
  72. capacity=""
  73. target_cf_name=""
  74. if [[ $fn == *"timeline"* ]]; then
  75. tmpfn="$fn"
  76. IFS='-' read -ra elements <<< "$tmpfn"
  77. time_unit_index=0
  78. capacity_index=0
  79. for i in "${elements[@]}"
  80. do
  81. if [[ $i == "timeline" ]]; then
  82. break
  83. fi
  84. time_unit_index=$((time_unit_index+1))
  85. done
  86. time_unit_index=$((time_unit_index+1))
  87. capacity_index=$((time_unit_index+2))
  88. target_cf_name_index=$((time_unit_index+3))
  89. time_unit="${elements[$time_unit_index]}_"
  90. capacity="${elements[$capacity_index]}_"
  91. target_cf_name="${elements[$target_cf_name_index]}_"
  92. fi
  93. if [[ $fn == *"${header}ml-policy-timeline"* ]]; then
  94. sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}policy_timeline"
  95. fi
  96. if [[ $fn == *"${header}ml-policy-ratio-timeline"* ]]; then
  97. sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}policy_ratio_timeline"
  98. fi
  99. if [[ $fn == *"${header}ml-miss-timeline"* ]]; then
  100. sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}miss_timeline"
  101. fi
  102. if [[ $fn == *"${header}ml-miss-ratio-timeline"* ]]; then
  103. sum_file="$result_dir/ml_${target_cf_name}${capacity}${time_unit}miss_ratio_timeline"
  104. fi
  105. if [[ $fn == *"${header}ml-mrc"* ]]; then
  106. tmpfn="$fn"
  107. IFS='-' read -ra elements <<< "$tmpfn"
  108. target_cf_name=${elements[-1]}
  109. sum_file="${result_dir}/ml_${target_cf_name}_mrc"
  110. fi
  111. if [[ $fn == *"${header}ml-avgmb"* ]]; then
  112. tmpfn="$fn"
  113. IFS='-' read -ra elements <<< "$tmpfn"
  114. time_unit=${elements[3]}
  115. target_cf_name=${elements[-1]}
  116. sum_file="${result_dir}/ml_${time_unit}_${target_cf_name}_avgmb"
  117. fi
  118. if [[ $fn == *"${header}ml-p95mb"* ]]; then
  119. tmpfn="$fn"
  120. IFS='-' read -ra elements <<< "$tmpfn"
  121. time_unit=${elements[3]}
  122. target_cf_name=${elements[-1]}
  123. sum_file="${result_dir}/ml_${time_unit}_${target_cf_name}_p95mb"
  124. fi
  125. if [[ $sum_file == "" ]]; then
  126. continue
  127. fi
  128. if [[ $header == "header-" ]]; then
  129. if [ -e "$sum_file" ]; then
  130. continue
  131. fi
  132. fi
  133. cat "$fn" >> "$sum_file"
  134. done
  135. done
  136. echo "Done"
  137. for fn in $result_dir/*
  138. do
  139. if [[ $fn == *"_mrc" || $fn == *"_avgmb" || $fn == *"_p95mb" ]]; then
  140. # Sort MRC file by cache_type and cache_size.
  141. tmp_file="$result_dir/tmp_mrc"
  142. cat "$fn" | sort -t ',' -k1,1 -k4,4n > "$tmp_file"
  143. cat "$tmp_file" > "$fn"
  144. rm -rf "$tmp_file"
  145. fi
  146. done