# lstm_comparison.py (2.0 KB)

  1. from caffe2.python import workspace, core, lstm_benchmark, utils
  2. from copy import copy
  3. @utils.debug
  4. def Compare(args):
  5. results = []
  6. num_iters = 1000
  7. args.gpu = True
  8. with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
  9. for batch_size in [64, 128, 256]:
  10. for seq_length in [20, 100]:
  11. for hidden_dim in [40, 100, 400, 800]:
  12. args.batch_size = batch_size
  13. args.seq_length = seq_length
  14. args.hidden_dim = hidden_dim
  15. args.data_size = batch_size * seq_length * num_iters
  16. args.iters_to_report = num_iters // 3
  17. args.implementation = 'own'
  18. t_own = lstm_benchmark.Benchmark(args)
  19. workspace.ResetWorkspace()
  20. args.implementation = 'cudnn'
  21. t_cudnn = lstm_benchmark.Benchmark(args)
  22. workspace.ResetWorkspace()
  23. results.append((copy(args), float(t_own), float(t_cudnn)))
  24. print(args)
  25. print("t_cudnn / t_own: {}".format(t_cudnn / t_own))
  26. for args, t_own, t_cudnn in results:
  27. print("{}: cudnn time: {}, own time: {}, ratio: {}".format(
  28. str(args), t_cudnn, t_own, t_cudnn / t_own))
  29. ratio_sum = 0
  30. for args, t_own, t_cudnn in results:
  31. ratio = float(t_cudnn) / t_own
  32. ratio_sum += ratio
  33. print("hidden_dim: {}, seq_lengths: {}, batch_size: {}, num_layers: {}:"
  34. " cudnn time: {}, own time: {}, ratio: {}".format(
  35. args.hidden_dim, args.seq_length, args.batch_size,
  36. args.num_layers, t_cudnn, t_own, ratio))
  37. print("Ratio average: {}".format(ratio_sum / len(results)))
  38. if __name__ == '__main__':
  39. args = lstm_benchmark.GetArgumentParser().parse_args()
  40. workspace.GlobalInit([
  41. 'caffe2',
  42. '--caffe2_log_level=0',
  43. '--caffe2_print_blob_sizes_at_exit=0',
  44. '--caffe2_gpu_memory_tracking=1'])
  45. Compare(args)