convnet_benchmarks.py 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727
  1. ## @package convnet_benchmarks
  2. # Module caffe2.python.convnet_benchmarks
  3. """
  4. Benchmark for common convnets.
  5. Speed on Titan X, with 10 warmup steps and 10 main steps and with different
  6. versions of cudnn, are as follows (time reported below is per-batch time,
  7. forward / forward+backward):
  8. CuDNN V3 CuDNN v4
  9. AlexNet 32.5 / 108.0 27.4 / 90.1
  10. OverFeat 113.0 / 342.3 91.7 / 276.5
  11. Inception 134.5 / 485.8 125.7 / 450.6
  12. VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
  13. Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
  14. Batch Size Speed per batch Speed per image
  15. 16 22.8 / 72.7 1.43 / 4.54
  16. 32 38.0 / 127.5 1.19 / 3.98
  17. 64 67.2 / 233.6 1.05 / 3.65
  18. 128 125.7 / 450.6 0.98 / 3.52
  19. Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn
  20. v4, is as follows:
  21. AlexNet 68.4 / 218.1
  22. OverFeat 210.5 / 630.3
  23. Inception 300.2 / 1122.2
  24. VGG (batch 64) 405.8 / 1327.7
  25. (Note that these numbers involve a "full" backprop, i.e. the gradient
  26. with respect to the input image is also computed.)
  27. To get the numbers, simply run:
  28. for MODEL in AlexNet OverFeat Inception; do
  29. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  30. --batch_size 128 --model $MODEL --forward_only
  31. done
  32. for MODEL in AlexNet OverFeat Inception; do
  33. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  34. --batch_size 128 --model $MODEL
  35. done
  36. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  37. --batch_size 64 --model VGGA --forward_only
  38. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  39. --batch_size 64 --model VGGA
  40. for BS in 16 32 64 128; do
  41. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  42. --batch_size $BS --model Inception --forward_only
  43. PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
  44. --batch_size $BS --model Inception
  45. done
  46. Note that VGG needs to be run at batch 64 due to memory limit on the backward
  47. pass.
  48. """
  49. import argparse
  50. from caffe2.python import workspace, brew, model_helper
  51. def MLP(order, cudnn_ws):
  52. model = model_helper.ModelHelper(name="MLP")
  53. d = 256
  54. depth = 20
  55. width = 3
  56. for i in range(depth):
  57. for j in range(width):
  58. current = "fc_{}_{}".format(i, j) if i > 0 else "data"
  59. next_ = "fc_{}_{}".format(i + 1, j)
  60. brew.fc(
  61. model,
  62. current,
  63. next_,
  64. dim_in=d,
  65. dim_out=d,
  66. weight_init=('XavierFill', {}),
  67. bias_init=('XavierFill', {}),
  68. )
  69. brew.sum(
  70. model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"]
  71. )
  72. brew.fc(
  73. model,
  74. "sum",
  75. "last",
  76. dim_in=d,
  77. dim_out=1000,
  78. weight_init=('XavierFill', {}),
  79. bias_init=('XavierFill', {}),
  80. )
  81. xent = model.net.LabelCrossEntropy(["last", "label"], "xent")
  82. model.net.AveragedLoss(xent, "loss")
  83. return model, d
  84. def AlexNet(order, cudnn_ws):
  85. my_arg_scope = {
  86. 'order': order,
  87. 'use_cudnn': True,
  88. 'cudnn_exhaustive_search': True,
  89. }
  90. if cudnn_ws:
  91. my_arg_scope['ws_nbytes_limit'] = cudnn_ws
  92. model = model_helper.ModelHelper(
  93. name="alexnet",
  94. arg_scope=my_arg_scope,
  95. )
  96. conv1 = brew.conv(
  97. model,
  98. "data",
  99. "conv1",
  100. 3,
  101. 64,
  102. 11, ('XavierFill', {}), ('ConstantFill', {}),
  103. stride=4,
  104. pad=2
  105. )
  106. relu1 = brew.relu(model, conv1, "conv1")
  107. pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2)
  108. conv2 = brew.conv(
  109. model,
  110. pool1,
  111. "conv2",
  112. 64,
  113. 192,
  114. 5,
  115. ('XavierFill', {}),
  116. ('ConstantFill', {}),
  117. pad=2
  118. )
  119. relu2 = brew.relu(model, conv2, "conv2")
  120. pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2)
  121. conv3 = brew.conv(
  122. model,
  123. pool2,
  124. "conv3",
  125. 192,
  126. 384,
  127. 3,
  128. ('XavierFill', {}),
  129. ('ConstantFill', {}),
  130. pad=1
  131. )
  132. relu3 = brew.relu(model, conv3, "conv3")
  133. conv4 = brew.conv(
  134. model,
  135. relu3,
  136. "conv4",
  137. 384,
  138. 256,
  139. 3,
  140. ('XavierFill', {}),
  141. ('ConstantFill', {}),
  142. pad=1
  143. )
  144. relu4 = brew.relu(model, conv4, "conv4")
  145. conv5 = brew.conv(
  146. model,
  147. relu4,
  148. "conv5",
  149. 256,
  150. 256,
  151. 3,
  152. ('XavierFill', {}),
  153. ('ConstantFill', {}),
  154. pad=1
  155. )
  156. relu5 = brew.relu(model, conv5, "conv5")
  157. pool5 = brew.max_pool(model, relu5, "pool5", kernel=3, stride=2)
  158. fc6 = brew.fc(
  159. model,
  160. pool5, "fc6", 256 * 6 * 6, 4096, ('XavierFill', {}),
  161. ('ConstantFill', {})
  162. )
  163. relu6 = brew.relu(model, fc6, "fc6")
  164. fc7 = brew.fc(
  165. model, relu6, "fc7", 4096, 4096, ('XavierFill', {}), ('ConstantFill', {})
  166. )
  167. relu7 = brew.relu(model, fc7, "fc7")
  168. fc8 = brew.fc(
  169. model, relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
  170. )
  171. pred = brew.softmax(model, fc8, "pred")
  172. xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
  173. model.net.AveragedLoss(xent, "loss")
  174. return model, 224
  175. def OverFeat(order, cudnn_ws):
  176. my_arg_scope = {
  177. 'order': order,
  178. 'use_cudnn': True,
  179. 'cudnn_exhaustive_search': True,
  180. }
  181. if cudnn_ws:
  182. my_arg_scope['ws_nbytes_limit'] = cudnn_ws
  183. model = model_helper.ModelHelper(
  184. name="overfeat",
  185. arg_scope=my_arg_scope,
  186. )
  187. conv1 = brew.conv(
  188. model,
  189. "data",
  190. "conv1",
  191. 3,
  192. 96,
  193. 11,
  194. ('XavierFill', {}),
  195. ('ConstantFill', {}),
  196. stride=4,
  197. )
  198. relu1 = brew.relu(model, conv1, "conv1")
  199. pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
  200. conv2 = brew.conv(
  201. model, pool1, "conv2", 96, 256, 5, ('XavierFill', {}),
  202. ('ConstantFill', {})
  203. )
  204. relu2 = brew.relu(model, conv2, "conv2")
  205. pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
  206. conv3 = brew.conv(
  207. model,
  208. pool2,
  209. "conv3",
  210. 256,
  211. 512,
  212. 3,
  213. ('XavierFill', {}),
  214. ('ConstantFill', {}),
  215. pad=1,
  216. )
  217. relu3 = brew.relu(model, conv3, "conv3")
  218. conv4 = brew.conv(
  219. model,
  220. relu3,
  221. "conv4",
  222. 512,
  223. 1024,
  224. 3,
  225. ('XavierFill', {}),
  226. ('ConstantFill', {}),
  227. pad=1,
  228. )
  229. relu4 = brew.relu(model, conv4, "conv4")
  230. conv5 = brew.conv(
  231. model,
  232. relu4,
  233. "conv5",
  234. 1024,
  235. 1024,
  236. 3,
  237. ('XavierFill', {}),
  238. ('ConstantFill', {}),
  239. pad=1,
  240. )
  241. relu5 = brew.relu(model, conv5, "conv5")
  242. pool5 = brew.max_pool(model, relu5, "pool5", kernel=2, stride=2)
  243. fc6 = brew.fc(
  244. model, pool5, "fc6", 1024 * 6 * 6, 3072, ('XavierFill', {}),
  245. ('ConstantFill', {})
  246. )
  247. relu6 = brew.relu(model, fc6, "fc6")
  248. fc7 = brew.fc(
  249. model, relu6, "fc7", 3072, 4096, ('XavierFill', {}), ('ConstantFill', {})
  250. )
  251. relu7 = brew.relu(model, fc7, "fc7")
  252. fc8 = brew.fc(
  253. model, relu7, "fc8", 4096, 1000, ('XavierFill', {}), ('ConstantFill', {})
  254. )
  255. pred = brew.softmax(model, fc8, "pred")
  256. xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
  257. model.net.AveragedLoss(xent, "loss")
  258. return model, 231
  259. def VGGA(order, cudnn_ws):
  260. my_arg_scope = {
  261. 'order': order,
  262. 'use_cudnn': True,
  263. 'cudnn_exhaustive_search': True,
  264. }
  265. if cudnn_ws:
  266. my_arg_scope['ws_nbytes_limit'] = cudnn_ws
  267. model = model_helper.ModelHelper(
  268. name="vgga",
  269. arg_scope=my_arg_scope,
  270. )
  271. conv1 = brew.conv(
  272. model,
  273. "data",
  274. "conv1",
  275. 3,
  276. 64,
  277. 3,
  278. ('XavierFill', {}),
  279. ('ConstantFill', {}),
  280. pad=1,
  281. )
  282. relu1 = brew.relu(model, conv1, "conv1")
  283. pool1 = brew.max_pool(model, relu1, "pool1", kernel=2, stride=2)
  284. conv2 = brew.conv(
  285. model,
  286. pool1,
  287. "conv2",
  288. 64,
  289. 128,
  290. 3,
  291. ('XavierFill', {}),
  292. ('ConstantFill', {}),
  293. pad=1,
  294. )
  295. relu2 = brew.relu(model, conv2, "conv2")
  296. pool2 = brew.max_pool(model, relu2, "pool2", kernel=2, stride=2)
  297. conv3 = brew.conv(
  298. model,
  299. pool2,
  300. "conv3",
  301. 128,
  302. 256,
  303. 3,
  304. ('XavierFill', {}),
  305. ('ConstantFill', {}),
  306. pad=1,
  307. )
  308. relu3 = brew.relu(model, conv3, "conv3")
  309. conv4 = brew.conv(
  310. model,
  311. relu3,
  312. "conv4",
  313. 256,
  314. 256,
  315. 3,
  316. ('XavierFill', {}),
  317. ('ConstantFill', {}),
  318. pad=1,
  319. )
  320. relu4 = brew.relu(model, conv4, "conv4")
  321. pool4 = brew.max_pool(model, relu4, "pool4", kernel=2, stride=2)
  322. conv5 = brew.conv(
  323. model,
  324. pool4,
  325. "conv5",
  326. 256,
  327. 512,
  328. 3,
  329. ('XavierFill', {}),
  330. ('ConstantFill', {}),
  331. pad=1,
  332. )
  333. relu5 = brew.relu(model, conv5, "conv5")
  334. conv6 = brew.conv(
  335. model,
  336. relu5,
  337. "conv6",
  338. 512,
  339. 512,
  340. 3,
  341. ('XavierFill', {}),
  342. ('ConstantFill', {}),
  343. pad=1,
  344. )
  345. relu6 = brew.relu(model, conv6, "conv6")
  346. pool6 = brew.max_pool(model, relu6, "pool6", kernel=2, stride=2)
  347. conv7 = brew.conv(
  348. model,
  349. pool6,
  350. "conv7",
  351. 512,
  352. 512,
  353. 3,
  354. ('XavierFill', {}),
  355. ('ConstantFill', {}),
  356. pad=1,
  357. )
  358. relu7 = brew.relu(model, conv7, "conv7")
  359. conv8 = brew.conv(
  360. model,
  361. relu7,
  362. "conv8",
  363. 512,
  364. 512,
  365. 3,
  366. ('XavierFill', {}),
  367. ('ConstantFill', {}),
  368. pad=1,
  369. )
  370. relu8 = brew.relu(model, conv8, "conv8")
  371. pool8 = brew.max_pool(model, relu8, "pool8", kernel=2, stride=2)
  372. fcix = brew.fc(
  373. model, pool8, "fcix", 512 * 7 * 7, 4096, ('XavierFill', {}),
  374. ('ConstantFill', {})
  375. )
  376. reluix = brew.relu(model, fcix, "fcix")
  377. fcx = brew.fc(
  378. model, reluix, "fcx", 4096, 4096, ('XavierFill', {}),
  379. ('ConstantFill', {})
  380. )
  381. relux = brew.relu(model, fcx, "fcx")
  382. fcxi = brew.fc(
  383. model, relux, "fcxi", 4096, 1000, ('XavierFill', {}),
  384. ('ConstantFill', {})
  385. )
  386. pred = brew.softmax(model, fcxi, "pred")
  387. xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
  388. model.net.AveragedLoss(xent, "loss")
  389. return model, 231
  390. def _InceptionModule(
  391. model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
  392. conv5_depths, pool_depth
  393. ):
  394. # path 1: 1x1 conv
  395. conv1 = brew.conv(
  396. model, input_blob, output_name + ":conv1", input_depth, conv1_depth, 1,
  397. ('XavierFill', {}), ('ConstantFill', {})
  398. )
  399. conv1 = brew.relu(model, conv1, conv1)
  400. # path 2: 1x1 conv + 3x3 conv
  401. conv3_reduce = brew.conv(
  402. model, input_blob, output_name + ":conv3_reduce", input_depth,
  403. conv3_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
  404. )
  405. conv3_reduce = brew.relu(model, conv3_reduce, conv3_reduce)
  406. conv3 = brew.conv(
  407. model,
  408. conv3_reduce,
  409. output_name + ":conv3",
  410. conv3_depths[0],
  411. conv3_depths[1],
  412. 3,
  413. ('XavierFill', {}),
  414. ('ConstantFill', {}),
  415. pad=1,
  416. )
  417. conv3 = brew.relu(model, conv3, conv3)
  418. # path 3: 1x1 conv + 5x5 conv
  419. conv5_reduce = brew.conv(
  420. model, input_blob, output_name + ":conv5_reduce", input_depth,
  421. conv5_depths[0], 1, ('XavierFill', {}), ('ConstantFill', {})
  422. )
  423. conv5_reduce = brew.relu(model, conv5_reduce, conv5_reduce)
  424. conv5 = brew.conv(
  425. model,
  426. conv5_reduce,
  427. output_name + ":conv5",
  428. conv5_depths[0],
  429. conv5_depths[1],
  430. 5,
  431. ('XavierFill', {}),
  432. ('ConstantFill', {}),
  433. pad=2,
  434. )
  435. conv5 = brew.relu(model, conv5, conv5)
  436. # path 4: pool + 1x1 conv
  437. pool = brew.max_pool(
  438. model,
  439. input_blob,
  440. output_name + ":pool",
  441. kernel=3,
  442. stride=1,
  443. pad=1,
  444. )
  445. pool_proj = brew.conv(
  446. model, pool, output_name + ":pool_proj", input_depth, pool_depth, 1,
  447. ('XavierFill', {}), ('ConstantFill', {})
  448. )
  449. pool_proj = brew.relu(model, pool_proj, pool_proj)
  450. output = brew.concat(model, [conv1, conv3, conv5, pool_proj], output_name)
  451. return output
  452. def Inception(order, cudnn_ws):
  453. my_arg_scope = {
  454. 'order': order,
  455. 'use_cudnn': True,
  456. 'cudnn_exhaustive_search': True,
  457. }
  458. if cudnn_ws:
  459. my_arg_scope['ws_nbytes_limit'] = cudnn_ws
  460. model = model_helper.ModelHelper(
  461. name="inception",
  462. arg_scope=my_arg_scope,
  463. )
  464. conv1 = brew.conv(
  465. model,
  466. "data",
  467. "conv1",
  468. 3,
  469. 64,
  470. 7,
  471. ('XavierFill', {}),
  472. ('ConstantFill', {}),
  473. stride=2,
  474. pad=3,
  475. )
  476. relu1 = brew.relu(model, conv1, "conv1")
  477. pool1 = brew.max_pool(model, relu1, "pool1", kernel=3, stride=2, pad=1)
  478. conv2a = brew.conv(
  479. model, pool1, "conv2a", 64, 64, 1, ('XavierFill', {}),
  480. ('ConstantFill', {})
  481. )
  482. conv2a = brew.relu(model, conv2a, conv2a)
  483. conv2 = brew.conv(
  484. model,
  485. conv2a,
  486. "conv2",
  487. 64,
  488. 192,
  489. 3,
  490. ('XavierFill', {}),
  491. ('ConstantFill', {}),
  492. pad=1,
  493. )
  494. relu2 = brew.relu(model, conv2, "conv2")
  495. pool2 = brew.max_pool(model, relu2, "pool2", kernel=3, stride=2, pad=1)
  496. # Inception modules
  497. inc3 = _InceptionModule(
  498. model, pool2, 192, "inc3", 64, [96, 128], [16, 32], 32
  499. )
  500. inc4 = _InceptionModule(
  501. model, inc3, 256, "inc4", 128, [128, 192], [32, 96], 64
  502. )
  503. pool5 = brew.max_pool(model, inc4, "pool5", kernel=3, stride=2, pad=1)
  504. inc5 = _InceptionModule(
  505. model, pool5, 480, "inc5", 192, [96, 208], [16, 48], 64
  506. )
  507. inc6 = _InceptionModule(
  508. model, inc5, 512, "inc6", 160, [112, 224], [24, 64], 64
  509. )
  510. inc7 = _InceptionModule(
  511. model, inc6, 512, "inc7", 128, [128, 256], [24, 64], 64
  512. )
  513. inc8 = _InceptionModule(
  514. model, inc7, 512, "inc8", 112, [144, 288], [32, 64], 64
  515. )
  516. inc9 = _InceptionModule(
  517. model, inc8, 528, "inc9", 256, [160, 320], [32, 128], 128
  518. )
  519. pool9 = brew.max_pool(model, inc9, "pool9", kernel=3, stride=2, pad=1)
  520. inc10 = _InceptionModule(
  521. model, pool9, 832, "inc10", 256, [160, 320], [32, 128], 128
  522. )
  523. inc11 = _InceptionModule(
  524. model, inc10, 832, "inc11", 384, [192, 384], [48, 128], 128
  525. )
  526. pool11 = brew.average_pool(model, inc11, "pool11", kernel=7, stride=1)
  527. fc = brew.fc(
  528. model, pool11, "fc", 1024, 1000, ('XavierFill', {}),
  529. ('ConstantFill', {})
  530. )
  531. # It seems that Soumith's benchmark does not have softmax on top
  532. # for Inception. We will add it anyway so we can have a proper
  533. # backward pass.
  534. pred = brew.softmax(model, fc, "pred")
  535. xent = model.net.LabelCrossEntropy([pred, "label"], "xent")
  536. model.net.AveragedLoss(xent, "loss")
  537. return model, 224
  538. def AddParameterUpdate(model):
  539. """ Simple plain SGD update -- not tuned to actually train the models """
  540. ITER = brew.iter(model, "iter")
  541. LR = model.net.LearningRate(
  542. ITER, "LR", base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999)
  543. ONE = model.param_init_net.ConstantFill([], "ONE", shape=[1], value=1.0)
  544. for param in model.params:
  545. param_grad = model.param_to_grad[param]
  546. model.net.WeightedSum([param, ONE, param_grad, LR], param)
  547. def Benchmark(model_gen, arg):
  548. model, input_size = model_gen(arg.order, arg.cudnn_ws)
  549. model.Proto().type = arg.net_type
  550. model.Proto().num_workers = arg.num_workers
  551. # In order to be able to run everything without feeding more stuff, let's
  552. # add the data and label blobs to the parameter initialization net as well.
  553. if arg.order == "NCHW":
  554. input_shape = [arg.batch_size, 3, input_size, input_size]
  555. else:
  556. input_shape = [arg.batch_size, input_size, input_size, 3]
  557. if arg.model == "MLP":
  558. input_shape = [arg.batch_size, input_size]
  559. model.param_init_net.GaussianFill(
  560. [],
  561. "data",
  562. shape=input_shape,
  563. mean=0.0,
  564. std=1.0
  565. )
  566. model.param_init_net.UniformIntFill(
  567. [],
  568. "label",
  569. shape=[arg.batch_size, ],
  570. min=0,
  571. max=999
  572. )
  573. if arg.forward_only:
  574. print('{}: running forward only.'.format(arg.model))
  575. else:
  576. print('{}: running forward-backward.'.format(arg.model))
  577. model.AddGradientOperators(["loss"])
  578. AddParameterUpdate(model)
  579. if arg.order == 'NHWC':
  580. print(
  581. '==WARNING==\n'
  582. 'NHWC order with CuDNN may not be supported yet, so I might\n'
  583. 'exit suddenly.'
  584. )
  585. if not arg.cpu:
  586. model.param_init_net.RunAllOnGPU()
  587. model.net.RunAllOnGPU()
  588. if arg.engine:
  589. for op in model.net.Proto().op:
  590. op.engine = arg.engine
  591. if arg.dump_model:
  592. # Writes out the pbtxt for benchmarks on e.g. Android
  593. with open(
  594. "{0}_init_batch_{1}.pbtxt".format(arg.model, arg.batch_size), "w"
  595. ) as fid:
  596. fid.write(str(model.param_init_net.Proto()))
  597. with open("{0}.pbtxt".format(arg.model, arg.batch_size), "w") as fid:
  598. fid.write(str(model.net.Proto()))
  599. workspace.RunNetOnce(model.param_init_net)
  600. workspace.CreateNet(model.net)
  601. workspace.BenchmarkNet(
  602. model.net.Proto().name, arg.warmup_iterations, arg.iterations,
  603. arg.layer_wise_benchmark)
  604. def GetArgumentParser():
  605. parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
  606. parser.add_argument(
  607. "--batch_size",
  608. type=int,
  609. default=128,
  610. help="The batch size."
  611. )
  612. parser.add_argument("--model", type=str, help="The model to benchmark.")
  613. parser.add_argument(
  614. "--order",
  615. type=str,
  616. default="NCHW",
  617. help="The order to evaluate."
  618. )
  619. parser.add_argument(
  620. "--cudnn_ws",
  621. type=int,
  622. help="The cudnn workspace size."
  623. )
  624. parser.add_argument(
  625. "--iterations",
  626. type=int,
  627. default=10,
  628. help="Number of iterations to run the network."
  629. )
  630. parser.add_argument(
  631. "--warmup_iterations",
  632. type=int,
  633. default=10,
  634. help="Number of warm-up iterations before benchmarking."
  635. )
  636. parser.add_argument(
  637. "--forward_only",
  638. action='store_true',
  639. help="If set, only run the forward pass."
  640. )
  641. parser.add_argument(
  642. "--layer_wise_benchmark",
  643. action='store_true',
  644. help="If True, run the layer-wise benchmark as well."
  645. )
  646. parser.add_argument(
  647. "--cpu",
  648. action='store_true',
  649. help="If True, run testing on CPU instead of GPU."
  650. )
  651. parser.add_argument(
  652. "--engine",
  653. type=str,
  654. default="",
  655. help="If set, blindly prefer the given engine(s) for every op.")
  656. parser.add_argument(
  657. "--dump_model",
  658. action='store_true',
  659. help="If True, dump the model prototxts to disk."
  660. )
  661. parser.add_argument("--net_type", type=str, default="dag")
  662. parser.add_argument("--num_workers", type=int, default=2)
  663. parser.add_argument("--use-nvtx", default=False, action='store_true')
  664. parser.add_argument("--htrace_span_log_path", type=str)
  665. return parser
  666. if __name__ == '__main__':
  667. args, extra_args = GetArgumentParser().parse_known_args()
  668. if (
  669. not args.batch_size or not args.model or not args.order
  670. ):
  671. GetArgumentParser().print_help()
  672. else:
  673. workspace.GlobalInit(
  674. ['caffe2', '--caffe2_log_level=0'] + extra_args +
  675. (['--caffe2_use_nvtx'] if args.use_nvtx else []) +
  676. (['--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
  677. if args.htrace_span_log_path else []))
  678. model_map = {
  679. 'AlexNet': AlexNet,
  680. 'OverFeat': OverFeat,
  681. 'VGGA': VGGA,
  682. 'Inception': Inception,
  683. 'MLP': MLP,
  684. }
  685. Benchmark(model_map[args.model], args)