memonger_test.py 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842
  1. import numpy as np
  2. from caffe2.python import workspace, memonger, core, model_helper, brew
  3. from caffe2.proto import caffe2_pb2
  4. import caffe2.python.hypothesis_test_util as hu
  5. from future.utils import viewvalues
  6. import hypothesis.strategies as st
  7. from hypothesis import given, settings
  8. import unittest
  9. def has_blob(proto, needle):
  10. for op in proto.op:
  11. for inp in op.input:
  12. if inp == needle:
  13. return True
  14. for outp in op.output:
  15. if outp == needle:
  16. return True
  17. return False
  18. def count_blobs(proto):
  19. blobs = set()
  20. for op in proto.op:
  21. blobs = blobs.union(set(op.input)).union(set(op.output))
  22. return len(blobs)
  23. class MemongerTest(hu.HypothesisTestCase):
  24. @given(input_dim=st.integers(min_value=1, max_value=10),
  25. output_dim=st.integers(min_value=1, max_value=10),
  26. batch_size=st.integers(min_value=1, max_value=10),
  27. do=st.sampled_from(hu.device_options),
  28. algo=st.sampled_from(memonger.AssignmentAlgorithm))
  29. @settings(max_examples=5, deadline=None)
  30. def test_simple_memonger(self, input_dim, output_dim, batch_size, do, algo):
  31. m = model_helper.ModelHelper()
  32. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  33. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  34. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  35. fc3.Relu([], fc3)\
  36. .Softmax([], "pred") \
  37. .LabelCrossEntropy(["label"], ["xent"]) \
  38. .AveragedLoss([], "loss")
  39. input_to_grad = m.AddGradientOperators(["loss"])
  40. m.net.Proto().device_option.CopyFrom(do)
  41. m.param_init_net.Proto().device_option.CopyFrom(do)
  42. static_blobs = \
  43. [o for op in m.param_init_net.Proto().op for o in op.output] + \
  44. ["data", "label", "loss", input_to_grad["fc1_w"]]
  45. optimization = memonger.optimize_interference(
  46. m.Proto(), static_blobs, algo=algo)
  47. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  48. label = np.random.randint(
  49. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  50. workspace.RunNetOnce(m.param_init_net)
  51. workspace.FeedBlob("data", data, device_option=do)
  52. workspace.FeedBlob("label", label, device_option=do)
  53. workspace.RunNetOnce(m.net)
  54. loss = workspace.FetchBlob("loss")
  55. grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
  56. workspace.RunNetOnce(optimization.net)
  57. optimized_loss = workspace.FetchBlob("loss")
  58. optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
  59. np.testing.assert_almost_equal(loss, optimized_loss)
  60. np.testing.assert_almost_equal(grad, optimized_grad)
  61. stats = memonger.compute_statistics(optimization.assignments)
  62. self.assertLess(stats.optimized_nbytes, stats.baseline_nbytes)
  63. # run with blob sizes
  64. blob_sizes = memonger.collect_blob_sizes(m.Proto())
  65. optimization1 = memonger.optimize_interference(
  66. m.Proto(), static_blobs, blob_sizes=blob_sizes, algo=algo)
  67. workspace.RunNetOnce(optimization1.net)
  68. optimized_loss = workspace.FetchBlob("loss")
  69. optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
  70. np.testing.assert_almost_equal(loss, optimized_loss)
  71. np.testing.assert_almost_equal(grad, optimized_grad)
  72. stats = memonger.compute_statistics(optimization1.assignments)
  73. self.assertLessEqual(stats.optimized_nbytes, stats.baseline_nbytes)
  74. @given(input_dim=st.integers(min_value=1, max_value=10),
  75. output_dim=st.integers(min_value=1, max_value=10),
  76. batch_size=st.integers(min_value=1, max_value=10),
  77. do=st.sampled_from(hu.device_options))
  78. @settings(max_examples=5, deadline=None)
  79. def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
  80. m = model_helper.ModelHelper()
  81. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  82. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  83. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  84. fc3.Relu([], fc3)\
  85. .Softmax([], "pred") \
  86. .LabelCrossEntropy(["label"], ["xent"]) \
  87. .AveragedLoss([], "loss")
  88. input_to_grad = m.AddGradientOperators(["loss"])
  89. m.net.Proto().device_option.CopyFrom(do)
  90. m.param_init_net.Proto().device_option.CopyFrom(do)
  91. static_blobs = \
  92. [o for op in m.param_init_net.Proto().op for o in op.output] + \
  93. ["data", "label", "loss", input_to_grad["fc1_w"]]
  94. optimized_net = memonger.optimize_inference_fast(
  95. m.Proto(), static_blobs)
  96. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  97. label = np.random.randint(
  98. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  99. workspace.RunNetOnce(m.param_init_net)
  100. workspace.FeedBlob("data", data, device_option=do)
  101. workspace.FeedBlob("label", label, device_option=do)
  102. workspace.RunNetOnce(m.net)
  103. loss = workspace.FetchBlob("loss")
  104. grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
  105. workspace.RunNetOnce(optimized_net)
  106. optimized_loss = workspace.FetchBlob("loss")
  107. optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
  108. np.testing.assert_almost_equal(loss, optimized_loss)
  109. np.testing.assert_almost_equal(grad, optimized_grad)
  110. self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))
  111. def test_fast_memonger_unique_outputs(self):
  112. m = model_helper.ModelHelper()
  113. fc = []
  114. for i in range(2):
  115. z = brew.fc(
  116. m, "data{}".format(i), "fc".format(i), dim_in=2, dim_out=2)
  117. fc.append(z)
  118. r = []
  119. # Trick is here to have same input appear twice in a same Sum
  120. for x in fc:
  121. for y in fc:
  122. r.append(brew.sum(m, [x, y], 1))
  123. concated = brew.concat(m, r, "concated")
  124. brew.relu(m, concated, "merged")
  125. static_blobs = \
  126. [o for op in m.param_init_net.Proto().op for o in op.output] + \
  127. ["merged"] + ["data{}".format(i) for i in range(len(fc))]
  128. optimized_net = memonger.optimize_inference_fast(
  129. m.Proto(), static_blobs)
  130. for op in optimized_net.op:
  131. self.assertEqual(len(op.output), len(set(op.output)), str(op))
  132. @given(input_dim=st.integers(min_value=1, max_value=4),
  133. output_dim=st.integers(min_value=1, max_value=4),
  134. batch_size=st.integers(min_value=1, max_value=4))
  135. def test_gradient_optim(self, input_dim, output_dim, batch_size):
  136. m = model_helper.ModelHelper()
  137. with core.NameScope("name_x"):
  138. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  139. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  140. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  141. fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
  142. fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
  143. fc5.Relu([], fc5)\
  144. .Softmax([], "pred") \
  145. .LabelCrossEntropy(["label"], ["xent"]) \
  146. .AveragedLoss([], "loss")
  147. input_to_grad = m.AddGradientOperators(["name_x/loss"])
  148. blobs_before = count_blobs(m.net.Proto())
  149. optim_proto = memonger.share_grad_blobs(
  150. m.net,
  151. ["name_x/loss"],
  152. set(viewvalues(m.param_to_grad)),
  153. "name_x/",
  154. share_activations=False,
  155. )
  156. blobs_after = count_blobs(optim_proto)
  157. self.assertLess(blobs_after, blobs_before)
  158. optim_proto_wacts = memonger.share_grad_blobs(
  159. m.net,
  160. ["name_x/loss"],
  161. set(viewvalues(m.param_to_grad)),
  162. "name_x/",
  163. share_activations=True,
  164. dont_share_blobs=set([str(input_to_grad["name_x/fc1_w"])]),
  165. )
  166. blobs_wact_optim = count_blobs(optim_proto_wacts)
  167. self.assertLessEqual(blobs_wact_optim, blobs_after)
  168. # Check that the last activations are not shared
  169. self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
  170. self.assertTrue(
  171. has_blob(optim_proto_wacts, "name_x/fc5"),
  172. "Dont remap final activation",
  173. )
  174. # Test networks produce exactly same gradients
  175. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  176. label = np.random.randint(
  177. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  178. workspace.RunNetOnce(m.param_init_net)
  179. workspace.FeedBlob("name_x/data", data)
  180. workspace.FeedBlob("name_x/label", label)
  181. workspace.RunNetOnce(m.net)
  182. loss = workspace.FetchBlob("name_x/loss")
  183. grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
  184. workspace.RunNetOnce(optim_proto)
  185. optimized_loss = workspace.FetchBlob("name_x/loss")
  186. optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
  187. np.testing.assert_almost_equal(loss, optimized_loss)
  188. np.testing.assert_almost_equal(grad, optimized_grad)
  189. workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))
  190. # Run with the forward optimization
  191. workspace.RunNetOnce(optim_proto_wacts)
  192. optimized_loss = workspace.FetchBlob("name_x/loss")
  193. optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
  194. np.testing.assert_almost_equal(loss, optimized_loss)
  195. np.testing.assert_almost_equal(grad, optimized_grad)
  196. @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
  197. def test_memonger_mix_cpu_gpu(self):
  198. '''
  199. Check that memonger does not make blobs cross CPU/GPU boundary
  200. '''
  201. m = model_helper.ModelHelper()
  202. with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
  203. fc1 = brew.fc(m, "data", "fc1", dim_in=2, dim_out=2)
  204. fc2 = brew.fc(m, fc1, "fc2", dim_in=2, dim_out=2)
  205. fc3 = brew.fc(m, fc2, "fc3", dim_in=2, dim_out=2)
  206. fc4 = brew.fc(m, fc3, "fc4", dim_in=2, dim_out=2)
  207. fc4_cpu = m.net.CopyGPUToCPU(fc4, "fc4_cpu")
  208. with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
  209. fc5_cpu = brew.fc(m, fc4_cpu, "fc5_cpu", dim_in=2, dim_out=2)
  210. fc6_cpu = brew.fc(m, fc5_cpu, "fc6_cpu", dim_in=2, dim_out=2)
  211. fc7_cpu = brew.fc(m, fc6_cpu, "fc7_cpu", dim_in=2, dim_out=2)
  212. fc7_cpu.Relu([], fc7_cpu) \
  213. .Softmax([], "pred") \
  214. .LabelCrossEntropy(["label"], ["xent"]) \
  215. .AveragedLoss([], "loss")
  216. m.AddGradientOperators(["loss"])
  217. blobs_before = count_blobs(m.net.Proto())
  218. optim_proto = memonger.share_grad_blobs(
  219. m.net,
  220. ["loss"],
  221. set(viewvalues(m.param_to_grad)),
  222. "",
  223. share_activations=True,
  224. dont_share_blobs=set(),
  225. )
  226. blobs_after = count_blobs(optim_proto)
  227. self.assertLess(blobs_after, blobs_before)
  228. # Create set of blobs on CPU side and GPU side and check they don't
  229. # overlap
  230. device_blobs = {caffe2_pb2.CPU: set(), workspace.GpuDeviceType: set()}
  231. for op in optim_proto.op:
  232. if op.type not in ['CopyCPUToGPU', "CopyGPUToCPU"]:
  233. dev = op.device_option.device_type
  234. for b in list(op.input) + list(op.output):
  235. device_blobs[dev].add(b)
  236. device_crossers = device_blobs[caffe2_pb2.CPU].intersection(
  237. device_blobs[workspace.GpuDeviceType]
  238. )
  239. self.assertEquals(device_crossers, set())
  240. @given(input_dim=st.integers(min_value=4, max_value=4),
  241. output_dim=st.integers(min_value=4, max_value=4),
  242. batch_size=st.integers(min_value=4, max_value=4))
  243. @settings(deadline=1000)
  244. def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
  245. m = model_helper.ModelHelper()
  246. with core.NameScope("name_x"):
  247. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  248. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  249. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  250. fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
  251. fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
  252. fc5.Relu([], fc5) \
  253. .Softmax([], "pred1") \
  254. .LabelCrossEntropy(["label"], ["xent1"]) \
  255. .AveragedLoss([], "loss1")
  256. fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
  257. fc6.Relu([], fc6) \
  258. .Softmax([], "pred2") \
  259. .LabelCrossEntropy(["label"], ["xent2"]) \
  260. .AveragedLoss([], "loss2")
  261. input_to_grad = m.AddGradientOperators(["name_x/loss1", "name_x/loss2"])
  262. blobs_before = count_blobs(m.net.Proto())
  263. optim_proto = memonger.share_grad_blobs(
  264. m.net,
  265. ["name_x/loss1", "name_x/loss2"],
  266. set(viewvalues(m.param_to_grad)),
  267. "name_x", # "name_x//shared_gradinp_0_shared" if using "name_x/"
  268. share_activations=True,
  269. dont_share_blobs=set(['name_x/fc6', 'name_x/fc5',
  270. str(input_to_grad["name_x/fc1_w"])]),
  271. )
  272. blobs_after = count_blobs(optim_proto)
  273. self.assertLess(blobs_after, blobs_before)
  274. self.assertTrue(has_blob(optim_proto, "name_x/fc6"))
  275. # Test networks produce exactly same gradients
  276. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  277. label = np.random.randint(
  278. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  279. workspace.RunNetOnce(m.param_init_net)
  280. workspace.FeedBlob("name_x/data", data)
  281. workspace.FeedBlob("name_x/label", label)
  282. workspace.RunNetOnce(m.net)
  283. loss1 = workspace.FetchBlob("name_x/loss1")
  284. loss2 = workspace.FetchBlob("name_x/loss2")
  285. grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
  286. workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))
  287. workspace.RunNetOnce(optim_proto)
  288. optimized_loss1 = workspace.FetchBlob("name_x/loss1")
  289. optimized_loss2 = workspace.FetchBlob("name_x/loss2")
  290. optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
  291. np.testing.assert_almost_equal(loss1, optimized_loss1)
  292. np.testing.assert_almost_equal(loss2, optimized_loss2)
  293. np.testing.assert_almost_equal(grad, optimized_grad)
  294. @given(input_dim=st.integers(min_value=4, max_value=4),
  295. output_dim=st.integers(min_value=4, max_value=4),
  296. batch_size=st.integers(min_value=4, max_value=4))
  297. @settings(deadline=1000)
  298. def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
  299. m = model_helper.ModelHelper()
  300. m.Proto().type = "dag"
  301. m.Proto().num_workers = 4
  302. with core.NameScope("name_x"):
  303. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  304. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  305. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  306. fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
  307. fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
  308. # Branch
  309. fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
  310. fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
  311. fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
  312. fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
  313. fc5.Relu([], fc5sum) \
  314. .Softmax([], "pred1") \
  315. .LabelCrossEntropy(["label"], ["xent1"]) \
  316. .AveragedLoss([], "loss1")
  317. fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
  318. fc6.Relu([], fc6) \
  319. .Softmax([], "pred2") \
  320. .LabelCrossEntropy(["label"], ["xent2"]) \
  321. .AveragedLoss([], "loss2")
  322. blobs_before = count_blobs(m.net.Proto())
  323. optim_proto = memonger.optimize_inference_for_dag(
  324. m.net, ["name_x/data"], "name_x"
  325. )
  326. blobs_after = count_blobs(optim_proto)
  327. self.assertLess(blobs_after, blobs_before)
  328. # Test networks produce exactly same results
  329. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  330. label = np.random.randint(
  331. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  332. workspace.RunNetOnce(m.param_init_net)
  333. workspace.FeedBlob("name_x/data", data)
  334. workspace.FeedBlob("name_x/label", label)
  335. workspace.RunNetOnce(m.net)
  336. loss1 = workspace.FetchBlob("name_x/loss1")
  337. loss2 = workspace.FetchBlob("name_x/loss2")
  338. workspace.RunNetOnce(optim_proto)
  339. optimized_loss1 = workspace.FetchBlob("name_x/loss1")
  340. optimized_loss2 = workspace.FetchBlob("name_x/loss2")
  341. np.testing.assert_almost_equal(loss1, optimized_loss1)
  342. np.testing.assert_almost_equal(loss2, optimized_loss2)
  343. @given(input_dim=st.integers(min_value=4, max_value=4),
  344. output_dim=st.integers(min_value=4, max_value=4),
  345. batch_size=st.integers(min_value=4, max_value=4))
  346. @settings(deadline=10000)
  347. def test_forward_optim_tree_harder(self, input_dim, output_dim, batch_size):
  348. m = model_helper.ModelHelper()
  349. m.net.Proto().type = "dag"
  350. m.net.Proto().num_workers = 4
  351. m.net.AddExternalInput("label")
  352. m.net.AddExternalInput("data")
  353. with core.NameScope("name_x"):
  354. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  355. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  356. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  357. fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
  358. fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
  359. # Branch
  360. fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
  361. fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
  362. fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
  363. fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
  364. fc5sum.Relu([], "relu1") \
  365. .Softmax([], "pred1") \
  366. .LabelCrossEntropy(["label"], ["xent1"]) \
  367. .AveragedLoss([], "loss1")
  368. fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
  369. fc6.Relu([], fc6) \
  370. .Softmax([], "pred2") \
  371. .LabelCrossEntropy(["label"], ["xent2"]) \
  372. .AveragedLoss([], "loss2")
  373. blobs_before = count_blobs(m.net.Proto())
  374. optim_proto = memonger.optimize_inference_for_dag(
  375. m.net, ["name_x/data"], "name_x/"
  376. )
  377. blobs_after = count_blobs(optim_proto)
  378. # Extra test with when one of the parameters is also an input.
  379. # This caused a bug before.
  380. optim_proto_extra_input = memonger.optimize_inference_for_dag(
  381. m.net, ["name_x/data", "name_x/fc1_w"], "name_x/"
  382. )
  383. blobs_after_extra_input = count_blobs(optim_proto_extra_input)
  384. self.assertEqual(blobs_after, blobs_after_extra_input)
  385. ###
  386. print(str(optim_proto))
  387. self.assertLess(blobs_after, blobs_before)
  388. # Test networks produce exactly same results
  389. data = np.random.randn(batch_size, input_dim).astype(np.float32)
  390. label = np.random.randint(
  391. low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
  392. workspace.RunNetOnce(m.param_init_net)
  393. workspace.FeedBlob("name_x/data", data)
  394. workspace.FeedBlob("name_x/label", label)
  395. workspace.RunNetOnce(m.net)
  396. loss1 = workspace.FetchBlob("name_x/loss1")
  397. loss2 = workspace.FetchBlob("name_x/loss2")
  398. workspace.RunNetOnce(optim_proto)
  399. optimized_loss1 = workspace.FetchBlob("name_x/loss1")
  400. optimized_loss2 = workspace.FetchBlob("name_x/loss2")
  401. np.testing.assert_almost_equal(loss1, optimized_loss1)
  402. np.testing.assert_almost_equal(loss2, optimized_loss2)
  403. # This test reproduces scenario where dag traversal for finding
  404. # shared blobs was not always starting from ops with in degree of 0
  405. @settings(deadline=10000)
  406. def test_forward_optim_tree_dag_traversal(self):
  407. input_dim = 4
  408. output_dim = 4
  409. batch_size = 4
  410. m = model_helper.ModelHelper()
  411. m.Proto().type = "dag"
  412. m.Proto().num_workers = 4
  413. with core.NameScope("name_x"):
  414. fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
  415. fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
  416. fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
  417. fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
  418. fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
  419. # Branch
  420. fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
  421. fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
  422. fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)
  423. fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
  424. fc5.Relu([], fc5sum) \
  425. .Softmax([], "pred1") \
  426. .LabelCrossEntropy(["label"], ["xent1"]) \
  427. .AveragedLoss([], "loss1")
  428. fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
  429. fc6.Relu([], fc6) \
  430. .Softmax([], "pred2") \
  431. .LabelCrossEntropy(["label"], ["xent2"]) \
  432. .AveragedLoss([], "loss2")
  433. blobs_before = count_blobs(m.net.Proto())
  434. # adding name_x/fc5_w as heads (which belongs to non-root op)
  435. # to make sure that dag traversal always starts from root ops
  436. optim_proto = memonger.optimize_inference_for_dag(
  437. m.net, ["name_x/fc5_w", "name_x/data"], "name_x"
  438. )
  439. blobs_after = count_blobs(optim_proto)
  440. self.assertLess(blobs_after, blobs_before)
  441. # This is specifically to verify the op schema check being done in memonger
  442. def test_forward_optim_tree_enforce_inplace_op_invalid(self):
  443. m = model_helper.ModelHelper()
  444. m.Proto().type = "dag"
  445. m.Proto().num_workers = 4
  446. net = m.net
  447. net.IndexFreeze("A", "B") # enforce inplace op
  448. net.Sum(["B", "B"], "C")
  449. net.Relu("C", "D")
  450. net.Sum(["D", "D"], "E")
  451. with self.assertRaises(RuntimeError):
  452. memonger.optimize_inference_for_dag(net, ["A"], "")
  453. # Here inplace op is specifically a root op to repro the scenario where dag
  454. # memonger could treat all the output blobs as shareable blobs and fails
  455. # assertion of input blob with the same name not allowed to share
  456. def test_forward_optim_tree_enforce_inplace_op_valid_and_as_head(self):
  457. m = model_helper.ModelHelper()
  458. m.Proto().type = "dag"
  459. m.Proto().num_workers = 4
  460. net = m.net
  461. net.IndexFreeze("A", "A") # enforce inplace op
  462. net.Sum(["A", "A"], "B")
  463. net.Relu("B", "C")
  464. net.Relu("C", "D")
  465. net.Sum(["D", "D"], "E")
  466. blobs_before = count_blobs(m.net.Proto())
  467. optim_proto = memonger.optimize_inference_for_dag(
  468. net, ["A"], ""
  469. )
  470. blobs_after = count_blobs(optim_proto)
  471. self.assertLess(blobs_after, blobs_before)
  472. def test_rnn(self):
  473. from caffe2.python import rnn_cell
  474. T = 5
  475. model = model_helper.ModelHelper()
  476. seq_lengths, labels = \
  477. model.net.AddExternalInputs(
  478. 'seq_lengths', 'labels',
  479. )
  480. init_blobs = []
  481. for i in range(2):
  482. hidden_init, cell_init = model.net.AddExternalInputs(
  483. "hidden_init_{}".format(i),
  484. "cell_init_{}".format(i)
  485. )
  486. init_blobs.extend([hidden_init, cell_init])
  487. model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])
  488. output, last_hidden, _, last_state = rnn_cell.LSTM(
  489. model=model,
  490. input_blob="input",
  491. seq_lengths=seq_lengths,
  492. initial_states=init_blobs,
  493. dim_in=10,
  494. dim_out=[10, 10],
  495. scope="lstm1",
  496. forward_only=False,
  497. drop_states=True,
  498. return_last_layer_only=True,
  499. )
  500. softmax, loss = model.net.SoftmaxWithLoss(
  501. [model.Flatten(output), "labels"],
  502. ['softmax', 'loss'],
  503. )
  504. model.AddGradientOperators([loss])
  505. blobs_before = count_blobs(model.net.Proto())
  506. optim_proto = memonger.share_grad_blobs(
  507. model.net,
  508. ["loss"],
  509. set(viewvalues(model.param_to_grad)),
  510. "",
  511. share_activations=True,
  512. dont_share_blobs=set(),
  513. )
  514. blobs_after = count_blobs(optim_proto)
  515. self.assertLess(blobs_after, blobs_before)
  516. # Run once to see all blobs are set up correctly
  517. for init_blob in init_blobs:
  518. workspace.FeedBlob(init_blob, np.zeros(
  519. [1, 4, 10], dtype=np.float32
  520. ))
  521. workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
  522. workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))
  523. workspace.RunNetOnce(model.param_init_net)
  524. workspace.RunNetOnce(model.net)
  525. def test_compute_interference_graph_inplace_ops(self):
  526. m = model_helper.ModelHelper()
  527. m.Copy("b1", "b1")
  528. m.Copy("b1", "b1")
  529. m.Copy("b1", "b1")
  530. g = memonger.compute_interference_graph(m.net.Proto().op)
  531. self.assertEqual(list(g.edges()), [(0, 1), (0, 2), (1, 2)])
  532. def test_topological_sort_longest_path(self):
  533. m = model_helper.ModelHelper()
  534. # 0
  535. m.Copy("conv0_w_comp", "conv0_w")
  536. # 1
  537. conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
  538. # 2
  539. m.Copy("conv2_w", "conv2_w")
  540. # 3
  541. brew.conv(m, conv0, "conv2", 16, 32, 4)
  542. g = memonger.compute_interference_graph(m.net.Proto().op)
  543. orders_org = memonger.topological_sort_traversal(g)
  544. orders_gt_org = [2, 0, 1, 3]
  545. self.assertEqual(orders_gt_org, list(orders_org))
  546. orders = memonger.topological_sort_traversal_longest_path(g)
  547. # longer path is in front of the shorter one
  548. orders_gt = [0, 1, 2, 3]
  549. self.assertEqual(orders_gt, list(orders))
  550. def test_topological_sort_longest_path_multi_target(self):
  551. # two outputs: conv2 and data4
  552. m = model_helper.ModelHelper()
  553. # 0
  554. m.Copy("conv0_w_comp", "conv0_w")
  555. # 1
  556. conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
  557. # 2
  558. m.Copy("conv2_w", "conv2_w")
  559. # 3
  560. brew.conv(m, conv0, "conv2", 16, 32, 4)
  561. # 4
  562. m.Copy("data1", "data2")
  563. # 5
  564. m.Copy("data2", "data3")
  565. g = memonger.compute_interference_graph(m.net.Proto().op)
  566. orders_org = memonger.topological_sort_traversal(g)
  567. orders_gt_org = [4, 5, 2, 0, 1, 3]
  568. self.assertEqual(orders_gt_org, list(orders_org))
  569. orders = memonger.topological_sort_traversal_longest_path(g)
  570. # longer path is in front of the shorter one
  571. orders_gt = [0, 1, 2, 3, 4, 5]
  572. self.assertEqual(orders_gt, list(orders))
  573. def test_topological_sort_longest_path_single_node(self):
  574. # single node
  575. m = model_helper.ModelHelper()
  576. # 0
  577. m.Copy("conv0_w_comp", "conv0_w")
  578. g = memonger.compute_interference_graph(m.net.Proto().op)
  579. orders_org = memonger.topological_sort_traversal(g)
  580. orders_gt_org = [0]
  581. self.assertEqual(orders_gt_org, list(orders_org))
  582. orders = memonger.topological_sort_traversal_longest_path(g)
  583. # longer path is in front of the shorter one
  584. orders_gt = [0]
  585. self.assertEqual(orders_gt, list(orders))
  586. def test_compute_assignments_greedy(self):
  587. LiveRange = memonger.LiveRange
  588. ranges_sorted = [
  589. ('b1', LiveRange(1, 3, 10)),
  590. ('b2', LiveRange(3, 4, 1)),
  591. ('b3', LiveRange(5, 6, 1)),
  592. ('b4', LiveRange(5, 7, 10)),
  593. ]
  594. assignment_gt = [
  595. [ranges_sorted[0], ranges_sorted[3]],
  596. [ranges_sorted[1], ranges_sorted[2]],
  597. ]
  598. best = memonger.compute_assignments_greedy(ranges_sorted, None)
  599. self.assertEqual(memonger.get_memory_usage(best), 11)
  600. self.assertEqual(best, assignment_gt)
  601. def test_compute_assignments_dp(self):
  602. LiveRange = memonger.LiveRange
  603. ranges_sorted = [
  604. ('b1', LiveRange(1, 3, 10)),
  605. ('b2', LiveRange(3, 4, 1)),
  606. ('b3', LiveRange(5, 6, 1)),
  607. ('b4', LiveRange(5, 7, 10)),
  608. ]
  609. best = memonger.compute_assignments_dp(ranges_sorted, None)
  610. self.assertEqual(memonger.get_memory_usage(best), 11)
  611. def test_compute_assignments_dp1(self):
  612. LiveRange = memonger.LiveRange
  613. ranges_sorted = [
  614. ('b1', LiveRange(1, 2, 10)),
  615. ('b2', LiveRange(4, 6, 1)),
  616. ('b3', LiveRange(5, 6, 10)),
  617. ]
  618. best = memonger.compute_assignments_dp(ranges_sorted, [])
  619. self.assertEqual(memonger.get_memory_usage(best), 11)
  @given(
      input_dim=st.integers(min_value=4, max_value=4),
      output_dim=st.integers(min_value=4, max_value=4),
      batch_size=st.integers(min_value=4, max_value=4),
  )
  def test_verify_graph_equality(self, input_dim, output_dim, batch_size):
      """Two nets that differ only in blob names must compare as equal.

      Both models are an fc layer feeding two parallel fc layers whose
      outputs are summed into "out". The second model names its fc outputs
      "other_x"/"other_y"/"other_z" instead of "x"/"y"/"z". The test asserts
      that memonger.verify_graph_equality returns a truthy value for the pair.

      batch_size is supplied by hypothesis but is not used in the body.
      """
      square = dict(dim_in=output_dim, dim_out=output_dim)

      def build_model(x_name, y_name, z_name):
          # DAG-type model with 4 workers; ops are created under "name_x".
          model = model_helper.ModelHelper()
          model.Proto().type = "dag"
          model.Proto().num_workers = 4
          with core.NameScope("name_x"):
              trunk = brew.fc(
                  model, "data", x_name, dim_in=input_dim, dim_out=output_dim
              )
              left = brew.fc(model, trunk, y_name, **square)
              right = brew.fc(model, trunk, z_name, **square)
              brew.sum(model, [left, right], "out")
          return model

      reference = build_model("x", "y", "z")
      renamed = build_model("other_x", "other_y", "other_z")
      graphs_match = memonger.verify_graph_equality(
          reference.net.Proto(), renamed.net.Proto()
      )
      self.assertTrue(graphs_match)
  @given(
      input_dim=st.integers(min_value=4, max_value=4),
      output_dim=st.integers(min_value=4, max_value=4),
      batch_size=st.integers(min_value=4, max_value=4),
  )
  def test_verify_graph_equality_harder(self, input_dim, output_dim, batch_size):
      """Graph equality must hold when the second net reuses blob names.

      Both models are an fc layer feeding two parallel two-layer fc chains
      whose outputs are summed into "out". The first model names the last
      layer outputs "u" and "v"; the second reuses "y" and "z" for them.
      The test asserts that memonger.verify_graph_equality returns a truthy
      value for the pair.

      batch_size is supplied by hypothesis but is not used in the body.
      """
      square = dict(dim_in=output_dim, dim_out=output_dim)

      def build_model(x_name, y_name, z_name, u_name, v_name):
          # DAG-type model with 4 workers; ops are created under "name_x".
          model = model_helper.ModelHelper()
          model.Proto().type = "dag"
          model.Proto().num_workers = 4
          with core.NameScope("name_x"):
              trunk = brew.fc(
                  model, "data", x_name, dim_in=input_dim, dim_out=output_dim
              )
              mid_a = brew.fc(model, trunk, y_name, **square)
              mid_b = brew.fc(model, trunk, z_name, **square)
              top_a = brew.fc(model, mid_a, u_name, **square)
              top_b = brew.fc(model, mid_b, v_name, **square)
              brew.sum(model, [top_a, top_b], "out")
          return model

      reference = build_model("x", "y", "z", "u", "v")
      reusing = build_model("x", "y", "z", "y", "z")
      graphs_match = memonger.verify_graph_equality(
          reference.net.Proto(), reusing.net.Proto()
      )
      self.assertTrue(graphs_match)
  @given(
      input_dim=st.integers(min_value=4, max_value=4),
      output_dim=st.integers(min_value=4, max_value=4),
      batch_size=st.integers(min_value=4, max_value=4),
  )
  def test_verify_graph_inequality(self, input_dim, output_dim, batch_size):
      """Nets whose parallel branches collapse onto one blob must differ.

      The first model writes its two parallel fc outputs to "y" and "z".
      The second writes both of them to "y". The test asserts that
      memonger.verify_graph_equality returns a falsy value for the pair.

      batch_size is supplied by hypothesis but is not used in the body.
      """
      square = dict(dim_in=output_dim, dim_out=output_dim)

      def build_model(x_name, y_name, z_name):
          # DAG-type model with 4 workers; ops are created under "name_x".
          model = model_helper.ModelHelper()
          model.Proto().type = "dag"
          model.Proto().num_workers = 4
          with core.NameScope("name_x"):
              trunk = brew.fc(
                  model, "data", x_name, dim_in=input_dim, dim_out=output_dim
              )
              left = brew.fc(model, trunk, y_name, **square)
              right = brew.fc(model, trunk, z_name, **square)
              brew.sum(model, [left, right], "out")
          return model

      reference = build_model("x", "y", "z")
      collapsed = build_model("x", "y", "y")
      graphs_match = memonger.verify_graph_equality(
          reference.net.Proto(), collapsed.net.Proto()
      )
      self.assertFalse(graphs_match)
  @given(
      input_dim=st.integers(min_value=4, max_value=4),
      output_dim=st.integers(min_value=4, max_value=4),
      batch_size=st.integers(min_value=4, max_value=4),
  )
  def test_verify_graph_inequality_harder(self, input_dim, output_dim, batch_size):
      """A name clash in the middle layer must make the graphs differ.

      Both models are an fc layer feeding two parallel two-layer fc chains
      whose outputs are summed into "out". The first model names the middle
      layer outputs "y" and "z"; the second names both of them "y". The test
      asserts that memonger.verify_graph_equality returns a falsy value for
      the pair.

      batch_size is supplied by hypothesis but is not used in the body.
      """
      square = dict(dim_in=output_dim, dim_out=output_dim)

      def build_model(x_name, y_name, z_name, u_name, v_name):
          # DAG-type model with 4 workers; ops are created under "name_x".
          model = model_helper.ModelHelper()
          model.Proto().type = "dag"
          model.Proto().num_workers = 4
          with core.NameScope("name_x"):
              trunk = brew.fc(
                  model, "data", x_name, dim_in=input_dim, dim_out=output_dim
              )
              mid_a = brew.fc(model, trunk, y_name, **square)
              mid_b = brew.fc(model, trunk, z_name, **square)
              top_a = brew.fc(model, mid_a, u_name, **square)
              top_b = brew.fc(model, mid_b, v_name, **square)
              brew.sum(model, [top_a, top_b], "out")
          return model

      reference = build_model("x", "y", "z", "u", "v")
      clashing = build_model("x", "y", "y", "u", "v")
      graphs_match = memonger.verify_graph_equality(
          reference.net.Proto(), clashing.net.Proto()
      )
      self.assertFalse(graphs_match)
  def test_release_blobs_when_used(self):
      """Check which blobs release_blobs_when_used frees, and when.

      Builds a small net (fc layers "x", "y", "z", "u", an Alias of "u",
      and a sum into "out"), passes it to memonger.release_blobs_when_used
      with "data" in the do-not-free set, and walks the returned ops to
      assert that:
        * no blob is the input of more than one Free op,
        * no op after a Free reads or writes the freed blob,
        * the set of freed blobs is exactly {"x", "y", "z"}.
      """
      m = model_helper.ModelHelper()
      fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
      fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
      fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
      fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
      m.net.Alias(["u"], ["u_alias"])
      brew.sum(m, [fc3, fc4], "out")

      # Use a set literal: set("data") would iterate over the string and
      # produce {"d", "a", "t"} instead of the single blob name "data".
      with_frees = memonger.release_blobs_when_used(m.net.Proto(), {"data"})

      # out is external output and u is aliased so cannot be freed
      expect_frees = {"x", "y", "z"}
      found_frees = set()
      for op in with_frees.op:
          if op.type == "Free":
              # no double frees
              self.assertNotIn(op.input[0], found_frees)
              found_frees.add(op.input[0])
          else:
              # Check a freed blob is not used anymore
              for inp in op.input:
                  self.assertNotIn(inp, found_frees)
              for outp in op.output:
                  self.assertNotIn(outp, found_frees)

      self.assertEqual(expect_frees, found_frees)
  # Run the tests through unittest when this file is executed as a script.
  if __name__ == "__main__":
      unittest.main()