| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842 |
- import numpy as np
- from caffe2.python import workspace, memonger, core, model_helper, brew
- from caffe2.proto import caffe2_pb2
- import caffe2.python.hypothesis_test_util as hu
- from future.utils import viewvalues
- import hypothesis.strategies as st
- from hypothesis import given, settings
- import unittest
def has_blob(proto, needle):
    """Return True if any op in ``proto`` lists ``needle`` as an input or output.

    Args:
        proto: object exposing an iterable ``op`` attribute whose items have
            ``input`` and ``output`` sequences of blob names.
        needle: blob name to look for.
    """
    return any(
        needle in op.input or needle in op.output
        for op in proto.op
    )
def count_blobs(proto):
    """Return how many distinct blob names appear as an input or output of any op.

    Args:
        proto: object exposing an iterable ``op`` attribute whose items have
            ``input`` and ``output`` sequences of blob names.
    """
    seen = set()
    for op in proto.op:
        seen.update(op.input)
        seen.update(op.output)
    return len(seen)
class MemongerTest(hu.HypothesisTestCase):
    """Tests for the blob-sharing and op-ordering helpers in ``caffe2.python.memonger``."""

    @given(input_dim=st.integers(min_value=1, max_value=10),
           output_dim=st.integers(min_value=1, max_value=10),
           batch_size=st.integers(min_value=1, max_value=10),
           do=st.sampled_from(hu.device_options),
           algo=st.sampled_from(memonger.AssignmentAlgorithm))
    @settings(max_examples=5, deadline=None)
    def test_simple_memonger(self, input_dim, output_dim, batch_size, do, algo):
        """optimize_interference must reproduce loss/gradient and not use more bytes."""
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        # Blobs passed to the optimizer as static: parameters, net inputs,
        # the loss, and the gradient that is compared below.
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]

        optimization = memonger.optimize_interference(
            m.Proto(), static_blobs, algo=algo)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        workspace.RunNetOnce(optimization.net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
        stats = memonger.compute_statistics(optimization.assignments)
        self.assertLess(stats.optimized_nbytes, stats.baseline_nbytes)

        # run with blob sizes
        blob_sizes = memonger.collect_blob_sizes(m.Proto())
        optimization1 = memonger.optimize_interference(
            m.Proto(), static_blobs, blob_sizes=blob_sizes, algo=algo)
        workspace.RunNetOnce(optimization1.net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)
        stats = memonger.compute_statistics(optimization1.assignments)
        self.assertLessEqual(stats.optimized_nbytes, stats.baseline_nbytes)

    @given(input_dim=st.integers(min_value=1, max_value=10),
           output_dim=st.integers(min_value=1, max_value=10),
           batch_size=st.integers(min_value=1, max_value=10),
           do=st.sampled_from(hu.device_options))
    @settings(max_examples=5, deadline=None)
    def test_fast_memonger(self, input_dim, output_dim, batch_size, do):
        """optimize_inference_fast must reproduce loss/gradient with fewer blobs."""
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
        fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
        fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)

        fc3.Relu([], fc3)\
           .Softmax([], "pred") \
           .LabelCrossEntropy(["label"], ["xent"]) \
           .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["loss"])
        m.net.Proto().device_option.CopyFrom(do)
        m.param_init_net.Proto().device_option.CopyFrom(do)
        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["data", "label", "loss", input_to_grad["fc1_w"]]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("data", data, device_option=do)
        workspace.FeedBlob("label", label, device_option=do)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("loss")
        grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        workspace.RunNetOnce(optimized_net)
        optimized_loss = workspace.FetchBlob("loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        self.assertLess(count_blobs(optimized_net), count_blobs(m.Proto()))

    def test_fast_memonger_unique_outputs(self):
        """No op produced by optimize_inference_fast may repeat an output name."""
        m = model_helper.ModelHelper()
        fc = []
        for i in range(2):
            # NOTE(review): "fc" has no "{}" placeholder, so .format(i) is a
            # no-op and both layers write the blob "fc" -- confirm whether
            # "fc{}" was intended before changing it.
            z = brew.fc(
                m, "data{}".format(i), "fc".format(i), dim_in=2, dim_out=2)
            fc.append(z)
        r = []
        # Trick is here to have same input appear twice in a same Sum
        for x in fc:
            for y in fc:
                r.append(brew.sum(m, [x, y], 1))
        concated = brew.concat(m, r, "concated")
        brew.relu(m, concated, "merged")

        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["merged"] + ["data{}".format(i) for i in range(len(fc))]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        for op in optimized_net.op:
            self.assertEqual(len(op.output), len(set(op.output)), str(op))

    @given(input_dim=st.integers(min_value=1, max_value=4),
           output_dim=st.integers(min_value=1, max_value=4),
           batch_size=st.integers(min_value=1, max_value=4))
    def test_gradient_optim(self, input_dim, output_dim, batch_size):
        """share_grad_blobs must cut the blob count and keep loss/gradient intact.

        Runs both without and with activation sharing, and checks that
        "name_x/fc5" is still present in both optimized protos.
        """
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5)\
               .Softmax([], "pred") \
               .LabelCrossEntropy(["label"], ["xent"]) \
               .AveragedLoss([], "loss")
        input_to_grad = m.AddGradientOperators(["name_x/loss"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(viewvalues(m.param_to_grad)),
            "name_x/",
            share_activations=False,
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        optim_proto_wacts = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss"],
            set(viewvalues(m.param_to_grad)),
            "name_x/",
            share_activations=True,
            dont_share_blobs={str(input_to_grad["name_x/fc1_w"])},
        )
        blobs_wact_optim = count_blobs(optim_proto_wacts)
        self.assertLessEqual(blobs_wact_optim, blobs_after)

        # Check that the last activations are not shared
        self.assertTrue(has_blob(optim_proto, "name_x/fc5"))
        self.assertTrue(
            has_blob(optim_proto_wacts, "name_x/fc5"),
            "Dont remap final activation",
        )

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss = workspace.FetchBlob("name_x/loss")
        grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        workspace.RunNetOnce(optim_proto)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

        # Overwrite the stored gradient so the next comparison cannot pass
        # on a stale value left by the previous run.
        workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))

        # Run with the forward optimization
        workspace.RunNetOnce(optim_proto_wacts)
        optimized_loss = workspace.FetchBlob("name_x/loss")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss, optimized_loss)
        np.testing.assert_almost_equal(grad, optimized_grad)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support.")
    def test_memonger_mix_cpu_gpu(self):
        """
        Check that memonger does not make blobs cross CPU/GPU boundary
        """
        m = model_helper.ModelHelper()
        with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType, 0)):
            fc1 = brew.fc(m, "data", "fc1", dim_in=2, dim_out=2)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=2, dim_out=2)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=2, dim_out=2)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=2, dim_out=2)
            fc4_cpu = m.net.CopyGPUToCPU(fc4, "fc4_cpu")
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
            fc5_cpu = brew.fc(m, fc4_cpu, "fc5_cpu", dim_in=2, dim_out=2)
            fc6_cpu = brew.fc(m, fc5_cpu, "fc6_cpu", dim_in=2, dim_out=2)
            fc7_cpu = brew.fc(m, fc6_cpu, "fc7_cpu", dim_in=2, dim_out=2)
            fc7_cpu.Relu([], fc7_cpu) \
               .Softmax([], "pred") \
               .LabelCrossEntropy(["label"], ["xent"]) \
               .AveragedLoss([], "loss")
        m.AddGradientOperators(["loss"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["loss"],
            set(viewvalues(m.param_to_grad)),
            "",
            share_activations=True,
            dont_share_blobs=set(),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Create set of blobs on CPU side and GPU side and check they don't
        # overlap
        device_blobs = {caffe2_pb2.CPU: set(), workspace.GpuDeviceType: set()}
        for op in optim_proto.op:
            # The copy ops legitimately touch blobs on both devices.
            if op.type not in ['CopyCPUToGPU', "CopyGPUToCPU"]:
                dev = op.device_option.device_type
                for b in list(op.input) + list(op.output):
                    device_blobs[dev].add(b)

        device_crossers = device_blobs[caffe2_pb2.CPU].intersection(
            device_blobs[workspace.GpuDeviceType]
        )
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        self.assertEqual(device_crossers, set())

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=1000)
    def test_gradient_optim_tree(self, input_dim, output_dim, batch_size):
        """share_grad_blobs on a net with two losses keeps both losses and the gradient."""
        m = model_helper.ModelHelper()
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)
            fc5.Relu([], fc5) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")
        input_to_grad = m.AddGradientOperators(["name_x/loss1", "name_x/loss2"])

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            m.net,
            ["name_x/loss1", "name_x/loss2"],
            set(viewvalues(m.param_to_grad)),
            "name_x",  # "name_x//shared_gradinp_0_shared" if using "name_x/"
            share_activations=True,
            dont_share_blobs={'name_x/fc6', 'name_x/fc5',
                              str(input_to_grad["name_x/fc1_w"])},
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
        self.assertTrue(has_blob(optim_proto, "name_x/fc6"))

        # Test networks produce exactly same gradients
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))

        # Overwrite the stored gradient so the comparison below cannot pass
        # on the value left by the unoptimized run.
        workspace.FeedBlob(str(input_to_grad["name_x/fc1_w"]), np.array([0.0]))

        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        optimized_grad = workspace.FetchBlob(str(input_to_grad["name_x/fc1_w"]))
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
        np.testing.assert_almost_equal(grad, optimized_grad)

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=1000)
    def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
        """optimize_inference_for_dag on a branched net cuts blobs and keeps both losses."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    @settings(deadline=10000)
    def test_forward_optim_tree_harder(self, input_dim, output_dim, batch_size):
        """Like the daggy test, plus: a parameter given as an extra head changes nothing."""
        m = model_helper.ModelHelper()
        m.net.Proto().type = "dag"
        m.net.Proto().num_workers = 4
        m.net.AddExternalInput("label")
        m.net.AddExternalInput("data")

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")
            fc5sum.Relu([], "relu1") \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x/"
        )
        blobs_after = count_blobs(optim_proto)

        # Extra test with when one of the parameters is also an input.
        # This caused a bug before.
        optim_proto_extra_input = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data", "name_x/fc1_w"], "name_x/"
        )
        blobs_after_extra_input = count_blobs(optim_proto_extra_input)
        self.assertEqual(blobs_after, blobs_after_extra_input)
        ###

        print(str(optim_proto))
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)

    # This test reproduces scenario where dag traversal for finding
    # shared blobs was not always starting from ops with in degree of 0
    # NOTE(review): there is no @given on this test, so this @settings
    # presumably has no effect -- confirm and drop it if so.
    @settings(deadline=10000)
    def test_forward_optim_tree_dag_traversal(self):
        """optimize_inference_for_dag must cut blobs when a non-root op's blob is a head."""
        input_dim = 4
        output_dim = 4
        batch_size = 4

        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        # adding name_x/fc5_w as heads (which belongs to non-root op)
        # to make sure that dag traversal always starts from root ops
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/fc5_w", "name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

    # This is specifically to verify the op schema check being done in memonger
    def test_forward_optim_tree_enforce_inplace_op_invalid(self):
        """optimize_inference_for_dag must raise RuntimeError for IndexFreeze("A", "B")."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        net = m.net
        net.IndexFreeze("A", "B")  # enforce inplace op
        net.Sum(["B", "B"], "C")
        net.Relu("C", "D")
        net.Sum(["D", "D"], "E")

        with self.assertRaises(RuntimeError):
            memonger.optimize_inference_for_dag(net, ["A"], "")

    # Here inplace op is specifically a root op to repro the scenario where dag
    # memonger could treat all the output blobs as shareable blobs and fails
    # assertion of input blob with the same name not allowed to share
    def test_forward_optim_tree_enforce_inplace_op_valid_and_as_head(self):
        """An in-place IndexFreeze("A", "A") root op must still allow blob reduction."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        net = m.net
        net.IndexFreeze("A", "A")  # enforce inplace op
        net.Sum(["A", "A"], "B")
        net.Relu("B", "C")
        net.Relu("C", "D")
        net.Sum(["D", "D"], "E")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            net, ["A"], ""
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

    def test_rnn(self):
        """share_grad_blobs must cut the blob count of an LSTM net.

        After the check, the original (unoptimized) net is run once.
        """
        from caffe2.python import rnn_cell
        T = 5
        model = model_helper.ModelHelper()
        seq_lengths, labels = \
            model.net.AddExternalInputs(
                'seq_lengths', 'labels',
            )
        init_blobs = []
        for i in range(2):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i)
            )
            init_blobs.extend([hidden_init, cell_init])
        model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=10,
            dim_out=[10, 10],
            scope="lstm1",
            forward_only=False,
            drop_states=True,
            return_last_layer_only=True,
        )
        softmax, loss = model.net.SoftmaxWithLoss(
            [model.Flatten(output), "labels"],
            ['softmax', 'loss'],
        )

        model.AddGradientOperators([loss])
        blobs_before = count_blobs(model.net.Proto())
        optim_proto = memonger.share_grad_blobs(
            model.net,
            ["loss"],
            set(viewvalues(model.param_to_grad)),
            "",
            share_activations=True,
            dont_share_blobs=set(),
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Run once to see all blobs are set up correctly
        for init_blob in init_blobs:
            workspace.FeedBlob(init_blob, np.zeros(
                [1, 4, 10], dtype=np.float32
            ))
        workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
        # rand() draws from [0, 1), so the int32 cast yields all-zero labels.
        workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))

        workspace.RunNetOnce(model.param_init_net)
        workspace.RunNetOnce(model.net)

    def test_compute_interference_graph_inplace_ops(self):
        """Three in-place Copy ops on "b1" must yield edges (0,1), (0,2), (1,2)."""
        m = model_helper.ModelHelper()
        m.Copy("b1", "b1")
        m.Copy("b1", "b1")
        m.Copy("b1", "b1")
        g = memonger.compute_interference_graph(m.net.Proto().op)
        self.assertEqual(list(g.edges()), [(0, 1), (0, 2), (1, 2)])

    def test_topological_sort_longest_path(self):
        """Check the expected op orders of both topological sort traversals."""
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")
        # 1
        conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
        # 2
        m.Copy("conv2_w", "conv2_w")
        # 3
        brew.conv(m, conv0, "conv2", 16, 32, 4)

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [2, 0, 1, 3]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # longer path is in front of the shorter one
        orders_gt = [0, 1, 2, 3]
        self.assertEqual(orders_gt, list(orders))

    def test_topological_sort_longest_path_multi_target(self):
        """Check both traversal orders on a graph with two separate outputs."""
        # two outputs: conv2 and data4
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")
        # 1
        conv0 = brew.conv(m, "data", "conv0", 32, 32, 4)
        # 2
        m.Copy("conv2_w", "conv2_w")
        # 3
        brew.conv(m, conv0, "conv2", 16, 32, 4)
        # 4
        m.Copy("data1", "data2")
        # 5
        m.Copy("data2", "data3")

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [4, 5, 2, 0, 1, 3]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # longer path is in front of the shorter one
        orders_gt = [0, 1, 2, 3, 4, 5]
        self.assertEqual(orders_gt, list(orders))

    def test_topological_sort_longest_path_single_node(self):
        """Both traversals must return [0] for a single-op graph."""
        # single node
        m = model_helper.ModelHelper()
        # 0
        m.Copy("conv0_w_comp", "conv0_w")

        g = memonger.compute_interference_graph(m.net.Proto().op)

        orders_org = memonger.topological_sort_traversal(g)
        orders_gt_org = [0]
        self.assertEqual(orders_gt_org, list(orders_org))

        orders = memonger.topological_sort_traversal_longest_path(g)
        # longer path is in front of the shorter one
        orders_gt = [0]
        self.assertEqual(orders_gt, list(orders))

    def test_compute_assignments_greedy(self):
        """Greedy assignment must give the expected grouping with memory usage 11."""
        LiveRange = memonger.LiveRange
        ranges_sorted = [
            ('b1', LiveRange(1, 3, 10)),
            ('b2', LiveRange(3, 4, 1)),
            ('b3', LiveRange(5, 6, 1)),
            ('b4', LiveRange(5, 7, 10)),
        ]
        assignment_gt = [
            [ranges_sorted[0], ranges_sorted[3]],
            [ranges_sorted[1], ranges_sorted[2]],
        ]

        best = memonger.compute_assignments_greedy(ranges_sorted, None)
        self.assertEqual(memonger.get_memory_usage(best), 11)
        self.assertEqual(best, assignment_gt)

    def test_compute_assignments_dp(self):
        """DP assignment over four live ranges must give memory usage 11."""
        LiveRange = memonger.LiveRange
        ranges_sorted = [
            ('b1', LiveRange(1, 3, 10)),
            ('b2', LiveRange(3, 4, 1)),
            ('b3', LiveRange(5, 6, 1)),
            ('b4', LiveRange(5, 7, 10)),
        ]

        best = memonger.compute_assignments_dp(ranges_sorted, None)
        self.assertEqual(memonger.get_memory_usage(best), 11)

    def test_compute_assignments_dp1(self):
        """DP assignment over three live ranges (empty init list) must give usage 11."""
        LiveRange = memonger.LiveRange
        ranges_sorted = [
            ('b1', LiveRange(1, 2, 10)),
            ('b2', LiveRange(4, 6, 1)),
            ('b3', LiveRange(5, 6, 10)),
        ]

        best = memonger.compute_assignments_dp(ranges_sorted, [])
        self.assertEqual(memonger.get_memory_usage(best), 11)

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_equality(self, input_dim, output_dim, batch_size):
        """Nets that differ only in intermediate blob names must compare equal."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m, [fc2, fc3], "out")

        m2 = model_helper.ModelHelper()
        m2.Proto().type = "dag"
        m2.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m2, "data", "other_x", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m2, fc1, "other_y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m2, fc1, "other_z", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m2, [fc2, fc3], "out")

        self.assertTrue(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_equality_harder(self, input_dim, output_dim, batch_size):
        """Two-branch nets must compare equal when the second reuses names "y" and "z"."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2a = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc2b = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            fc3a = brew.fc(m, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
            fc3b = brew.fc(m, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m, [fc3a, fc3b], "out")

        m2 = model_helper.ModelHelper()
        m2.Proto().type = "dag"
        m2.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2a = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc2b = brew.fc(m2, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            fc3a = brew.fc(m2, fc2a, "y", dim_in=output_dim, dim_out=output_dim)
            fc3b = brew.fc(m2, fc2b, "z", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m2, [fc3a, fc3b], "out")

        self.assertTrue(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_inequality(self, input_dim, output_dim, batch_size):
        """Nets must compare unequal when the second writes both branches to "y"."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m, [fc2, fc3], "out")

        m2 = model_helper.ModelHelper()
        m2.Proto().type = "dag"
        m2.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m2, [fc2, fc3], "out")

        self.assertFalse(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))

    @given(input_dim=st.integers(min_value=4, max_value=4),
           output_dim=st.integers(min_value=4, max_value=4),
           batch_size=st.integers(min_value=4, max_value=4))
    def test_verify_graph_inequality_harder(self, input_dim, output_dim, batch_size):
        """Deeper nets must compare unequal when the second writes both mid layers to "y"."""
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2a = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc2b = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            fc3a = brew.fc(m, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
            fc3b = brew.fc(m, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m, [fc3a, fc3b], "out")

        m2 = model_helper.ModelHelper()
        m2.Proto().type = "dag"
        m2.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m2, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2a = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc2b = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3a = brew.fc(m2, fc2a, "u", dim_in=output_dim, dim_out=output_dim)
            fc3b = brew.fc(m2, fc2b, "v", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m2, [fc3a, fc3b], "out")

        self.assertFalse(memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))

    def test_release_blobs_when_used(self):
        """release_blobs_when_used must free exactly x, y and z, each once, after last use."""
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
        fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
        fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
        fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
        m.net.Alias(["u"], ["u_alias"])

        brew.sum(m, [fc3, fc4], "out")

        # A one-element set: set("data") would split the string into the
        # characters {'d', 'a', 't'} instead of naming the "data" blob.
        with_frees = memonger.release_blobs_when_used(m.net.Proto(), {"data"})

        expect_frees = {"x", "y", "z"}  # out is external output
        # and u is aliased so cannot be freed
        found_frees = set()
        for op in with_frees.op:
            if op.type == "Free":
                self.assertFalse(op.input[0] in found_frees)  # no double frees
                found_frees.add(op.input[0])
            else:
                # Check a freed blob is not used anymore
                for inp in op.input:
                    self.assertFalse(inp in found_frees)
                for outp in op.output:
                    self.assertFalse(outp in found_frees)

        self.assertEqual(expect_frees, found_frees)
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
|