# gradient_check_test.py
# TODO(jiayq): as more and more tests move to hypothesis-based testing, we
# can gradually remove this test script. DO NOT ADD MORE TESTS TO THIS
# FILE.
  4. import numpy as np
  5. from caffe2.python import (
  6. brew,
  7. core,
  8. device_checker,
  9. gradient_checker,
  10. model_helper,
  11. test_util,
  12. workspace,
  13. )
  14. from caffe2.python.gradient_checker import NetGradientChecker
  15. from caffe2.python.net_builder import ops, NetBuilder
  16. from caffe2.proto import caffe2_pb2
  17. import unittest
  18. from typing import Optional
  19. if workspace.has_gpu_support and workspace.NumGpuDevices() > 0:
  20. _gpu_dev_option = caffe2_pb2.DeviceOption()
  21. _gpu_dev_option.device_type = workspace.GpuDeviceType
  22. cpu_device_option = caffe2_pb2.DeviceOption()
  23. gpu_device_checker = device_checker.DeviceChecker(
  24. 0.01, [_gpu_dev_option]
  25. )
  26. device_checker = device_checker.DeviceChecker(
  27. 0.01, [_gpu_dev_option, cpu_device_option]
  28. )
  29. gpu_gradient_checkers = [
  30. gradient_checker.GradientChecker(
  31. 0.005, 0.05, _gpu_dev_option, "gpu_checker_ws"
  32. ),
  33. ]
  34. gradient_checkers = [
  35. gradient_checker.GradientChecker(
  36. 0.005, 0.05, _gpu_dev_option, "gpu_checker_ws"
  37. ),
  38. gradient_checker.GradientChecker(
  39. 0.01, 0.05, cpu_device_option, "cpu_checker_ws"
  40. ),
  41. ]
  42. gpu_device_option: Optional[caffe2_pb2.DeviceOption] = _gpu_dev_option
  43. else:
  44. cpu_device_option = caffe2_pb2.DeviceOption()
  45. gpu_device_option = None
  46. gpu_device_checker = device_checker.DeviceChecker(
  47. 0.01, []
  48. )
  49. device_checker = device_checker.DeviceChecker(0.01, [cpu_device_option])
  50. gradient_checkers = [
  51. gradient_checker.GradientChecker(
  52. 0.01, 0.05, cpu_device_option, "cpu_checker_ws"
  53. )
  54. ]
  55. gpu_gradient_checkers = []
  56. class TestLRN(test_util.TestCase):
  57. def setUp(self):
  58. self.test_configs = [(6, 10), (3, 13), ]
  59. def testLRN(self):
  60. for input_size, depth in self.test_configs:
  61. op = core.CreateOperator("LRN",
  62. ["X"],
  63. ["Y", "Y_scale"],
  64. size=11,
  65. alpha=0.001,
  66. beta=0.5,
  67. bias=2.0,
  68. order="NHWC"
  69. )
  70. X = np.random.rand(2, input_size, input_size,
  71. depth).astype(np.float32)
  72. res = device_checker.CheckSimple(op, [X], [0])
  73. self.assertTrue(res)
  74. for checker in gradient_checkers:
  75. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  76. self.assertTrue(res)
  77. class TestFlatten(test_util.TestCase):
  78. def testFlatten(self):
  79. op = core.CreateOperator("Flatten", ["X"], ["Y"])
  80. X = np.random.rand(2, 3, 4, 5).astype(np.float32)
  81. res = device_checker.CheckSimple(op, [X], [0])
  82. self.assertTrue(res)
  83. for checker in gradient_checkers:
  84. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  85. self.assertTrue(res)
  86. class TestConcat(test_util.TestCase):
  87. def setUp(self):
  88. self.test_configs = [
  89. # input_size, depth1, depth2, depth3, depth4
  90. (3, 2, 3, 4, 5),
  91. (4, 5, 4, 3, 2),
  92. ]
  93. def testConcatNHWC(self):
  94. for input_size, d1, d2, d3, d4 in self.test_configs:
  95. op = core.CreateOperator("Concat",
  96. ["X1", "X2", "X3", "X4"],
  97. ["Y", "Y_dims"],
  98. order="NHWC"
  99. )
  100. Xs = [
  101. np.random.rand(2, input_size, input_size,
  102. d1).astype(np.float32),
  103. np.random.rand(2, input_size, input_size,
  104. d2).astype(np.float32),
  105. np.random.rand(2, input_size, input_size,
  106. d3).astype(np.float32),
  107. np.random.rand(2, input_size, input_size, d4).astype(np.float32)
  108. ]
  109. for i in range(4):
  110. res = device_checker.CheckSimple(op, Xs, [0])
  111. self.assertTrue(res)
  112. for checker in gradient_checkers:
  113. res, grad, grad_estimated = checker.CheckSimple(op, Xs, i,
  114. [0])
  115. self.assertTrue(res)
  116. def testConcatNCHW(self):
  117. for input_size, d1, d2, d3, d4 in self.test_configs:
  118. op = core.CreateOperator("Concat",
  119. ["X1", "X2", "X3", "X4"],
  120. ["Y", "Y_dims"],
  121. order="NCHW"
  122. )
  123. Xs = [
  124. np.random.rand(2, d1, input_size,
  125. input_size).astype(np.float32),
  126. np.random.rand(2, d2, input_size,
  127. input_size).astype(np.float32),
  128. np.random.rand(2, d3, input_size,
  129. input_size).astype(np.float32),
  130. np.random.rand(2, d4, input_size, input_size).astype(np.float32)
  131. ]
  132. for i in range(4):
  133. res = device_checker.CheckSimple(op, Xs, [0])
  134. self.assertTrue(res)
  135. for checker in gradient_checkers:
  136. res, grad, grad_estimated = checker.CheckSimple(op, Xs, i,
  137. [0])
  138. self.assertTrue(res)
  139. class TestRelu(test_util.TestCase):
  140. def setUp(self):
  141. self.test_configs = [
  142. # input size
  143. # (0, 1),
  144. (1, 1),
  145. (2, 1),
  146. (1, 3, 3, 1),
  147. (2, 3, 3, 1),
  148. (1, 5, 5, 3),
  149. (2, 5, 5, 3),
  150. ]
  151. def testRelu(self):
  152. for input_size in self.test_configs:
  153. op = core.CreateOperator("Relu", ["X"], ["Y"])
  154. X = np.random.rand(*input_size).astype(np.float32)
  155. # go away from the origin point to avoid kink problems
  156. X += 0.01 * np.sign(X)
  157. X[X == 0] = 0.01
  158. res = device_checker.CheckSimple(op, [X], [0])
  159. self.assertTrue(res)
  160. for checker in gradient_checkers:
  161. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  162. self.assertTrue(res)
  163. class TestTanh(test_util.TestCase):
  164. def setUp(self):
  165. self.test_configs = [
  166. # (0, 1),
  167. (1, 1),
  168. (2, 1),
  169. (1, 2, 3, 4),
  170. ]
  171. def testTanh(self):
  172. for input_size in self.test_configs:
  173. op = core.CreateOperator("Tanh", ["X"], ["Y"])
  174. X = np.random.rand(*input_size).astype(np.float32) - 0.5
  175. res = device_checker.CheckSimple(op, [X], [0])
  176. self.assertTrue(res)
  177. for checker in gradient_checkers:
  178. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  179. self.assertTrue(res)
  180. class TestAbs(test_util.TestCase):
  181. def setUp(self):
  182. self.test_configs = [
  183. (1, 1),
  184. (2, 3),
  185. (2, 3, 4),
  186. (2, 3, 4, 5),
  187. ]
  188. def testAbs(self):
  189. for input_size in self.test_configs:
  190. op = core.CreateOperator("Abs", ["X"], ["Y"])
  191. X = np.random.rand(*input_size).astype(np.float32)
  192. # go away from the origin point to avoid kink problems
  193. X += 0.01 * np.sign(X)
  194. X[X == 0] = 0.01
  195. res = device_checker.CheckSimple(op, [X], [0])
  196. self.assertTrue(res)
  197. for checker in gradient_checkers:
  198. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  199. self.assertTrue(res)
  200. class TestExp(test_util.TestCase):
  201. def setUp(self):
  202. self.test_configs = [
  203. # (0, 1),
  204. (1, 1),
  205. (2, 1),
  206. (1, 2, 3, 4),
  207. ]
  208. def testExp(self):
  209. for input_size in self.test_configs:
  210. op = core.CreateOperator("Exp", ["X"], ["Y"])
  211. X = np.random.rand(*input_size).astype(np.float32) - 0.5
  212. res = device_checker.CheckSimple(op, [X], [0])
  213. self.assertTrue(res)
  214. for checker in gradient_checkers:
  215. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  216. self.assertTrue(res)
  217. class TestCos(test_util.TestCase):
  218. def setUp(self):
  219. self.test_configs = [
  220. (1, 1),
  221. (2, 3),
  222. (2, 3, 4),
  223. (2, 3, 4, 5),
  224. ]
  225. def testCos(self):
  226. for input_size in self.test_configs:
  227. op = core.CreateOperator("Cos", ["X"], ["Y"])
  228. X = np.random.rand(*input_size).astype(np.float32) - 0.5
  229. res = device_checker.CheckSimple(op, [X], [0])
  230. self.assertTrue(res)
  231. for checker in gradient_checkers:
  232. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  233. self.assertTrue(res)
  234. class TestSin(test_util.TestCase):
  235. def setUp(self):
  236. self.test_configs = [
  237. (1, 1),
  238. (2, 3),
  239. (2, 3, 4),
  240. (2, 3, 4, 5),
  241. ]
  242. def testSin(self):
  243. for input_size in self.test_configs:
  244. op = core.CreateOperator("Sin", ["X"], ["Y"])
  245. X = np.random.rand(*input_size).astype(np.float32) - 0.5
  246. res = device_checker.CheckSimple(op, [X], [0])
  247. self.assertTrue(res)
  248. for checker in gradient_checkers:
  249. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  250. self.assertTrue(res)
  251. class TestSigmoid(test_util.TestCase):
  252. def setUp(self):
  253. self.test_configs = [
  254. # (0, 1),
  255. (1, 1),
  256. (2, 1),
  257. (1, 2, 3, 4),
  258. ]
  259. def testSigmoid(self):
  260. for input_size in self.test_configs:
  261. op = core.CreateOperator("Sigmoid", ["X"], ["Y"])
  262. X = np.random.rand(*input_size).astype(np.float32) - 0.5
  263. res = device_checker.CheckSimple(op, [X], [0])
  264. self.assertTrue(res)
  265. for checker in gradient_checkers:
  266. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  267. self.assertTrue(res)
  268. class TestSum(test_util.TestCase):
  269. def setUp(self):
  270. self.test_configs = [
  271. # ((0, 1), False),
  272. ((1, 2, 3, 4), True),
  273. ((1, 2, 3, 4), False)]
  274. def testSum(self):
  275. for (input_size, in_place) in self.test_configs:
  276. op = core.CreateOperator("Sum", ["X1", "X2"],
  277. ["Y" if not in_place else "X1"])
  278. X1 = np.random.rand(*input_size).astype(np.float32) - 0.5
  279. X2 = np.random.rand(*input_size).astype(np.float32) - 0.5
  280. res = device_checker.CheckSimple(op, [X1, X2], [0])
  281. self.assertTrue(res)
  282. for checker in gradient_checkers:
  283. res, grad, grad_estimated = checker.CheckSimple(
  284. op, [X1, X2], 0, [0])
  285. self.assertTrue(res)
  286. res, grad, grad_estimated = checker.CheckSimple(
  287. op, [X1, X2], 1, [0])
  288. self.assertTrue(res)
  289. class TestMakeTwoClass(test_util.TestCase):
  290. def setUp(self):
  291. self.test_configs = [
  292. # input size
  293. # (0, 1),
  294. (1,),
  295. (7,),
  296. (1, 3),
  297. (2, 5),
  298. ]
  299. def testMakeTwoClass(self):
  300. for input_size in self.test_configs:
  301. op = core.CreateOperator("MakeTwoClass", ["X"], ["Y"])
  302. X = np.random.rand(*input_size).astype(np.float32)
  303. # step a little to avoid gradient problems
  304. X[X < 0.01] += 0.01
  305. X[X > 0.99] -= 0.01
  306. res = device_checker.CheckSimple(op, [X], [0])
  307. self.assertTrue(res)
  308. for checker in gradient_checkers:
  309. res, grad, grad_estimated = checker.CheckSimple(op, [X], 0, [0])
  310. self.assertTrue(res)
  311. class TestNetGradientChecker(test_util.TestCase):
  312. def test_net_gradient_checker(self):
  313. model = model_helper.ModelHelper(name="test")
  314. const = model.net.AddExternalInputs("const1", "const2")
  315. fc = brew.fc(model, dim_in=3, dim_out=4, blob_in="X", blob_out="Y", axis=0)
  316. dist = [model.net.SquaredL2Distance([fc, c]) for c in const]
  317. losses = [model.net.AveragedLoss(d) for d in dist] # using two losses here
  318. workspace.RunNetOnce(model.param_init_net)
  319. NetGradientChecker.Check(
  320. model.net,
  321. outputs_with_grad=losses,
  322. input_values={"X": np.array([1, 2, 3], dtype="float32"),
  323. const[0]: np.array([1, 1, 1, 1], dtype="float32"),
  324. const[1]: np.array([2, 2, 2, 2], dtype="float32")},
  325. input_to_check="X",
  326. )
  327. def test_net_comparison(self):
  328. # (a + b) * (c + d) == a * c + a * d + b * c + b * d
  329. net1 = core.Net("net1")
  330. a, b, c, d = net1.AddExternalInputs("a", "b", "c", "d")
  331. a_b = net1.Sum([a, b], "a+b")
  332. c_d = net1.Sum([c, d], "c+d")
  333. x = net1.Mul([a_b, c_d], "x")
  334. net2 = core.Net("net2")
  335. ac = net2.Mul([a, c], "ac")
  336. ad = net2.Mul([a, d], "ad")
  337. bc = net2.Mul([b, c], "bc")
  338. bd = net2.Mul([b, d], "bd")
  339. y = net2.Sum([ac, ad, bc, bd], "y")
  340. input_values = {blob: np.array([i], dtype=np.float32)
  341. for i, blob in enumerate([a, b, c, d])}
  342. NetGradientChecker.CompareNets(
  343. [net1, net2], [[x], [y]], [0],
  344. inputs_with_grads=[a, b, c, d],
  345. input_values=input_values,
  346. )
  347. class TestIf(test_util.TestCase):
  348. def testIf(self):
  349. W_a_values = [2.0, 1.5]
  350. B_a_values = [0.5]
  351. W_b_values = [7.0, 3.5]
  352. B_b_values = [1.5]
  353. with NetBuilder(_use_control_ops=True) as init_nb:
  354. W_a = ops.UniformFill([], "W_a", shape=[1, 2], min=-1., max=1.)
  355. B_a = ops.ConstantFill([], "B_a", shape=[1], value=0.0)
  356. W_b = ops.UniformFill([], "W_b", shape=[1, 2], min=-1., max=1.)
  357. B_b = ops.ConstantFill([], "B_b", shape=[1], value=0.0)
  358. W_gt_a = ops.GivenTensorFill(
  359. [], "W_gt_a", shape=[1, 2], values=W_a_values)
  360. B_gt_a = ops.GivenTensorFill([], "B_gt_a", shape=[1], values=B_a_values)
  361. W_gt_b = ops.GivenTensorFill(
  362. [], "W_gt_b", shape=[1, 2], values=W_b_values)
  363. B_gt_b = ops.GivenTensorFill([], "B_gt_b", shape=[1], values=B_b_values)
  364. params = [W_gt_a, B_gt_a, W_a, B_a, W_gt_b, B_gt_b, W_b, B_b]
  365. with NetBuilder(_use_control_ops=True, initial_scope=params) as train_nb:
  366. Y_pred = ops.ConstantFill([], "Y_pred", shape=[1], value=0.0)
  367. Y_noise = ops.ConstantFill([], "Y_noise", shape=[1], value=0.0)
  368. switch = ops.UniformFill(
  369. [], "switch", shape=[1], min=-1., max=1., run_once=0)
  370. zero = ops.ConstantFill([], "zero", shape=[1], value=0.0)
  371. X = ops.GaussianFill(
  372. [], "X", shape=[4096, 2], mean=0.0, std=1.0, run_once=0)
  373. noise = ops.GaussianFill(
  374. [], "noise", shape=[4096, 1], mean=0.0, std=1.0, run_once=0)
  375. with ops.IfNet(ops.LT([switch, zero])):
  376. Y_gt = ops.FC([X, W_gt_a, B_gt_a], "Y_gt")
  377. ops.Add([Y_gt, noise], Y_noise)
  378. ops.FC([X, W_a, B_a], Y_pred)
  379. with ops.Else():
  380. Y_gt = ops.FC([X, W_gt_b, B_gt_b], "Y_gt")
  381. ops.Add([Y_gt, noise], Y_noise)
  382. ops.FC([X, W_b, B_b], Y_pred)
  383. dist = ops.SquaredL2Distance([Y_noise, Y_pred], "dist")
  384. loss = dist.AveragedLoss([], ["loss"])
  385. assert len(init_nb.get()) == 1, "Expected a single init net produced"
  386. assert len(train_nb.get()) == 1, "Expected a single train net produced"
  387. train_net = train_nb.get()[0]
  388. gradient_map = train_net.AddGradientOperators([loss])
  389. init_net = init_nb.get()[0]
  390. ITER = init_net.ConstantFill(
  391. [], "ITER", shape=[1], value=0, dtype=core.DataType.INT64)
  392. train_net.Iter(ITER, ITER)
  393. LR = train_net.LearningRate(ITER, "LR", base_lr=-0.1,
  394. policy="step", stepsize=20, gamma=0.9)
  395. ONE = init_net.ConstantFill([], "ONE", shape=[1], value=1.)
  396. train_net.WeightedSum([W_a, ONE, gradient_map[W_a], LR], W_a)
  397. train_net.WeightedSum([B_a, ONE, gradient_map[B_a], LR], B_a)
  398. train_net.WeightedSum([W_b, ONE, gradient_map[W_b], LR], W_b)
  399. train_net.WeightedSum([B_b, ONE, gradient_map[B_b], LR], B_b)
  400. workspace.RunNetOnce(init_net)
  401. workspace.CreateNet(train_net)
  402. # print("Before training, W_a is: {}".format(workspace.FetchBlob("W_a")))
  403. # print("Before training, B_a is: {}".format(workspace.FetchBlob("B_a")))
  404. # print("Before training, W_b is: {}".format(workspace.FetchBlob("W_b")))
  405. # print("Before training, B_b is: {}".format(workspace.FetchBlob("B_b")))
  406. for _epoch in range(1000):
  407. workspace.RunNet(train_net.Proto().name)
  408. # print("After training, W_a is: {}".format(workspace.FetchBlob("W_a")))
  409. # print("After training, B_a is: {}".format(workspace.FetchBlob("B_a")))
  410. # print("After training, W_b is: {}".format(workspace.FetchBlob("W_b")))
  411. # print("After training, B_b is: {}".format(workspace.FetchBlob("B_b")))
  412. # print("Ground truth W_a is: {}".format(workspace.FetchBlob("W_gt_a")))
  413. # print("Ground truth B_a is: {}".format(workspace.FetchBlob("B_gt_a")))
  414. # print("Ground truth W_b is: {}".format(workspace.FetchBlob("W_gt_b")))
  415. # print("Ground truth B_b is: {}".format(workspace.FetchBlob("B_gt_b")))
  416. values_map = {
  417. "W_a": W_a_values,
  418. "B_a": B_a_values,
  419. "W_b": W_b_values,
  420. "B_b": B_b_values,
  421. }
  422. train_eps = 0.01
  423. for blob_name, values in values_map.items():
  424. trained_values = workspace.FetchBlob(blob_name)
  425. if trained_values.ndim == 2:
  426. self.assertEqual(trained_values.shape[0], 1)
  427. trained_values = trained_values[0][:]
  428. else:
  429. self.assertEqual(trained_values.ndim, 1)
  430. self.assertEqual(trained_values.size, len(values))
  431. for idx in range(len(trained_values)):
  432. self.assertTrue(abs(trained_values[idx] - values[idx]) < train_eps)
  433. class TestWhile(test_util.TestCase):
  434. @unittest.skip("Skip flaky test.")
  435. def testWhile(self):
  436. with NetBuilder(_use_control_ops=True) as nb:
  437. ops.Copy(ops.Const(0), "i")
  438. ops.Copy(ops.Const(1), "one")
  439. ops.Copy(ops.Const(2), "two")
  440. ops.Copy(ops.Const(2.0), "x")
  441. ops.Copy(ops.Const(3.0), "y")
  442. ops.Copy(ops.Const(2.0), "z")
  443. # raises x to the power of 4 and y to the power of 2
  444. # and z to the power of 3
  445. with ops.WhileNet():
  446. with ops.Condition():
  447. ops.Add(["i", "one"], "i")
  448. ops.LE(["i", "two"])
  449. ops.Pow("x", "x", exponent=2.0)
  450. with ops.IfNet(ops.LT(["i", "two"])):
  451. ops.Pow("y", "y", exponent=2.0)
  452. with ops.Else():
  453. ops.Pow("z", "z", exponent=3.0)
  454. ops.Add(["x", "y"], "x_plus_y")
  455. ops.Add(["x_plus_y", "z"], "s")
  456. assert len(nb.get()) == 1, "Expected a single net produced"
  457. net = nb.get()[0]
  458. net.AddGradientOperators(["s"])
  459. workspace.RunNetOnce(net)
  460. # (x^4)' = 4x^3
  461. self.assertAlmostEqual(workspace.FetchBlob("x_grad"), 32)
  462. self.assertAlmostEqual(workspace.FetchBlob("x"), 16)
  463. # (y^2)' = 2y
  464. self.assertAlmostEqual(workspace.FetchBlob("y_grad"), 6)
  465. self.assertAlmostEqual(workspace.FetchBlob("y"), 9)
  466. # (z^3)' = 3z^2
  467. self.assertAlmostEqual(workspace.FetchBlob("z_grad"), 12)
  468. self.assertAlmostEqual(workspace.FetchBlob("z"), 8)
# Script entry point: initialise the Caffe2 workspace, then hand control to
# the unittest runner, which discovers the TestCase classes defined above.
if __name__ == '__main__':
    workspace.GlobalInit(["python"])
    unittest.main()