optimizer_test_util.py 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237
  1. ## @package optimizer_test_util
  2. # Module caffe2.python.optimizer_test_util
  3. import unittest
  4. import numpy as np
  5. from caffe2.python import brew, core, workspace, cnn, optimizer
  6. from caffe2.python.modeling.initializers import (
  7. Initializer, PseudoFP16Initializer)
  8. from caffe2.python.model_helper import ModelHelper
  class OptimizerTestBase(object):
      """
      Shared optimizer tests, meant to be mixed into concrete test cases.

      This is an abstract base class.
      Don't inherit from unittest.TestCase, and don't name it 'Test*'.
      Do, however, do these things in classes which inherit from this.

      The tests use members that this class does not define itself:
        * ``assertIsInstance`` (unittest.TestCase provides it),
        * ``build_optimizer(model)``, called by every test,
        * ``check_optimizer(optimizer)``, called at the end of testDense and
          testSparse with whatever ``build_optimizer`` returned,
        * ``_skip_gpu``, read by testGPUDense after ``build_optimizer`` ran.
      """

      def _createDense(self, dtype=core.DataType.FLOAT):
          """
          Build a single-FC regression model together with its data set.

          The data set is 20 rows of random 0/1 features (numpy seed 123)
          whose labels are the dot product with the weights [2, 6, 5, 0, 1].
          Any dtype other than core.DataType.FLOAT selects float16 data and
          PseudoFP16Initializer; core.DataType.FLOAT16 additionally converts
          the FC output back to float before the loss is computed.

          Returns the tuple (model, perfect_model, data, label). Gradient
          operators for the averaged squared-L2 loss are already added.
          """
          target_weights = np.array([2, 6, 5, 0, 1]).astype(np.float32)
          np.random.seed(123)  # make test deterministic

          if dtype == core.DataType.FLOAT:
              numpy_dtype = np.float32
              initializer = Initializer
          else:
              numpy_dtype = np.float16
              initializer = PseudoFP16Initializer

          raw_bits = np.random.randint(2, size=(20, target_weights.size))
          features = raw_bits.astype(numpy_dtype)
          targets = np.dot(features, target_weights)[:, np.newaxis]

          model = ModelHelper(name="test", arg_scope={'order': 'NCHW'})
          weight_fill = ('ConstantFill', {})
          bias_fill = ('ConstantFill', {})
          prediction = brew.fc(
              model, 'data', 'fc', target_weights.size, 1,
              weight_fill, bias_fill,
              axis=0,
              WeightInitializer=initializer,
              BiasInitializer=initializer
          )
          if dtype == core.DataType.FLOAT16:
              prediction = model.HalfToFloat(prediction, prediction + "_fp32")
          squared_error = model.SquaredL2Distance([prediction, 'label'])
          avg_loss = model.AveragedLoss(squared_error, "avg_loss")
          gradients = model.AddGradientOperators([avg_loss])
          self.assertIsInstance(gradients['fc_w'], core.BlobReference)
          return (model, target_weights, features, targets)

      def testDense(self):
          """
          Train the dense model for 2000 single-row steps and require the
          learned 'fc_w' blob to match the generating weights within 1e-2.
          """
          model, target_weights, features, targets = self._createDense()
          opt = self.build_optimizer(model)

          workspace.FeedBlob('data', features[0])
          workspace.FeedBlob('label', targets[0])
          workspace.RunNetOnce(model.param_init_net)
          workspace.CreateNet(model.net, True)

          net_name = model.net.Proto().name
          row_count = features.shape[0]
          for _ in range(2000):
              row = np.random.randint(row_count)
              workspace.FeedBlob('data', features[row])
              workspace.FeedBlob('label', targets[row])
              workspace.RunNet(net_name)

          learned = workspace.FetchBlob('fc_w')
          np.testing.assert_allclose(
              target_weights[np.newaxis, :], learned, atol=1e-2)
          self.check_optimizer(opt)

      @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
      def testGPUDense(self, dtype=core.DataType.FLOAT):
          """
          Smoke test: build the dense model under a GPU device scope, append a
          CPU-side FC and the optimizer, then run the net once unless
          ``self._skip_gpu`` is truthy.
          """
          gpu_option = core.DeviceOption(workspace.GpuDeviceType, 0)
          with core.DeviceScope(gpu_option):
              model, _unused_weights, features, targets = \
                  self._createDense(dtype)
              # The FC output is copied to the host; for fp16 it is converted
              # to float first.
              host_source = 'fc'
              if dtype == core.DataType.FLOAT16:
                  host_source = model.HalfToFloat('fc', 'fc_fp32_for_host')
              model.CopyGPUToCPU(host_source, 'fc_cpu')
              workspace.FeedBlob('data', features[0])
              workspace.FeedBlob('label', targets[0])

          # Add some CPU ops
          brew.fc(model, 'fc_cpu', 'fc2', dim_in=1, dim_out=10, axis=0)

          # Create optimizer in default device scope
          self.build_optimizer(model)

          if self._skip_gpu:
              return

          # Run net to see it does not crash
          workspace.RunNetOnce(model.param_init_net)
          workspace.CreateNet(model.net, True)
          workspace.RunNet(model.net.Proto().name)

      def testSparse(self):
          """
          Train a gather-and-sum model through sparse gradients, once with
          int32 and once with int64 indices, and require the learned 'w' blob
          to match the generating weights within 1e-2 each time.
          """
          # to test duplicated indices we assign two indices to each weight and
          # thus each weight might count once or twice
          DUPLICATION = 2
          target_weights = np.array([2, 6, 5, 0, 1]).astype(np.float32)
          np.random.seed(123)  # make test deterministic
          raw_bits = np.random.randint(
              2, size=(20, target_weights.size * DUPLICATION))
          features = raw_bits.astype(np.float32)
          targets = np.dot(features, np.repeat(target_weights, DUPLICATION))

          model = cnn.CNNModelHelper("NCHW", name="test")
          # imitate what model wrapper does
          w = model.param_init_net.ConstantFill(
              [], 'w', shape=[target_weights.size], value=0.0)
          model.params.append(w)
          gathered = model.net.Gather([w, 'indices'], 'gather')
          prediction = model.ReduceFrontSum(gathered, 'sum')
          squared_error = model.SquaredL2Distance([prediction, 'label'])
          avg_loss = model.AveragedLoss(squared_error, "avg_loss")
          gradients = model.AddGradientOperators([avg_loss])
          self.assertIsInstance(gradients['w'], core.GradientSlice)
          opt = self.build_optimizer(model)

          workspace.CreateBlob('indices')
          workspace.CreateBlob('label')

          # Feature column j feeds weight j // DUPLICATION.
          column_to_weight = np.repeat(
              np.arange(target_weights.size), DUPLICATION)
          row_count = features.shape[0]

          for indices_type in (np.int32, np.int64):
              workspace.RunNetOnce(model.param_init_net)
              workspace.CreateNet(model.net, True)
              net_name = model.net.Proto().name
              for _ in range(2000):
                  row = np.random.randint(row_count)
                  # transform into indices of binary features
                  active = column_to_weight[features[row] == 1]
                  if active.size == 0:
                      continue
                  flat_indices = active.reshape((active.size,))
                  workspace.FeedBlob(
                      'indices', flat_indices.astype(indices_type))
                  workspace.FeedBlob(
                      'label', np.array(targets[row]).astype(np.float32))
                  workspace.RunNet(net_name)

              learned = workspace.FetchBlob('w')
              np.testing.assert_allclose(target_weights, learned, atol=1e-2)
          self.check_optimizer(opt)
  class LRModificationTestBase(object):
      """
      Shared tests for gradient-norm clipping and learning-rate injection.

      This is an abstract base class.
      Don't inherit from unittest.TestCase, and don't name it 'Test*'.
      Do, however, do these things in classes which inherit from this.

      The tests call ``self._createDense()``, ``self.build_optimizer(...)``
      and unittest assertions, none of which are defined here; concrete
      subclasses must supply them (``_createDense`` is defined on
      OptimizerTestBase in this module).
      """

      @staticmethod
      def _global_norm_ratio(grad_values, max_gradient_norm):
          """
          Return max_gradient_norm / max(max_gradient_norm, global_norm).

          global_norm is the square root of the summed squared norms of every
          array in grad_values, so the result is 1.0 while the global norm
          stays at or below max_gradient_norm and shrinks below 1.0 beyond it.
          """
          sum_squared_norms = 0.0
          for val in grad_values:
              sum_squared_norms += np.power(np.linalg.norm(val), 2.0)
          global_norm = np.sqrt(sum_squared_norms)
          return max_gradient_norm / np.maximum(max_gradient_norm, global_norm)

      def _gradient_ratio_reference(self, model, params, max_gradient_norm):
          """
          Compute the expected clipping ratio from the gradients currently in
          the workspace.

          Args:
              model: model whose ``param_to_grad`` maps each entry of params
                  to its gradient.
              params: parameters whose gradients enter the global norm.
              max_gradient_norm: clipping threshold.

          Returns:
              The scalar ratio computed by ``_global_norm_ratio``.
          """
          grad_values = []
          for param in params:
              grad = model.param_to_grad[param]
              # A sparse gradient keeps its numeric data in ``.values``.
              if isinstance(grad, core.GradientSlice):
                  grad = grad.values
              grad_values.append(workspace.FetchBlob(grad))
          return self._global_norm_ratio(grad_values, max_gradient_norm)

      def test_global_norm_based_gradient_clipping(self):
          """
          Run one training step with max_gradient_norm=1.0 and compare the
          'norm_clipped_grad_update/norm_ratio' blob with the numpy reference.
          """
          max_gradient_norm = 1.0
          model, _perfect_model, data, label = self._createDense()
          opt = self.build_optimizer(model, max_gradient_norm=max_gradient_norm)

          # Only parameters with a dense gradient enter the reference norm.
          params = [
              param for param in model.GetParams(top_scope=True)
              if param in model.param_to_grad
              and not isinstance(model.param_to_grad[param], core.GradientSlice)
          ]

          workspace.FeedBlob('data', data[0])
          workspace.FeedBlob('label', label[0])
          workspace.RunNetOnce(model.param_init_net)
          workspace.CreateNet(model.net, True)
          self.assertIsNotNone(opt._lr_multiplier)

          # Run net once
          idx = np.random.randint(data.shape[0])
          workspace.FeedBlob('data', data[idx])
          workspace.FeedBlob('label', label[idx])
          workspace.RunNet(model.net.Proto().name)

          reference = self._gradient_ratio_reference(
              model,
              params,
              max_gradient_norm,
          )
          norm_ratio = workspace.FetchBlob(
              'norm_clipped_grad_update/norm_ratio')
          np.testing.assert_almost_equal(norm_ratio, reference)
          # A ratio of 1.0 would mean nothing was clipped and the comparison
          # above proved nothing; assertLess reports the value on failure.
          self.assertLess(
              reference, 1.0, "Bad test, gradient not being scaled."
          )

      def test_lr_injection(self):
          """
          Check that the LR injection value starts at 1, can be set to 0, and
          that the 'lr_multiplier' blob is 0 after the next net run.
          """
          model, _perfect_model, data, label = self._createDense()
          opt = self.build_optimizer(
              model, max_gradient_norm=1, allow_lr_injection=True
          )

          workspace.FeedBlob('data', data[0])
          workspace.FeedBlob('label', label[0])
          workspace.RunNetOnce(model.param_init_net)
          workspace.CreateNet(model.net, True)

          # Test LR injection initialized properly
          self.assertIsNotNone(opt._lr_multiplier)
          self.assertEqual(optimizer.get_lr_injection(), 1)

          # Test that we're able to modify the value of the lr_injection
          optimizer.set_lr_injection(0)
          self.assertEqual(optimizer.get_lr_injection(), 0)

          # Test that setting the lr_injector properly propagates to the
          # lr_multiplier. Here, we have both lr_injector and norm_ratio that
          # affect the lr_multiplier
          workspace.RunNet(model.net.Proto().name)
          self.assertEqual(workspace.FetchBlob('lr_multiplier'), 0)