# regularizer_test.py
  1. import caffe2.python.hypothesis_test_util as hu
  2. import hypothesis.strategies as st
  3. import numpy as np
  4. import numpy.testing as npt
  5. from caffe2.python import core, layer_model_instantiator, regularizer, schema, workspace
  6. from caffe2.python.layer_test_util import LayersTestCase
  7. from caffe2.python.optimizer import SgdOptimizer
  8. from caffe2.python.regularizer import L1Norm, RegularizationBy
  9. from caffe2.python.regularizer_context import RegularizerContext, UseRegularizer
  10. from hypothesis import given
  11. class TestRegularizerContext(LayersTestCase):
  12. @given(X=hu.arrays(dims=[2, 5]))
  13. def test_regularizer_context(self, X):
  14. weight_reg_out = L1Norm(0.2)
  15. bias_reg_out = L1Norm(0)
  16. regularizers = {"WEIGHT": weight_reg_out, "BIAS": bias_reg_out}
  17. output_dims = 2
  18. input_record = self.new_record(schema.Scalar((np.float32, (5,))))
  19. schema.FeedRecord(input_record, [X])
  20. with UseRegularizer(regularizers):
  21. weight_reg = RegularizerContext.current().get_regularizer("WEIGHT")
  22. bias_reg = RegularizerContext.current().get_regularizer("BIAS")
  23. optim = SgdOptimizer(0.15)
  24. assert (
  25. weight_reg == weight_reg_out
  26. ), "fail to get correct weight reg from context"
  27. assert bias_reg == bias_reg_out, "fail to get correct bias reg from context"
  28. fc_output = self.model.FC(
  29. input_record,
  30. output_dims,
  31. weight_optim=optim,
  32. bias_optim=optim,
  33. weight_reg=weight_reg,
  34. bias_reg=bias_reg,
  35. )
  36. # model.output_schema has to a struct
  37. self.model.output_schema = schema.Struct(("fc_output", fc_output))
  38. self.assertEqual(schema.Scalar((np.float32, (output_dims,))), fc_output)
  39. _, train_net = layer_model_instantiator.generate_training_nets(self.model)
  40. ops = train_net.Proto().op
  41. ops_type_list = [ops[i].type for i in range(len(ops))]
  42. assert ops_type_list.count("LpNorm") == 2
  43. assert ops_type_list.count("Scale") == 4
  44. assert ops_type_list.count("LpNormGradient") == 2
  45. class TestRegularizer(LayersTestCase):
  46. @given(X=hu.arrays(dims=[2, 5], elements=hu.floats(min_value=-1.0, max_value=1.0)))
  47. def test_log_barrier(self, X):
  48. param = core.BlobReference("X")
  49. workspace.FeedBlob(param, X)
  50. train_init_net, train_net = self.get_training_nets()
  51. reg = regularizer.LogBarrier(1.0)
  52. output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
  53. reg(
  54. train_net,
  55. train_init_net,
  56. param,
  57. grad=None,
  58. by=RegularizationBy.AFTER_OPTIMIZER,
  59. )
  60. workspace.RunNetOnce(train_init_net)
  61. workspace.RunNetOnce(train_net)
  62. def ref(X):
  63. return (
  64. np.array(np.sum(-np.log(np.clip(X, 1e-9, None))) * 0.5).astype(
  65. np.float32
  66. ),
  67. np.clip(X, 1e-9, None),
  68. )
  69. for x, y in zip(workspace.FetchBlobs([output, param]), ref(X)):
  70. npt.assert_allclose(x, y, rtol=1e-3)
  71. @given(
  72. X=hu.arrays(dims=[2, 5], elements=hu.floats(min_value=-1.0, max_value=1.0)),
  73. left_open=st.booleans(),
  74. right_open=st.booleans(),
  75. eps=hu.floats(min_value=1e-6, max_value=1e-4),
  76. ub=hu.floats(min_value=-1.0, max_value=1.0),
  77. lb=hu.floats(min_value=-1.0, max_value=1.0),
  78. **hu.gcs_cpu_only
  79. )
  80. def test_bounded_grad_proj(self, X, left_open, right_open, eps, ub, lb, gc, dc):
  81. if ub - (eps if right_open else 0.) < lb + (eps if left_open else 0.):
  82. return
  83. param = core.BlobReference("X")
  84. workspace.FeedBlob(param, X)
  85. train_init_net, train_net = self.get_training_nets()
  86. reg = regularizer.BoundedGradientProjection(
  87. lb=lb, ub=ub, left_open=left_open, right_open=right_open, epsilon=eps
  88. )
  89. output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS)
  90. reg(
  91. train_net,
  92. train_init_net,
  93. param,
  94. grad=None,
  95. by=RegularizationBy.AFTER_OPTIMIZER,
  96. )
  97. workspace.RunNetOnce(train_init_net)
  98. workspace.RunNetOnce(train_net)
  99. def ref(X):
  100. return np.clip(
  101. X, lb + (eps if left_open else 0.), ub - (eps if right_open else 0.)
  102. )
  103. assert output is None
  104. npt.assert_allclose(workspace.blobs[param], ref(X), atol=1e-7)
  105. @given(
  106. output_dim=st.integers(1, 10),
  107. input_num=st.integers(3, 30),
  108. reg_weight=st.integers(0, 10)
  109. )
  110. def test_group_l1_norm(self, output_dim, input_num, reg_weight):
  111. """
  112. 1. create a weight blob
  113. 2. create random group splits
  114. 3. run group_l1_nrom with the weight blob
  115. 4. run equivalent np operations to calculate group l1 norm
  116. 5. compare if the results from 3 and 4 are equal
  117. """
  118. def compare_reference(weight, group_boundaries, reg_lambda, output):
  119. group_splits = np.hsplit(weight, group_boundaries[1:-1])
  120. l2_reg = np.sqrt([np.sum(np.square(g)) for g in group_splits])
  121. l2_normalized = np.multiply(l2_reg,
  122. np.array([np.sqrt(g.shape[1]) for g in group_splits]))
  123. result = np.multiply(np.sum(l2_normalized), reg_lambda)
  124. npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)
  125. weight = np.random.rand(output_dim, input_num).astype(np.float32)
  126. feature_num = np.random.randint(low=1, high=input_num - 1)
  127. group_boundaries = [0]
  128. group_boundaries = np.append(
  129. group_boundaries,
  130. np.sort(
  131. np.random.choice(range(1, input_num - 1), feature_num, replace=False)
  132. ),
  133. )
  134. group_boundaries = np.append(group_boundaries, [input_num])
  135. split_info = np.diff(group_boundaries)
  136. weight_blob = core.BlobReference("weight_blob")
  137. workspace.FeedBlob(weight_blob, weight)
  138. train_init_net, train_net = self.get_training_nets()
  139. reg = regularizer.GroupL1Norm(reg_weight * 0.1, split_info.tolist())
  140. output = reg(
  141. train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
  142. )
  143. workspace.RunNetOnce(train_init_net)
  144. workspace.RunNetOnce(train_net)
  145. compare_reference(weight, group_boundaries, reg_weight * 0.1, output)
  146. @given(
  147. param_dim=st.integers(10, 30),
  148. k=st.integers(5, 9),
  149. reg_weight=st.integers(0, 10)
  150. )
  151. def test_l1_norm_trimmed(self, param_dim, k, reg_weight):
  152. weight = np.random.rand(param_dim).astype(np.float32)
  153. weight_blob = core.BlobReference("weight_blob")
  154. workspace.FeedBlob(weight_blob, weight)
  155. train_init_net, train_net = self.get_training_nets()
  156. reg = regularizer.L1NormTrimmed(reg_weight * 0.1, k)
  157. output = reg(
  158. train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
  159. )
  160. workspace.RunNetOnce(train_init_net)
  161. workspace.RunNetOnce(train_net)
  162. result = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)]) * reg_weight * 0.1
  163. npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)
  164. @given(
  165. param_dim=st.integers(10, 30),
  166. k=st.integers(5, 9),
  167. l1=st.integers(0, 10),
  168. l2=st.integers(0, 10)
  169. )
  170. def test_elastic_l1_norm_trimmed(self, param_dim, k, l1, l2):
  171. weight = np.random.rand(param_dim).astype(np.float32)
  172. weight_blob = core.BlobReference("weight_blob")
  173. workspace.FeedBlob(weight_blob, weight)
  174. train_init_net, train_net = self.get_training_nets()
  175. reg = regularizer.ElasticNetL1NormTrimmed(l1 * 0.1, l2 * 0.1, k)
  176. output = reg(
  177. train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS
  178. )
  179. workspace.RunNetOnce(train_init_net)
  180. workspace.RunNetOnce(train_net)
  181. l1_norm = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)])
  182. l2_norm = np.sum(np.square(weight))
  183. result = l1_norm * l1 * 0.1 + l2_norm * l2 * 0.1
  184. npt.assert_almost_equal(result, workspace.blobs[output], decimal=2)
  185. @given(
  186. row_dim=st.integers(5, 10),
  187. norm=st.floats(min_value=1.0, max_value=4.0),
  188. data_strategy=st.data(),
  189. )
  190. def test_fp16_max_norm(self, row_dim, norm, data_strategy):
  191. weight = np.random.rand(row_dim, 5).astype(np.float16)
  192. grad = np.random.rand(row_dim, 5).astype(np.float16)
  193. # generate indices that will be updated
  194. indices = data_strategy.draw(
  195. hu.tensor(
  196. dtype=np.int64,
  197. min_dim=1,
  198. max_dim=1,
  199. elements=st.sampled_from(np.arange(weight.shape[0])),
  200. )
  201. )
  202. indices = np.unique(indices)
  203. # compute expected result
  204. result = weight.copy()
  205. # prevent dived by zero
  206. eps = 1e-12
  207. norms = np.sqrt(np.sum(result[indices, ] ** 2, axis=1, keepdims=True))
  208. # if the norms are smaller than max_norm, then it doesn't need update
  209. desired = np.clip(norms, 0, norm)
  210. # apply max norm
  211. result[indices, ] *= desired / (eps + norms)
  212. weight_blob = core.BlobReference("weight_blob")
  213. workspace.FeedBlob(weight_blob, weight)
  214. grad_blob = core.BlobReference("grad_blob")
  215. workspace.FeedBlob(grad_blob, grad)
  216. indices_blob = core.BlobReference("indices")
  217. workspace.FeedBlob(indices_blob, indices)
  218. grad_blob_slice = core.GradientSlice(indices=indices_blob, values=grad_blob)
  219. train_init_net, train_net = self.get_training_nets()
  220. reg = regularizer.MaxNorm(norm, dtype='fp16')
  221. reg(
  222. train_net, train_init_net, weight_blob, grad_blob_slice, by=RegularizationBy.AFTER_OPTIMIZER
  223. )
  224. workspace.RunNetOnce(train_init_net)
  225. workspace.RunNetOnce(train_net)
  226. npt.assert_almost_equal(result, workspace.FetchBlob('weight_blob'), decimal=2)