| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387 |
- ## @package gradient_checker
- # Module caffe2.python.gradient_checker
- import os
- import numpy as np
- from caffe2.python import core, workspace, net_drawer
- from caffe2.proto import caffe2_pb2
- def getGradientForOp(op):
- return core.GradientRegistry.GetGradientForOp(
- op, [s + '_grad' for s in op.output])
- def _get_grad_blob(grad_map, input_to_check):
- grad_blob = grad_map[input_to_check]
- if isinstance(grad_blob, core.BlobReference):
- return workspace.blobs[grad_blob]
- # If grad_blob is not a single blob, it should be a gradient slice.
- # To make it comparable with the estimiated gradient which is dense,
- # we need to first convert grad_blob to dense gradient.
- assert isinstance(grad_blob, core.GradientSlice)
- dense_grad = 'tmp_dense_grad'
- sparse_to_dense_op = core.CreateOperator(
- 'SparseToDense',
- [grad_blob.indices, grad_blob.values, input_to_check],
- dense_grad,
- )
- workspace.RunOperatorOnce(sparse_to_dense_op)
- return workspace.blobs[dense_grad]
- def _get_grad(net, outputs, outputs_with_grad, input_values, inputs_with_grads):
- grad_net = net.Clone(net.Name() + "_copy")
- grad_map = grad_net.AddGradientOperators(outputs_with_grad)
- for name, value in (input_values or {}).items():
- workspace.blobs[name] = value
- for input_to_check in inputs_with_grads:
- assert input_to_check in grad_map, (
- '{} has no gradient, cannot check net gradient.'.format(
- input_to_check))
- assert str(input_to_check) in workspace.blobs
- workspace.RunNetOnce(grad_net)
- forward_results = [(output, workspace.blobs[output]) for output in outputs]
- grads = {input_to_check: _get_grad_blob(grad_map, input_to_check)
- for input_to_check in inputs_with_grads}
- return forward_results, grads, grad_net
- def _assert_close(value1, value2, threshold, err_msg=''):
- np.testing.assert_allclose(
- value1, value2,
- atol=threshold, rtol=threshold,
- err_msg=err_msg,
- )
- delta = np.abs(value1 - value2).flatten()
- return np.mean(delta), max(delta)
- class NetGradientChecker(object):
- @staticmethod
- def CompareNets(nets, outputs, outputs_with_grad_ids,
- inputs_with_grads, input_values=None,
- threshold=0.0000001, print_net_images=False):
- def _get_output_with_grad_names(net_outputs):
- return [net_outputs[i] for i in outputs_with_grad_ids]
- if print_net_images:
- for i, net in enumerate(nets):
- png = net_drawer.GetPydotGraph(net).create_png()
- with open("caffe2_net_forward_" + str(i) + net.Name() + ".png",
- 'wb') \
- as f:
- f.write(png)
- results = [
- _get_grad(net, net_outputs,
- _get_output_with_grad_names(net_outputs),
- input_values, inputs_with_grads)
- for net, net_outputs in zip(nets, outputs)
- ]
- if print_net_images:
- _, _, backward_nets = zip(*results)
- for i, net in enumerate(backward_nets):
- png = net_drawer.GetPydotGraph(net).create_png()
- with open("caffe2_net_" + str(i) + net.Name() + ".png", 'wb') \
- as f:
- f.write(png)
- first_net_results, first_net_grads, _ = results[0]
- for net_results, net_grads, _ in results[1:]:
- assert len(net_results) == len(first_net_results)
- for idx, ((blob1, blob_value1), (blob2, blob_value2)) in enumerate(
- zip(first_net_results, net_results)):
- _assert_close(
- blob_value1, blob_value2, threshold,
- err_msg="Different forward pass results for output id {}. "
- "Corresponding output blobs: {} and {}".format(
- idx, blob1, blob2))
- assert net_grads.keys() == first_net_grads.keys()
- for blob, blob_grad_value in net_grads.items():
- _assert_close(
- first_net_grads[blob], blob_grad_value, threshold,
- err_msg="Different gradients for input {}".format(blob))
- @staticmethod
- def Check(net, outputs_with_grad, input_values,
- input_to_check, step_size=0.0001,
- threshold=0.05, print_net=True):
- net_results, net_grads, full_net = _get_grad(
- net, [], outputs_with_grad, input_values, [input_to_check])
- analytic_grad = net_grads[input_to_check]
- def GetLoss(new_value):
- workspace.blobs[input_to_check] = new_value
- workspace.RunNetOnce(full_net)
- return sum([
- workspace.blobs[output]
- for output in outputs_with_grad
- ]).sum()
- def GetValue(dim, delta):
- input_value = input_values[input_to_check].copy()
- input_value.flat[dim] += delta
- return input_value
- grad_estimate = np.zeros_like(input_values[input_to_check])
- for dim in range(input_values[input_to_check].size):
- pos_loss = GetLoss(GetValue(dim, step_size))
- neg_loss = GetLoss(GetValue(dim, -step_size))
- grad_estimate.flat[dim] = (pos_loss - neg_loss) / step_size / 2
- err_msg = "Error in gradient check for net_copy {}".format(
- net.Name())
- if print_net:
- err_msg += ": {}".format(net.Proto())
- return _assert_close(analytic_grad, grad_estimate, threshold, err_msg)
- class GradientChecker:
- """A gradient checker in Python.
- This is not the most efficient way to check gradients, as the Python
- interface will involve a lot of copies back and forth operations. Use at your
- own risk.
- """
- def __init__(
- self,
- stepsize,
- threshold,
- device_option=None,
- workspace_name="gradient_check",
- input_device_options=None,
- ):
- self._stepsize = stepsize
- self._threshold = threshold
- self._device_option = device_option or caffe2_pb2.DeviceOption()
- self._workspace_name = workspace_name
- if input_device_options is None:
- self._input_device_options = {}
- else:
- self._input_device_options = input_device_options
- def GetLossAndGrad(
- self, op, grad_ops, inputs, input_names, input_to_check, grad_name,
- outputs_with_grads
- ):
- for i in range(len(inputs)):
- workspace.FeedBlob(input_names[i], inputs[i],
- self._input_device_options.get(
- input_names[i], self._device_option))
- x = inputs[input_to_check]
- # Run.
- workspace.RunOperatorOnce(op)
- loss = 0.
- # Get Loss and feed in the gradients, run gradient ops.
- for idx in outputs_with_grads:
- name = op.output[idx]
- arr = workspace.FetchBlob(name)
- loss += (arr**2).sum()
- workspace.FeedBlob(name + '_grad', arr, self._device_option)
- loss /= 2.
- # Run gradient ops
- workspace.RunOperatorsOnce(grad_ops)
- # Get gradients
- if isinstance(grad_name, core.GradientSlice):
- workspace.FeedBlob('zeros', np.zeros_like(x, dtype=np.float32))
- workspace.FeedBlob('ones', np.ones(1, dtype=np.float32))
- gv_cpu_op = core.CreateOperator(
- 'EnsureCPUOutput', grad_name.values, grad_name.values + '_cpu',
- device_option=self._device_option
- )
- gi_cpu_op = core.CreateOperator(
- 'EnsureCPUOutput', grad_name.indices, grad_name.indices + '_cpu',
- device_option=self._device_option
- )
- sparse_to_dense_op = core.CreateOperator(
- 'ScatterWeightedSum',
- [
- 'zeros', 'ones', grad_name.indices + '_cpu',
- grad_name.values + '_cpu', 'ones'
- ],
- 'zeros',
- )
- workspace.RunOperatorOnce(gv_cpu_op)
- workspace.RunOperatorOnce(gi_cpu_op)
- workspace.RunOperatorOnce(sparse_to_dense_op)
- grad = workspace.FetchBlob('zeros')
- else:
- grad = workspace.FetchBlob(grad_name)
- return loss, grad
- def CheckSimple(
- self,
- op,
- inputs,
- input_to_check,
- outputs_with_grads,
- grad_ops=None,
- input_device_options=None,
- ensure_outputs_are_inferred=False,
- ):
- """Checks the operator in a very simple fashion by stacking a sum of
- squares on the top.
- Inputs:
- op: the operator to be checked.
- inputs: the input data in numpy arrays.
- input_to_check: an index specifying which input blob we should
- check.
- outputs_with_grads: indices specifying which output blobs will we
- need to check gradients with. For these outputs, we will collect a
- squared sum and also feed in their gradients.
- grad_operator: the gradient operator. If not given, we will get the
- gradient operator from the gradient registry.
- input_device_options: an optional mapping from input names to
- DeviceOptions (to override the default DeviceOption)
- ensure_outputs_are_inferred: if set will assert that the gradient output
- shapes matches the inferred shapes
- Outputs:
- boolean: True if it passes, False if it does not pass.
- """
- # Entering the checker workspace
- old_ws_name = workspace.CurrentWorkspace()
- if self._workspace_name != old_ws_name:
- workspace.SwitchWorkspace(self._workspace_name, True)
- op.device_option.CopyFrom(self._device_option)
- if grad_ops is None:
- # TODO(jiayq): use the gradient registration instead of the old
- # hack.
- grad_ops, g_input = getGradientForOp(op)
- _input_device_options = input_device_options or \
- core.InferOpBlobDevicesAsDict(op)[0]
- # First, feed in the input.
- for i, arr in enumerate(inputs):
- workspace.FeedBlob(
- op.input[i], arr,
- _input_device_options.get(
- op.input[i], self._device_option))
- # Get the loss and gradient for the original.
- grad_name = g_input[input_to_check]
- loss, grad = self.GetLossAndGrad(
- op, grad_ops, inputs, op.input, input_to_check, grad_name,
- outputs_with_grads,
- )
- grad_estimate = np.zeros_like(inputs[input_to_check])
- if grad_estimate.shape != grad.shape:
- raise Exception(
- "Mismatched gradient shapes: estimated ({}), grad ({})".format(
- grad_estimate.shape, grad.shape))
- if ensure_outputs_are_inferred:
- self._assertInferTensorChecks(op, grad_ops)
- full_grad_check = os.getenv('CAFFE2_FULL_GRAD_CHECK') == '1'
- dims_to_check = inputs[input_to_check].size
- for current_dim in range(dims_to_check):
- # Grad check is very expensive (as it involves running the op from
- # scratch for each of the input tensor elements). Thus, let's
- # run it by default only on a small subset of dimensions. Here we
- # apply very scientific approach: the first and the last 3 elements
- # of each tensor. Pass CAFFE2_FULL_GRAD_CHECK=1 env var to enable
- # the full check
- if not full_grad_check and current_dim >= 3 and \
- current_dim + 3 < dims_to_check:
- grad_estimate.flat[current_dim] = grad.flat[current_dim]
- continue
- # Positive gradient
- inputs[input_to_check].flat[current_dim] += self._stepsize
- pos_loss, _ = self.GetLossAndGrad(
- op, grad_ops, inputs, op.input, input_to_check, grad_name,
- outputs_with_grads
- )
- # Negative gradient
- inputs[input_to_check].flat[current_dim] -= self._stepsize * 2
- neg_loss, _ = self.GetLossAndGrad(
- op, grad_ops, inputs, op.input, input_to_check, grad_name,
- outputs_with_grads
- )
- # Recover the value
- inputs[input_to_check].flat[current_dim] += self._stepsize
- grad_estimate.flat[current_dim] = (
- pos_loss - neg_loss) / self._stepsize / 2
- # Now, check correctness
- fail_mat = ~np.isclose(
- grad, grad_estimate, atol=self._threshold, rtol=self._threshold)
- if np.any(fail_mat):
- idx = np.flatnonzero(fail_mat)
- print('Failed. [idx, grad, grad_estimate] are:')
- print(np.vstack([idx, grad.flat[idx], grad_estimate.flat[idx]]).T)
- ret = False
- else:
- ret = True
- # After finishing, cleaning up things.
- if self._workspace_name != old_ws_name:
- # We reset the workspace to make sure everything intermediate is
- # cleaned up. Note that there is no need to delete a workspace -
- # when empty it takes a very limited amount of memory.
- workspace.ResetWorkspace()
- workspace.SwitchWorkspace(old_ws_name)
- return ret, grad, grad_estimate
- def _assertInferTensorChecks(self, op, grad_ops):
- tmp_net = caffe2_pb2.NetDef()
- tmp_net.op.extend([op])
- tmp_net.op.extend(grad_ops)
- inferred_shapes, inferred_types = workspace.InferShapesAndTypes(
- [tmp_net],
- nets_proto=True,
- )
- outputs = set()
- for grad_op in grad_ops:
- outputs.update(grad_op.output)
- for output in outputs:
- if output not in inferred_shapes:
- raise Exception(
- "expected output {} to be inferred".format(output))
- blob = workspace.FetchBlob(output)
- correct_shape = list(blob.shape)
- inferred_shape = list(inferred_shapes[output])
- if correct_shape != inferred_shape:
- raise Exception(
- "Mismatched inferred shape: want({}), got({})".format(
- correct_shape, inferred_shape))
- if type(blob) is np.ndarray:
- if blob.dtype == np.dtype('float64'):
- correct_type = caffe2_pb2.TensorProto.DOUBLE
- elif blob.dtype == np.dtype('float32'):
- correct_type = caffe2_pb2.TensorProto.FLOAT
- elif blob.dtype == np.dtype('int32'):
- correct_type = caffe2_pb2.TensorProto.INT32
- elif blob.dtype == np.dtype('int64'):
- correct_type = caffe2_pb2.TensorProto.INT64
- else:
- correct_type = "unknown {}".format(np.dtype)
- else:
- correct_type = str(type(blob))
- inferred_type = inferred_types[output]
- if correct_type != inferred_type:
- raise Exception(
- "Mismatched inferred type: want({}), got({})".format(
- correct_type, inferred_type))
|