caffe_translator.py 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. ## @package caffe_translator
  2. # Module caffe2.python.caffe_translator
  3. import argparse
  4. import copy
  5. import logging
  6. import re
  7. import numpy as np # noqa
  8. from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2
  9. from caffe.proto import caffe_pb2
  10. from caffe2.python import core, utils, workspace
  11. from google.protobuf import text_format
  12. logging.basicConfig()
  13. log = logging.getLogger("caffe_translator")
  14. log.setLevel(logging.INFO)
  15. def _StateMeetsRule(state, rule):
  16. """A function that reproduces Caffe's StateMeetsRule functionality."""
  17. if rule.HasField('phase') and rule.phase != state.phase:
  18. return False
  19. if rule.HasField('min_level') and state.level < rule.min_level:
  20. return False
  21. if rule.HasField('max_level') and state.level > rule.max_level:
  22. return False
  23. curr_stages = set(list(state.stage))
  24. # all stages in rule.stages should be in, otherwise it's not a match.
  25. if len(rule.stage) and any([s not in curr_stages for s in rule.stage]):
  26. return False
  27. # none of the stage in rule.stages should be in, otherwise it's not a match.
  28. if len(rule.not_stage) and any([s in curr_stages for s in rule.not_stage]):
  29. return False
  30. # If none of the nonmatch happens, return True.
  31. return True
  32. def _ShouldInclude(net_state, layer):
  33. """A function that reproduces Caffe's inclusion and exclusion rule."""
  34. ret = (len(layer.include) == 0)
  35. # check exclude rules: if any exclusion is met, we shouldn't include.
  36. ret &= not any([_StateMeetsRule(net_state, rule) for rule in layer.exclude])
  37. if len(layer.include):
  38. # check include rules: if any inclusion is met, we should include.
  39. ret |= any([_StateMeetsRule(net_state, rule) for rule in layer.include])
  40. return ret
  41. def _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops):
  42. dim_map = {}
  43. ws = workspace.C.Workspace()
  44. for param in net_params.protos:
  45. ws.create_blob(param.name) \
  46. .feed(utils.Caffe2TensorToNumpyArray(param))
  47. external_input = net.op[0].input[0]
  48. ws.create_blob(external_input).feed(dummy_input)
  49. # Get dimensions with legacy pad
  50. for i in range(len(net.op)):
  51. op_def = net.op[i]
  52. ws._run_operator(op_def.SerializeToString())
  53. if i in legacy_pad_ops:
  54. output = op_def.output[0]
  55. blob_legacy = ws.fetch_blob(output)
  56. dim_map[i] = blob_legacy.shape
  57. return dim_map
  58. def _GetLegacyPadArgs(op_def, arg_map):
  59. pads = {}
  60. keys = ['pad_l', 'pad_t', 'pad_r', 'pad_b']
  61. is_pad = 'pad' in arg_map
  62. if is_pad:
  63. for k in keys:
  64. pads[k] = arg_map['pad'].i
  65. else:
  66. pads = {x: arg_map[x].i for x in keys}
  67. return pads
  68. def _AdjustDims(op_def, arg_map, pads, dim1, dim2):
  69. n1, c1, h1, w1 = dim1
  70. n2, c2, h2, w2 = dim2
  71. assert(n1 == n2)
  72. assert(c1 == c2)
  73. is_pad = 'pad' in arg_map
  74. if h1 != h2 or w1 != w2:
  75. if h1 == h2 + 1:
  76. pads['pad_b'] += 1
  77. elif h1 != h2:
  78. raise Exception("Unexpected dimensions for height:", h1, h2)
  79. if w1 == w2 + 1:
  80. pads['pad_r'] += 1
  81. elif w1 != w2:
  82. raise Exception("Unexpected dimensions for width:", w1, w2)
  83. if is_pad:
  84. op_def.arg.remove(arg_map['pad'])
  85. args = []
  86. for name in pads.keys():
  87. arg = caffe2_pb2.Argument()
  88. arg.name = name
  89. arg.i = pads[name]
  90. args.append(arg)
  91. op_def.arg.extend(args)
  92. else:
  93. for name in pads.keys():
  94. arg_map[name].i = pads[name]
  95. def _RemoveLegacyPad(net, net_params, input_dims):
  96. legacy_pad_ops = []
  97. for i in range(len(net.op)):
  98. op_def = net.op[i]
  99. if re.match(r'^(Conv|ConvTranspose|MaxPool|AveragePool)(\dD)?$',
  100. op_def.type):
  101. for arg in op_def.arg:
  102. if arg.name == 'legacy_pad':
  103. legacy_pad_ops.append(i)
  104. break
  105. if legacy_pad_ops:
  106. n, c, h, w = input_dims
  107. dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
  108. dim_map = _GetLegacyDims(net, net_params, dummy_input, legacy_pad_ops)
  109. # Running with the legacy pad argument removed
  110. # compare the dimensions and adjust pad argument when necessary
  111. ws = workspace.C.Workspace()
  112. external_input = net.op[0].input[0]
  113. ws.create_blob(external_input).feed_blob(dummy_input)
  114. for param in net_params.protos:
  115. ws.create_blob(param.name) \
  116. .feed_blob(utils.Caffe2TensorToNumpyArray(param))
  117. for i in range(len(net.op)):
  118. op_def = net.op[i]
  119. if i in legacy_pad_ops:
  120. arg_map = {}
  121. for arg in op_def.arg:
  122. arg_map[arg.name] = arg
  123. pads = _GetLegacyPadArgs(op_def, arg_map)
  124. # remove legacy pad arg
  125. for j in range(len(op_def.arg)):
  126. arg = op_def.arg[j]
  127. if arg.name == 'legacy_pad':
  128. del op_def.arg[j]
  129. break
  130. output = op_def.output[0]
  131. # use a new name to avoid the interference with inplace
  132. nonlegacy_output = output + '_nonlegacy'
  133. op_def.output[0] = nonlegacy_output
  134. ws._run_operator(op_def.SerializeToString())
  135. blob_nonlegacy = ws.fetch_blob(nonlegacy_output)
  136. # reset output name
  137. op_def.output[0] = output
  138. dim1 = dim_map[i]
  139. dim2 = blob_nonlegacy.shape
  140. _AdjustDims(op_def, arg_map, pads, dim1, dim2)
  141. ws._run_operator(op_def.SerializeToString())
  142. return net
  143. def _GetBlobDimMap(net, net_params, dummy_input):
  144. dim_map = {}
  145. ws = workspace.C.Workspace()
  146. for param in net_params.protos:
  147. ws.create_blob(param.name) \
  148. .feed(utils.Caffe2TensorToNumpyArray(param))
  149. external_input = net.op[0].input[0]
  150. ws.create_blob(external_input).feed(dummy_input)
  151. # Get dimensions with legacy pad
  152. for i in range(len(net.op)):
  153. op_def = net.op[i]
  154. ws._run_operator(op_def.SerializeToString())
  155. for output in op_def.output:
  156. blob = ws.fetch_blob(output)
  157. dim_map[output] = blob.shape
  158. return dim_map
  159. def _GetInputDims(caffe_net):
  160. input_dims = []
  161. if caffe_net.input_dim:
  162. input_dims = caffe_net.input_dim
  163. elif caffe_net.input_shape:
  164. input_dims = caffe_net.input_shape[0].dim
  165. elif caffe_net.layer[0].input_param.shape:
  166. # getting input dimension from first layer
  167. input_dims = caffe_net.layer[0].input_param.shape[0].dim
  168. return input_dims
  169. class TranslatorRegistry(object):
  170. registry_ = {}
  171. @classmethod
  172. def Register(cls, op_name):
  173. """A decorator for registering gradient mappings."""
  174. def Wrapper(func):
  175. cls.registry_[op_name] = func
  176. return func
  177. return Wrapper
  178. @classmethod
  179. def TranslateLayer(cls, layer, pretrained_blobs, is_test, **kwargs):
  180. try:
  181. caffe_ops, params = cls.registry_[layer.type](
  182. layer, pretrained_blobs, is_test, **kwargs)
  183. except KeyError:
  184. raise KeyError('No translator registered for layer: %s yet.' %
  185. str(layer))
  186. if caffe_ops is None:
  187. caffe_ops = []
  188. if type(caffe_ops) is not list:
  189. caffe_ops = [caffe_ops]
  190. return caffe_ops, params
  191. @classmethod
  192. def TranslateModel(
  193. cls,
  194. caffe_net,
  195. pretrained_net,
  196. is_test=False,
  197. net_state=None,
  198. remove_legacy_pad=False,
  199. input_dims=None
  200. ):
  201. net_state = caffe_pb2.NetState() if net_state is None else net_state
  202. net = caffe2_pb2.NetDef()
  203. net.name = caffe_net.name
  204. net_params = caffe2_pb2.TensorProtos()
  205. if len(caffe_net.layers) > 0:
  206. raise ValueError(
  207. 'I think something is wrong. This translation script '
  208. 'only accepts new style layers that are stored in the '
  209. 'layer field.'
  210. )
  211. if not input_dims:
  212. input_dims = _GetInputDims(caffe_net)
  213. for layer in caffe_net.layer:
  214. if not _ShouldInclude(net_state, layer):
  215. log.info('Current net state does not need layer {}'
  216. .format(layer.name))
  217. continue
  218. log.info('Translate layer {}'.format(layer.name))
  219. # Get pretrained one
  220. pretrained_layers = (
  221. [l for l in pretrained_net.layer
  222. if l.name == layer.name] + [l
  223. for l in pretrained_net.layers
  224. if l.name == layer.name]
  225. )
  226. if len(pretrained_layers) > 1:
  227. raise ValueError(
  228. 'huh? more than one pretrained layer of one name?')
  229. elif len(pretrained_layers) == 1:
  230. pretrained_blobs = [
  231. utils.CaffeBlobToNumpyArray(blob)
  232. for blob in pretrained_layers[0].blobs
  233. ]
  234. else:
  235. # No pretrained layer for the given layer name. We'll just pass
  236. # no parameter blobs.
  237. # print 'No pretrained layer for layer', layer.name
  238. pretrained_blobs = []
  239. operators, params = cls.TranslateLayer(
  240. layer, pretrained_blobs, is_test, net=net,
  241. net_params=net_params, input_dims=input_dims)
  242. net.op.extend(operators)
  243. net_params.protos.extend(params)
  244. if remove_legacy_pad:
  245. assert input_dims, \
  246. 'Please specify input_dims to remove legacy_pad'
  247. net = _RemoveLegacyPad(net, net_params, input_dims)
  248. return net, net_params
  249. def TranslateModel(*args, **kwargs):
  250. return TranslatorRegistry.TranslateModel(*args, **kwargs)
  251. def ConvertTensorProtosToInitNet(net_params, input_name):
  252. """Takes the net_params returned from TranslateModel, and wrap it as an
  253. init net that contain GivenTensorFill.
  254. This is a very simple feature that only works with float tensors, and is
  255. only intended to be used in an environment where you want a single
  256. initialization file - for more complex cases, use a db to store the
  257. parameters.
  258. """
  259. init_net = caffe2_pb2.NetDef()
  260. for tensor in net_params.protos:
  261. if len(tensor.float_data) == 0:
  262. raise RuntimeError(
  263. "Only float tensors are supported in this util.")
  264. op = core.CreateOperator(
  265. "GivenTensorFill", [], [tensor.name],
  266. arg=[
  267. utils.MakeArgument("shape", list(tensor.dims)),
  268. utils.MakeArgument("values", tensor.float_data)])
  269. init_net.op.extend([op])
  270. init_net.op.extend([core.CreateOperator("ConstantFill", [], [input_name], shape=[1])])
  271. return init_net
  272. def BaseTranslate(layer, caffe2_type):
  273. """A simple translate interface that maps the layer input and output."""
  274. caffe2_op = caffe2_pb2.OperatorDef()
  275. caffe2_op.type = caffe2_type
  276. caffe2_op.input.extend(layer.bottom)
  277. caffe2_op.output.extend(layer.top)
  278. return caffe2_op
  279. def AddArgument(op, key, value):
  280. """Makes an argument based on the value type."""
  281. op.arg.extend([utils.MakeArgument(key, value)])
  282. ################################################################################
  283. # Common translators for layers.
  284. ################################################################################
  285. @TranslatorRegistry.Register("Input")
  286. def TranslateInput(layer, pretrained_blobs, is_test, **kwargs):
  287. return [], []
  288. @TranslatorRegistry.Register("VideoData")
  289. def TranslateVideoData(layer, pretrained_blobs, is_test, **kwargs):
  290. return [], []
  291. @TranslatorRegistry.Register("Data")
  292. def TranslateData(layer, pretrained_blobs, is_test, **kwargs):
  293. return [], []
  294. # A function used in convolution, pooling and deconvolution to deal with
  295. # conv pool specific parameters.
  296. def _TranslateStridePadKernelHelper(param, caffe_op):
  297. try:
  298. if (len(param.stride) > 1 or len(param.kernel_size) > 1 or
  299. len(param.pad) > 1):
  300. raise NotImplementedError(
  301. "Translator currently does not support non-conventional "
  302. "pad/kernel/stride settings."
  303. )
  304. stride = param.stride[0] if len(param.stride) else 1
  305. pad = param.pad[0] if len(param.pad) else 0
  306. kernel = param.kernel_size[0] if len(param.kernel_size) else 0
  307. except TypeError:
  308. # This catches the case of a PoolingParameter, in which case we are
  309. # having non-repeating pad, stride and kernel.
  310. stride = param.stride
  311. pad = param.pad
  312. kernel = param.kernel_size
  313. # Get stride
  314. if param.HasField("stride_h") or param.HasField("stride_w"):
  315. AddArgument(caffe_op, "stride_h", param.stride_h)
  316. AddArgument(caffe_op, "stride_w", param.stride_w)
  317. else:
  318. AddArgument(caffe_op, "stride", stride)
  319. # Get pad
  320. if param.HasField("pad_h") or param.HasField("pad_w"):
  321. if param.pad_h == param.pad_w:
  322. AddArgument(caffe_op, "pad", param.pad_h)
  323. else:
  324. AddArgument(caffe_op, "pad_t", param.pad_h)
  325. AddArgument(caffe_op, "pad_b", param.pad_h)
  326. AddArgument(caffe_op, "pad_l", param.pad_w)
  327. AddArgument(caffe_op, "pad_r", param.pad_w)
  328. else:
  329. AddArgument(caffe_op, "pad", pad)
  330. # Get kernel
  331. if param.HasField("kernel_h") or param.HasField("kernel_w"):
  332. AddArgument(caffe_op, "kernel_h", param.kernel_h)
  333. AddArgument(caffe_op, "kernel_w", param.kernel_w)
  334. else:
  335. AddArgument(caffe_op, "kernel", kernel)
  336. @TranslatorRegistry.Register("Convolution3D")
  337. def TranslateConvNd(layer, pretrained_blobs, is_test, **kwargs):
  338. param = layer.convolution3d_param
  339. caffe_op = BaseTranslate(layer, "Conv")
  340. output = caffe_op.output[0]
  341. caffe_op.input.append(output + '_w')
  342. AddArgument(
  343. caffe_op,
  344. "kernels",
  345. [param.kernel_depth, param.kernel_size, param.kernel_size])
  346. AddArgument(
  347. caffe_op,
  348. "strides",
  349. [param.temporal_stride, param.stride, param.stride])
  350. temporal_pad = 0
  351. spatial_pad = 0
  352. if hasattr(param, 'temporal_pad'):
  353. temporal_pad = param.temporal_pad
  354. if hasattr(param, 'pad'):
  355. spatial_pad = param.pad
  356. AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
  357. # weight
  358. params = [
  359. utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
  360. # bias
  361. if len(pretrained_blobs) == 2:
  362. caffe_op.input.append(output + '_b')
  363. params.append(
  364. utils.NumpyArrayToCaffe2Tensor(
  365. pretrained_blobs[1].flatten(), output + '_b'))
  366. return caffe_op, params
  367. @TranslatorRegistry.Register("Convolution")
  368. def TranslateConv(layer, pretrained_blobs, is_test, **kwargs):
  369. param = layer.convolution_param
  370. caffe_op = BaseTranslate(layer, "Conv")
  371. output = caffe_op.output[0]
  372. caffe_op.input.append(output + '_w')
  373. _TranslateStridePadKernelHelper(param, caffe_op)
  374. # weight
  375. params = [
  376. utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')]
  377. # bias
  378. if len(pretrained_blobs) == 2:
  379. caffe_op.input.append(output + '_b')
  380. params.append(
  381. utils.NumpyArrayToCaffe2Tensor(
  382. pretrained_blobs[1].flatten(), output + '_b'))
  383. # Group convolution option
  384. if param.group != 1:
  385. AddArgument(caffe_op, "group", param.group)
  386. # Get dilation - not tested. If you have a model and this checks out,
  387. # please provide a test and uncomment this.
  388. if len(param.dilation) > 0:
  389. if len(param.dilation) == 1:
  390. AddArgument(caffe_op, "dilation", param.dilation[0])
  391. elif len(param.dilation) == 2:
  392. AddArgument(caffe_op, "dilation_h", param.dilation[0])
  393. AddArgument(caffe_op, "dilation_w", param.dilation[1])
  394. return caffe_op, params
  395. @TranslatorRegistry.Register("Deconvolution")
  396. def TranslateDeconv(layer, pretrained_blobs, is_test, **kwargs):
  397. param = layer.convolution_param
  398. if param.group > 1:
  399. raise NotImplementedError(
  400. "Translator currently does not support group deconvolution."
  401. )
  402. caffe_op = BaseTranslate(layer, "ConvTranspose")
  403. output = caffe_op.output[0]
  404. _TranslateStridePadKernelHelper(param, caffe_op)
  405. caffe_op.input.extend([output + '_w'])
  406. AddArgument(caffe_op, "order", "NCHW")
  407. weight = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_w')
  408. if param.bias_term:
  409. bias = utils.NumpyArrayToCaffe2Tensor(
  410. pretrained_blobs[1].flatten(), output + '_b'
  411. )
  412. caffe_op.input.extend([output + '_b'])
  413. return caffe_op, [weight, bias]
  414. else:
  415. return caffe_op, [weight]
  416. @TranslatorRegistry.Register("Crop")
  417. def TranslateCrop(layer, pretrained_blobs, is_test, **kwargs):
  418. net, net_params, input_dims = kwargs['net'], kwargs['net_params'], kwargs['input_dims']
  419. n, c, h, w = input_dims
  420. dummy_input = np.random.randn(n, c, h, w).astype(np.float32)
  421. dim_map = _GetBlobDimMap(net, net_params, dummy_input)
  422. param = layer.crop_param
  423. axis, offsets = param.axis, param.offset
  424. caffe_op = BaseTranslate(layer, "Slice")
  425. input_1 = caffe_op.input[1]
  426. input_1_dim = dim_map[input_1]
  427. starts, ends = [], []
  428. dims = len(dim_map[input_1])
  429. assert len(offsets) == 1, 'Caffe Translator for Crop only works for offset \
  430. of 1 for now'
  431. for _ in range(axis):
  432. starts.append(0)
  433. ends.append(-1)
  434. end_offset = [int(offsets[0] + input_1_dim[i]) for i in range(axis, dims)]
  435. ends.extend(end_offset)
  436. starts.extend([offsets[0]] * len(end_offset))
  437. op = caffe2_pb2.OperatorDef()
  438. op.input.extend([caffe_op.input[0]])
  439. op.output.extend(caffe_op.output)
  440. op.arg.extend(caffe_op.arg)
  441. op.type = caffe_op.type
  442. AddArgument(op, "starts", starts)
  443. AddArgument(op, "ends", ends)
  444. return op, []
  445. @TranslatorRegistry.Register("ReLU")
  446. def TranslateRelu(layer, pretrained_blobs, is_test, **kwargs):
  447. return BaseTranslate(layer, "Relu"), []
  448. @TranslatorRegistry.Register("Pooling")
  449. def TranslatePool(layer, pretrained_blobs, is_test, **kwargs):
  450. param = layer.pooling_param
  451. if param.pool == caffe_pb2.PoolingParameter.MAX:
  452. caffe_op = BaseTranslate(layer, "MaxPool")
  453. elif param.pool == caffe_pb2.PoolingParameter.AVE:
  454. caffe_op = BaseTranslate(layer, "AveragePool")
  455. _TranslateStridePadKernelHelper(param, caffe_op)
  456. AddArgument(caffe_op, "order", "NCHW")
  457. try:
  458. # In the Facebook port of Caffe, a torch_pooling field was added to
  459. # map the pooling computation of Torch. Essentially, it uses
  460. # floor((height + 2 * padding - kernel) / stride) + 1
  461. # instead of
  462. # ceil((height + 2 * padding - kernel) / stride) + 1
  463. # which is Caffe's version.
  464. # Torch pooling is actually the same as Caffe2 pooling, so we don't
  465. # need to do anything.
  466. is_torch_pooling = param.torch_pooling
  467. except AttributeError:
  468. is_torch_pooling = False
  469. if not is_torch_pooling:
  470. AddArgument(caffe_op, "legacy_pad",
  471. caffe2_legacy_pb2.CAFFE_LEGACY_POOLING)
  472. if param.global_pooling:
  473. AddArgument(caffe_op, "global_pooling", 1)
  474. return caffe_op, []
  475. @TranslatorRegistry.Register("Pooling3D")
  476. def TranslatePool3D(layer, pretrained_blobs, is_test, **kwargs):
  477. param = layer.pooling3d_param
  478. if param.pool == caffe_pb2.Pooling3DParameter.MAX:
  479. caffe_op = BaseTranslate(layer, "MaxPool")
  480. elif param.pool == caffe_pb2.Pooling3DParameter.AVE:
  481. caffe_op = BaseTranslate(layer, "AveragePool")
  482. AddArgument(caffe_op, "order", "NCHW")
  483. AddArgument(
  484. caffe_op,
  485. "kernels",
  486. [param.kernel_depth, param.kernel_size, param.kernel_size])
  487. AddArgument(
  488. caffe_op,
  489. "strides",
  490. [param.temporal_stride, param.stride, param.stride])
  491. temporal_pad = 0
  492. spatial_pad = 0
  493. if hasattr(param, 'temporal_pad'):
  494. temporal_pad = param.temporal_pad
  495. if hasattr(param, 'pad'):
  496. spatial_pad = param.pad
  497. AddArgument(caffe_op, "pads", [temporal_pad, spatial_pad, spatial_pad] * 2)
  498. return caffe_op, []
  499. @TranslatorRegistry.Register("LRN")
  500. def TranslateLRN(layer, pretrained_blobs, is_test, **kwargs):
  501. caffe_op = BaseTranslate(layer, "LRN")
  502. caffe_op.output.extend(['_' + caffe_op.output[0] + '_scale'])
  503. param = layer.lrn_param
  504. if param.norm_region != caffe_pb2.LRNParameter.ACROSS_CHANNELS:
  505. raise ValueError(
  506. "Does not support norm region other than across channels.")
  507. AddArgument(caffe_op, "size", int(param.local_size))
  508. AddArgument(caffe_op, "alpha", float(param.alpha))
  509. AddArgument(caffe_op, "beta", float(param.beta))
  510. AddArgument(caffe_op, "bias", float(param.k))
  511. AddArgument(caffe_op, "order", "NCHW")
  512. return caffe_op, []
  513. @TranslatorRegistry.Register("InnerProduct")
  514. def TranslateInnerProduct(layer, pretrained_blobs, is_test, **kwargs):
  515. param = layer.inner_product_param
  516. try:
  517. if param.axis != 1 or param.transpose:
  518. raise ValueError(
  519. "We don't have testing case for non-default axis and transpose "
  520. "cases yet so we are disabling it for now. If you have a model "
  521. "with this, please do send us your model for us to update this "
  522. "support, and you are more than welcome to send a PR for this.")
  523. except AttributeError:
  524. # We might be using an historic Caffe protobuf that does not have axis
  525. # and transpose arguments, so we will silently pass.
  526. pass
  527. caffe_op = BaseTranslate(layer, "FC")
  528. output = caffe_op.output[0]
  529. caffe_op.input.extend([output + '_w', output + '_b'])
  530. # To provide the old-style 4-dimensional blob (1, 1, dim_output, dim_input)
  531. # case, we always explicitly reshape the pretrained blob.
  532. if pretrained_blobs[0].ndim not in [2, 4]:
  533. raise ValueError("Unexpected weight ndim.")
  534. if (pretrained_blobs[0].ndim == 4 and
  535. list(pretrained_blobs[0].shape[:2]) != [1, 1]):
  536. raise ValueError(
  537. "If pretrained blob has 4 dims (old-style Caffe), the first two "
  538. "should be of value 1, but I got " + str(pretrained_blobs[0].shape))
  539. weight = utils.NumpyArrayToCaffe2Tensor(
  540. pretrained_blobs[0].reshape(-1, pretrained_blobs[0].shape[-1]),
  541. output + '_w'
  542. )
  543. bias = utils.NumpyArrayToCaffe2Tensor(
  544. pretrained_blobs[1].flatten(), output + '_b'
  545. )
  546. return caffe_op, [weight, bias]
  547. @TranslatorRegistry.Register("Dropout")
  548. def TranslateDropout(layer, pretrained_blobs, is_test, **kwargs):
  549. caffe_op = BaseTranslate(layer, "Dropout")
  550. caffe_op.output.extend(['_' + caffe_op.output[0] + '_mask'])
  551. param = layer.dropout_param
  552. AddArgument(caffe_op, "ratio", param.dropout_ratio)
  553. if (is_test):
  554. AddArgument(caffe_op, "is_test", 1)
  555. return caffe_op, []
  556. @TranslatorRegistry.Register("Softmax")
  557. def TranslateSoftmax(layer, pretrained_blobs, is_test, **kwargs):
  558. caffe_op = BaseTranslate(layer, "Softmax")
  559. return caffe_op, []
  560. @TranslatorRegistry.Register("SoftmaxWithLoss")
  561. def TranslateSoftmaxWithLoss(layer, pretrained_blobs, is_test, **kwargs):
  562. softmax_op = core.CreateOperator(
  563. "Softmax", [layer.bottom[0]],
  564. layer.bottom[0] + "_translator_autogen_softmax")
  565. xent_op = core.CreateOperator(
  566. "LabelCrossEntropy",
  567. [softmax_op.output[0], layer.bottom[1]],
  568. layer.bottom[0] + "_translator_autogen_xent")
  569. loss_op = core.CreateOperator(
  570. "AveragedLoss",
  571. xent_op.output[0],
  572. layer.top[0])
  573. return [softmax_op, xent_op, loss_op], []
  574. @TranslatorRegistry.Register("Accuracy")
  575. def TranslateAccuracy(layer, pretrained_blobs, is_test, **kwargs):
  576. caffe_op = BaseTranslate(layer, "Accuracy")
  577. if layer.accuracy_param.top_k != 1:
  578. AddArgument(caffe_op, "top_k", layer.accuracy_param.top_k)
  579. return caffe_op, []
  580. @TranslatorRegistry.Register("Concat")
  581. def TranslateConcat(layer, pretrained_blobs, is_test, **kwargs):
  582. caffe_op = BaseTranslate(layer, "Concat")
  583. caffe_op.output.extend(['_' + caffe_op.output[0] + '_dims'])
  584. AddArgument(caffe_op, "order", "NCHW")
  585. return caffe_op, []
  586. @TranslatorRegistry.Register("TanH")
  587. def TranslateTanH(layer, pretrained_blobs, is_test, **kwargs):
  588. caffe_op = BaseTranslate(layer, "Tanh")
  589. return caffe_op, []
  590. @TranslatorRegistry.Register("InstanceNorm")
  591. def TranslateInstanceNorm(layer, pretrained_blobs, is_test, **kwargs):
  592. caffe_op = BaseTranslate(layer, "InstanceNorm")
  593. output = caffe_op.output[0]
  594. weight = utils.NumpyArrayToCaffe2Tensor(
  595. pretrained_blobs[0].flatten(), output + '_w')
  596. bias = utils.NumpyArrayToCaffe2Tensor(
  597. pretrained_blobs[1].flatten(), output + '_b')
  598. caffe_op.input.extend([output + '_w', output + '_b'])
  599. AddArgument(caffe_op, "order", "NCHW")
  600. return caffe_op, [weight, bias]
  601. @TranslatorRegistry.Register("BatchNorm")
  602. def TranslateBatchNorm(layer, pretrained_blobs, is_test, **kwargs):
  603. caffe_op = BaseTranslate(layer, "SpatialBN")
  604. output = caffe_op.output[0]
  605. param = layer.batch_norm_param
  606. AddArgument(caffe_op, "is_test", is_test)
  607. AddArgument(caffe_op, "epsilon", param.eps)
  608. AddArgument(caffe_op, "order", "NCHW")
  609. caffe_op.input.extend(
  610. [output + "_scale",
  611. output + "_bias",
  612. output + "_mean",
  613. output + "_var"])
  614. if not is_test:
  615. caffe_op.output.extend(
  616. [output + "_mean",
  617. output + "_var",
  618. output + "_saved_mean",
  619. output + "_saved_var"])
  620. n_channels = pretrained_blobs[0].shape[0]
  621. if pretrained_blobs[2][0] != 0:
  622. mean = utils.NumpyArrayToCaffe2Tensor(
  623. (1. / pretrained_blobs[2][0]) * pretrained_blobs[0],
  624. output + '_mean')
  625. var = utils.NumpyArrayToCaffe2Tensor(
  626. (1. / pretrained_blobs[2][0]) * pretrained_blobs[1],
  627. output + '_var')
  628. else:
  629. raise RuntimeError("scalar is zero.")
  630. if len(pretrained_blobs) > 3:
  631. # IntelCaffe and NVCaffe uses fused BN+Scale,
  632. # three blobs for BN and two blobs for Scale,
  633. # so that the total number of blobs becomes five (including scale and bias).
  634. scale = utils.NumpyArrayToCaffe2Tensor(
  635. pretrained_blobs[3].flatten(),
  636. output + '_scale')
  637. bias = utils.NumpyArrayToCaffe2Tensor(
  638. pretrained_blobs[4].flatten(),
  639. output + '_bias')
  640. else:
  641. pretrained_blobs[2][0] = 1
  642. pretrained_blobs[2] = np.tile(pretrained_blobs[2], (n_channels, ))
  643. scale = utils.NumpyArrayToCaffe2Tensor(
  644. pretrained_blobs[2],
  645. output + '_scale')
  646. bias = utils.NumpyArrayToCaffe2Tensor(
  647. np.zeros_like(pretrained_blobs[2]),
  648. output + '_bias')
  649. return caffe_op, [scale, bias, mean, var]
  650. @TranslatorRegistry.Register("Eltwise")
  651. def TranslateElementWise(layer, pretrained_blobs, is_test, **kwargs):
  652. param = layer.eltwise_param
  653. # TODO(jiayq): if we have a protobuf that uses this, lift this constraint
  654. # and verify that we can correctly translate.
  655. if len(param.coeff) or param.operation != 1:
  656. raise RuntimeError("This eltwise layer is not yet supported.")
  657. caffe_op = BaseTranslate(layer, "Sum")
  658. return caffe_op, []
  659. @TranslatorRegistry.Register("Scale")
  660. def TranslateScale(layer, pretrained_blobs, is_test, **kwargs):
  661. mul_op = BaseTranslate(layer, "Mul")
  662. scale_param = layer.scale_param
  663. AddArgument(mul_op, "axis", scale_param.axis)
  664. AddArgument(mul_op, "broadcast", True)
  665. if len(mul_op.input) == 1:
  666. # the scale parameter is in pretrained blobs
  667. if scale_param.num_axes != 1:
  668. raise RuntimeError("This path has not been verified yet.")
  669. output = mul_op.output[0]
  670. mul_op_param = output + 'scale_w'
  671. mul_op.input.append(mul_op_param)
  672. weights = []
  673. weights.append(utils.NumpyArrayToCaffe2Tensor(
  674. pretrained_blobs[0].flatten(), mul_op_param))
  675. add_op = None
  676. if len(pretrained_blobs) == 1:
  677. # No bias-term in Scale layer
  678. pass
  679. elif len(pretrained_blobs) == 2:
  680. # Caffe Scale layer supports a bias term such that it computes
  681. # (scale_param * X + bias), whereas Caffe2 Mul op doesn't.
  682. # Include a separate Add op for the bias followed by Mul.
  683. add_op = copy.deepcopy(mul_op)
  684. add_op.type = "Add"
  685. add_op_param = output + 'scale_b'
  686. internal_blob = output + "_internal"
  687. del mul_op.output[:]
  688. mul_op.output.append(internal_blob)
  689. del add_op.input[:]
  690. add_op.input.append(internal_blob)
  691. add_op.input.append(add_op_param)
  692. weights.append(utils.NumpyArrayToCaffe2Tensor(
  693. pretrained_blobs[1].flatten(), add_op_param))
  694. else:
  695. raise RuntimeError("Unexpected number of pretrained blobs in Scale")
  696. caffe_ops = [mul_op]
  697. if add_op:
  698. caffe_ops.append(add_op)
  699. assert len(caffe_ops) == len(weights)
  700. return caffe_ops, weights
  701. elif len(mul_op.input) == 2:
  702. # TODO(jiayq): find a protobuf that uses this and verify.
  703. raise RuntimeError("This path has not been verified yet.")
  704. else:
  705. raise RuntimeError("Unexpected number of inputs.")
  706. @TranslatorRegistry.Register("Reshape")
  707. def TranslateReshape(layer, pretrained_blobs, is_test, **kwargs):
  708. caffe_op = BaseTranslate(layer, "Reshape")
  709. caffe_op.output.append("_" + caffe_op.input[0] + "_dims")
  710. reshape_param = layer.reshape_param
  711. AddArgument(caffe_op, 'shape', reshape_param.shape.dim)
  712. return caffe_op, []
  713. @TranslatorRegistry.Register("Flatten")
  714. def TranslateFlatten(layer, pretrained_blobs, is_test, **kwargs):
  715. param = layer.flatten_param
  716. if param.end_axis != -1:
  717. raise NotImplementedError("flatten_param.end_axis not supported yet.")
  718. if param.axis == 0:
  719. caffe_op = BaseTranslate(layer, "FlattenToVec")
  720. elif param.axis == 1:
  721. caffe_op = BaseTranslate(layer, "Flatten")
  722. else:
  723. # This could be a Reshape op, but dim size is not known here.
  724. raise NotImplementedError(
  725. "Not supported yet for flatten_param.axis {}.".format(param.axis))
  726. return caffe_op, []
  727. @TranslatorRegistry.Register("Sigmoid")
  728. def TranslateSigmoid(layer, pretrained_blobs, is_test, **kwargs):
  729. caffe_op = BaseTranslate(layer, "Sigmoid")
  730. return caffe_op, []
  731. @TranslatorRegistry.Register("ROIPooling")
  732. def TranslateROIPooling(layer, pretrained_blobs, is_test, **kwargs):
  733. caffe_op = BaseTranslate(layer, "RoIPool")
  734. AddArgument(caffe_op, "order", "NCHW")
  735. if is_test:
  736. AddArgument(caffe_op, "is_test", is_test)
  737. else:
  738. # Only used for gradient computation
  739. caffe_op.output.append(caffe_op.output[0] + '_argmaxes')
  740. param = layer.roi_pooling_param
  741. if param.HasField('pooled_h'):
  742. AddArgument(caffe_op, 'pooled_h', param.pooled_h)
  743. if param.HasField('pooled_w'):
  744. AddArgument(caffe_op, 'pooled_w', param.pooled_w)
  745. if param.HasField('spatial_scale'):
  746. AddArgument(caffe_op, 'spatial_scale', param.spatial_scale)
  747. return caffe_op, []
  748. @TranslatorRegistry.Register("PReLU")
  749. def TranslatePRelu(layer, pretrained_blobs, is_test, **kwargs):
  750. caffe_op = BaseTranslate(layer, "PRelu")
  751. output = caffe_op.output[0]
  752. caffe_op.input.extend([output + '_Slope'])
  753. slope = utils.NumpyArrayToCaffe2Tensor(pretrained_blobs[0], output + '_Slope')
  754. return caffe_op, [slope]
  755. @TranslatorRegistry.Register("Reduction")
  756. def TranslateReduction(layer, pretrained_blobs, is_test, **kwargs):
  757. param = layer.reduction_param
  758. if param.operation == caffe_pb2.ReductionParameter.SUM:
  759. caffe_op = BaseTranslate(layer, "ReduceBackSum")
  760. elif param.operation == caffe_pb2.ReductionParameter.MEAN:
  761. caffe_op = BaseTranslate(layer, "ReduceBackMean")
  762. else:
  763. raise NotImplementedError("Not yet supported")
  764. if param.axis > 0:
  765. # We can't figure out the number of dims to reduce from positive axis
  766. # for back reduction since the shape info is not known here.
  767. raise NotImplementedError("Not yet supported")
  768. num_reduce_dim = -param.axis
  769. AddArgument(caffe_op, "num_reduce_dim", num_reduce_dim)
  770. return caffe_op, []
  771. if __name__ == '__main__':
  772. parser = argparse.ArgumentParser(
  773. description="Utilitity to convert pretrained caffe models to Caffe2 models.")
  774. parser.add_argument("prototext", help="Caffe prototext.")
  775. parser.add_argument("caffemodel", help="Caffe trained model.")
  776. parser.add_argument("--init_net", help="Caffe2 initialization net.",
  777. default="init_net.pb")
  778. parser.add_argument("--predict_net", help="Caffe2 prediction net.",
  779. default="predict_net.pb")
  780. parser.add_argument("--remove_legacy_pad", help="Remove legacy pad \
  781. (Only works for nets with one input blob)",
  782. action="store_true",
  783. default=False)
  784. parser.add_argument("--input_dims", help="Dimension of input blob", nargs='+',
  785. type=int, default=[])
  786. args = parser.parse_args()
  787. caffenet = caffe_pb2.NetParameter()
  788. caffenet_pretrained = caffe_pb2.NetParameter()
  789. input_proto = args.prototext
  790. input_caffemodel = args.caffemodel
  791. output_init_net = args.init_net
  792. output_predict_net = args.predict_net
  793. with open(input_proto) as f:
  794. text_format.Merge(f.read(), caffenet)
  795. with open(input_caffemodel, 'rb') as f:
  796. caffenet_pretrained.ParseFromString(f.read())
  797. net, pretrained_params = TranslateModel(
  798. caffenet, caffenet_pretrained, is_test=True,
  799. remove_legacy_pad=args.remove_legacy_pad,
  800. input_dims=args.input_dims
  801. )
  802. # Assume there is one input and one output
  803. external_input = net.op[0].input[0]
  804. external_output = net.op[-1].output[0]
  805. net.external_input.extend([external_input])
  806. net.external_input.extend([param.name for param in pretrained_params.protos])
  807. net.external_output.extend([external_output])
  808. init_net = ConvertTensorProtosToInitNet(pretrained_params, external_input)
  809. with open(output_predict_net, 'wb') as f:
  810. f.write(net.SerializeToString())
  811. with open(output_predict_net + 'txt', 'w') as f:
  812. f.write(str(net))
  813. with open(output_init_net, 'wb') as f:
  814. f.write(init_net.SerializeToString())