conv.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. ## @package conv
  2. # Module caffe2.python.layers.conv
  3. from caffe2.python import schema
  4. from caffe2.python.layers.layers import (
  5. ModelLayer,
  6. )
  7. import numpy as np
  8. class Conv(ModelLayer):
  9. """
  10. Convolutional layer
  11. Input:
  12. - input_record: at least has the shape info of C (num_channels)
  13. - output_dim: number of convolutional filters
  14. - kernel_h, kernel_w: kernel size for h and w
  15. - stride_h, stride_w: stride for h and w
  16. - pad_b, pad_l, pad_r, pad_t: padding sizes, if stride == 1,
  17. 'None' value will do auto padding
  18. - order: either 'NHWC' or 'NCHW'
  19. """
  20. def __init__(self, model, input_record, output_dim, kernel_h, kernel_w,
  21. stride_h, stride_w, pad_b=None, pad_l=None, pad_r=None,
  22. pad_t=None, order='NHWC', kernel_init=None, bias_init=None,
  23. kernel_optim=None, bias_optim=None,
  24. name='conv', **kwargs):
  25. super(Conv, self).__init__(model, name, input_record, **kwargs)
  26. assert isinstance(input_record, schema.Scalar), "Incorrect input type"
  27. # input num_channels (C) is needed
  28. input_dims = input_record.field_type().shape
  29. assert (kernel_h > 0 and isinstance(kernel_h, int)), (
  30. "kernel_h should be positive integer")
  31. assert (kernel_w > 0 and isinstance(kernel_w, int)), (
  32. "kernel_w should be positive integer")
  33. self.kernel_h = kernel_h
  34. self.kernel_w = kernel_w
  35. assert (stride_h > 0 and isinstance(stride_h, int)), (
  36. "stride_h should be positive integer")
  37. assert (stride_w > 0 and isinstance(stride_w, int)), (
  38. "stride_w should be positive integer")
  39. self.stride_h = stride_h
  40. self.stride_w = stride_w
  41. # output_dim calculation (http://cs231n.github.io/convolutional-networks/)
  42. # output_dim_w = (input_dim_w - kernel_w + pad_r + pad_l) / stride_w + 1
  43. # so, do auto_padding requires
  44. # pad_r, pad_l = [(input_dim_w - 1) * stride_w - input_dim_w + kernel_w] / 2
  45. # similair for pad_t and pad_b to auto pad kernel_h
  46. # here we only do auto padding for stride = 1 case
  47. if stride_h == 1:
  48. pad_t = int((kernel_h - 1) / 2) if pad_t is None else pad_t
  49. pad_b = int((kernel_h - 1) / 2) if pad_b is None else pad_b
  50. else:
  51. pad_t = 0 if pad_t is None else pad_t
  52. pad_b = 0 if pad_b is None else pad_b
  53. if stride_w == 1:
  54. pad_r = int((kernel_w - 1) / 2) if pad_r is None else pad_r
  55. pad_l = int((kernel_w - 1) / 2) if pad_l is None else pad_l
  56. else:
  57. pad_r = 0 if pad_r is None else pad_r
  58. pad_l = 0 if pad_l is None else pad_l
  59. assert (pad_t >= 0 and isinstance(pad_t, int)), "pad_t should be int >= 0"
  60. assert (pad_b >= 0 and isinstance(pad_b, int)), "pad_b should be int >= 0"
  61. assert (pad_r >= 0 and isinstance(pad_r, int)), "pad_r should be int >= 0"
  62. assert (pad_l >= 0 and isinstance(pad_l, int)), "pad_l should be int >= 0"
  63. self.pad_t = pad_t
  64. self.pad_b = pad_b
  65. self.pad_r = pad_r
  66. self.pad_l = pad_l
  67. assert order in ['NHWC', 'NCHW'], "order should either 'NHWC' or 'NCHW'"
  68. self.order = order
  69. if order == 'NHWC':
  70. input_c = input_dims[-1]
  71. kernel_shape = [output_dim, kernel_h, kernel_w, input_c]
  72. elif order == 'NCHW':
  73. input_c = input_dims[0]
  74. kernel_shape = [output_dim, input_c, kernel_h, kernel_w]
  75. assert input_c > 0, (
  76. "Number of input channels in conv parameters should be positive")
  77. kernel_init = kernel_init if kernel_init else (
  78. 'XavierFill', {}
  79. )
  80. bias_init = bias_init if bias_init else (
  81. 'ConstantFill', {'value': 0.0}
  82. )
  83. self.kernel = self.create_param(
  84. param_name='conv_kernel',
  85. shape=kernel_shape,
  86. initializer=kernel_init,
  87. optimizer=kernel_optim,
  88. )
  89. self.bias = self.create_param(
  90. param_name='conv_bias',
  91. shape=[output_dim],
  92. initializer=bias_init,
  93. optimizer=bias_optim,
  94. )
  95. # the output_schema only has the num of output channels
  96. # output_h and output_w would be inferred internally
  97. self.output_schema = schema.Scalar(
  98. (np.float32, (output_dim,)),
  99. self.get_next_blob_reference('output')
  100. )
  101. def add_ops(self, net):
  102. net.Conv(
  103. self.input_record.field_blobs() + [self.kernel, self.bias],
  104. self.output_schema.field_blobs(),
  105. kernel_h=self.kernel_h,
  106. kernel_w=self.kernel_w,
  107. stride_h=self.stride_h,
  108. stride_w=self.stride_w,
  109. pad_t=self.pad_t,
  110. pad_l=self.pad_l,
  111. pad_b=self.pad_b,
  112. pad_r=self.pad_r,
  113. order=self.order
  114. )