| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434 |
- ## @package resnet
- # Module caffe2.python.models.resnet
- from caffe2.python import brew
- import logging
- '''
- Utility for creating ResNe(X)t
- "Deep Residual Learning for Image Recognition" by He, Zhang et al. 2015
- "Aggregated Residual Transformations for Deep Neural Networks" by Xie et al. 2016
- '''
class ResNetBuilder():
    '''
    Helper class for constructing residual blocks.

    The builder keeps a cursor, ``prev_blob``, holding the blob produced by
    the most recent operator; every ``add_*`` method reads from that blob and
    then replaces it with its own output.  ``comp_count`` counts the
    high-level components (blocks) added so far and ``comp_idx`` counts the
    convolutions inside the current component.  Both counters are only used
    to build blob names.
    '''

    def __init__(
        self,
        model,
        prev_blob,
        no_bias,
        is_test,
        bn_epsilon=1e-5,
        bn_momentum=0.9,
    ):
        '''
        Args:
            model: model object passed as first argument to every brew call.
            prev_blob: blob the first added operator will read from.
            no_bias: truthy -> convolutions get no_bias=1, falsy -> no_bias=0.
            is_test: forwarded as is_test to every spatial batch norm.
            bn_epsilon: epsilon used by add_spatial_bn and by the bottleneck
                shortcut batch norm.
            bn_momentum: momentum used by add_spatial_bn and by the
                bottleneck shortcut batch norm.
        '''
        self.model = model
        self.comp_count = 0
        self.comp_idx = 0
        self.prev_blob = prev_blob
        self.is_test = is_test
        self.bn_epsilon = bn_epsilon
        self.bn_momentum = bn_momentum
        # Normalised to an int flag before being handed to brew.conv.
        self.no_bias = 1 if no_bias else 0

    @staticmethod
    def _downsampling_stride(down_sampling):
        '''Return the stride of a simple block: 2 when down-sampling, else 1.'''
        # Truthiness instead of ``is False``: the identity test classified
        # falsy non-bool flags (0, None, numpy.bool_(False)) as down-sampling.
        return 2 if down_sampling else 1

    def add_conv(
        self,
        in_filters,
        out_filters,
        kernel,
        stride=1,
        group=1,
        pad=0,
    ):
        '''
        Append a convolution reading from prev_blob.

        Bumps comp_idx first, so the blob is named
        'comp_<comp_count>_conv_<comp_idx>'.  Returns the new prev_blob.
        '''
        self.comp_idx += 1
        self.prev_blob = brew.conv(
            self.model,
            self.prev_blob,
            'comp_%d_conv_%d' % (self.comp_count, self.comp_idx),
            in_filters,
            out_filters,
            weight_init=("MSRAFill", {}),
            kernel=kernel,
            stride=stride,
            group=group,
            pad=pad,
            no_bias=self.no_bias,
        )
        return self.prev_blob

    def add_relu(self):
        '''Append a ReLU that writes over prev_blob; returns the new prev_blob.'''
        self.prev_blob = brew.relu(
            self.model,
            self.prev_blob,
            self.prev_blob,  # in-place
        )
        return self.prev_blob

    def add_spatial_bn(self, num_filters):
        '''
        Append a spatial batch norm over prev_blob.

        comp_idx is not advanced here, so the blob name carries the index of
        the convolution that precedes it.  Returns the new prev_blob.
        '''
        self.prev_blob = brew.spatial_bn(
            self.model,
            self.prev_blob,
            'comp_%d_spatbn_%d' % (self.comp_count, self.comp_idx),
            num_filters,
            epsilon=self.bn_epsilon,
            momentum=self.bn_momentum,
            is_test=self.is_test,
        )
        return self.prev_blob

    def add_bottleneck(
        self,
        input_filters,   # num of feature maps from preceding layer
        base_filters,    # num of filters internally in the component
        output_filters,  # num of feature maps to output
        stride=1,
        group=1,
        spatial_batch_norm=True,
    ):
        '''
        Add a "bottleneck" component as described in He et al. Figure 3 (right)

        Chains 1x1 -> 3x3 -> 1x1 convolutions (each optionally followed by a
        spatial batch norm), sums the result with the shortcut and applies a
        ReLU.  ``stride`` and ``group`` apply to the 3x3 convolution; the
        shortcut projection, when present, uses the same ``stride``.

        Returns:
            output_filters, so callers can feed it in as the next block's
            input_filters.
        '''
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
            stride=1,
        )
        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=stride,
            group=group,
            pad=1,
        )
        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # When the number of feature maps mismatch between the input
        # and output (this usually happens when the residual stage
        # changes), we need to do a projection for the short cut
        if output_filters != input_filters:
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=stride,
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=self.bn_epsilon,
                    momentum=self.bn_momentum,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components of this ResNetBuilder
        self.comp_count += 1

        return output_filters

    def add_simple_block(
        self,
        input_filters,
        num_filters,
        down_sampling=False,
        spatial_batch_norm=True
    ):
        '''
        Add a two-convolution residual block (3x3 -> 3x3 plus shortcut).

        Args:
            input_filters: num of feature maps from the preceding layer.
            num_filters: num of feature maps produced by both convolutions.
            down_sampling: truthy -> the first convolution and the shortcut
                projection (if any) use stride 2; otherwise stride 1.
            spatial_batch_norm: add a spatial batch norm after each conv.

        Returns:
            num_filters, mirroring add_bottleneck so the value can be fed in
            as the next block's input_filters.
        '''
        self.comp_idx = 0
        shortcut_blob = self.prev_blob
        # Computed once so the main path and the shortcut always agree.
        stride = self._downsampling_stride(down_sampling)

        # 3x3
        self.add_conv(
            input_filters,
            num_filters,
            kernel=3,
            stride=stride,
            pad=1
        )
        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(num_filters)

        # Increase of dimensions, need a projection for the shortcut
        if num_filters != input_filters:
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=stride,
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                # NOTE(review): unlike add_bottleneck, this batch norm uses a
                # fixed epsilon of 1e-3 and passes no momentum instead of
                # self.bn_epsilon / self.bn_momentum.  Left untouched so
                # existing models keep their numerics -- confirm intent.
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    num_filters,
                    epsilon=1e-3,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of number of high level components of this ResNetBuilder
        self.comp_count += 1

        return num_filters
def create_resnet_32x32(
    model, data, num_input_channels, num_groups, num_labels, is_test=False
):
    '''
    Create residual net for smaller images (sec 4.2 of He et al. (2015))

    num_groups = 'n' in the paper: each of the three stages (16, 32 and 64
    filters) receives 2 * num_groups simple blocks.  The first block of the
    second and third stage down-samples.  Returns the result of the final
    brew.softmax call.
    '''
    stage_filters = [16, 32, 64]
    stem_filters = 16

    # Stem: conv1 -> spatial batch norm -> ReLU (no pooling is added here)
    brew.conv(
        model, data, 'conv1', num_input_channels, stem_filters,
        kernel=3, stride=1
    )
    brew.spatial_bn(
        model, 'conv1', 'conv1_spatbn', stem_filters,
        epsilon=1e-3, is_test=is_test
    )
    brew.relu(model, 'conv1_spatbn', 'relu1')

    # Residual stages, block counts as described in sec 4.2
    builder = ResNetBuilder(model, 'relu1', no_bias=0, is_test=is_test)
    blocks_per_stage = 2 * num_groups
    incoming_filters = stem_filters
    for stage_idx, width in enumerate(stage_filters):
        for block_idx in range(blocks_per_stage):
            opens_stage = block_idx == 0
            builder.add_simple_block(
                # Only the first block of a stage sees the previous width.
                incoming_filters if opens_stage else width,
                width,
                down_sampling=opens_stage and stage_idx > 0,
            )
        incoming_filters = width

    # Final layers: average pool -> fully connected -> softmax
    brew.average_pool(
        model, builder.prev_blob, 'final_avg', kernel=8, stride=1
    )
    brew.fc(model, 'final_avg', 'last_out', stage_filters[-1], num_labels)
    return brew.softmax(model, 'last_out', 'softmax')
# Maps a supported network depth to the number of residual blocks placed in
# each of the four residual stages.
RESNEXT_BLOCK_CONFIG = {
    18: (2, 2, 2, 2),
    34: (3, 4, 6, 3),
    50: (3, 4, 6, 3),
    101: (3, 4, 23, 3),
    152: (3, 8, 36, 3),
    200: (3, 24, 36, 3),
}

# Stride given to the first block of each of the four residual stages.
RESNEXT_STRIDES = [1, 2, 2, 2]

# Module logger, configured at import time and set to emit DEBUG and above.
logging.basicConfig()
log = logging.getLogger("resnext_builder")
log.setLevel(logging.DEBUG)
# The conv1 and final_avg kernel/stride args provide a basic mechanism for
# adapting resnet50 for different sizes of input images.
def create_resnext(
    model,
    data,
    num_input_channels,
    num_labels,
    num_layers,
    num_groups,
    num_width_per_group,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=1,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
    log=None,
    bn_epsilon=1e-5,
    bn_momentum=0.9,
):
    '''
    Build a ResNe(X)t: stem, four stages of bottleneck blocks, then a head.

    Args:
        model: model object passed to every brew helper.
        data: input blob.
        num_input_channels: channel count of ``data``.
        num_labels: output size of the final fully connected layer.
        num_layers: network depth; must be a key of RESNEXT_BLOCK_CONFIG.
        num_groups: ``group`` argument of every bottleneck 3x3 convolution.
        num_width_per_group: with num_groups, sets the inner width of the
            first stage (num_groups * num_width_per_group); the inner width
            doubles after every stage.
        label: optional label blob; when given (and no_loss is falsy) a
            SoftmaxWithLoss is added.
        is_test: forwarded to every spatial batch norm.
        no_loss: when truthy, return the fully connected output directly.
        no_bias: forwarded to the convolutions.
        conv1_kernel, conv1_stride: kernel and stride of the stem conv.
        final_avg_kernel: kernel passed to the final average pool.
        log: logger used to report an invalid num_layers; defaults to the
            "resnext_builder" logger.
        bn_epsilon, bn_momentum: spatial batch norm settings for the stem
            and for the residual blocks.

    Returns:
        The fully connected output if no_loss is truthy; otherwise a
        (softmax, loss) pair when label is not None; otherwise the result of
        brew.softmax.

    Raises:
        KeyError: num_layers is not a key of RESNEXT_BLOCK_CONFIG.
    '''
    if log is None:
        # The ``log`` parameter shadows the module-level logger, so fetch
        # the shared logger by name instead of calling .error on None.
        log = logging.getLogger("resnext_builder")

    try:
        num_blocks = RESNEXT_BLOCK_CONFIG[num_layers]
    except KeyError:
        log.error("{}-layer is invalid for resnext config".format(num_layers))
        raise

    strides = RESNEXT_STRIDES
    num_filters = [64, 256, 512, 1024, 2048]

    if num_layers in [18, 34]:
        num_filters = [64, 64, 128, 256, 512]

    # the number of features before the last FC layer
    num_features = num_filters[-1]

    # conv1 + maxpool
    conv_blob = brew.conv(
        model,
        data,
        'conv1',
        num_input_channels,
        num_filters[0],
        weight_init=("MSRAFill", {}),
        kernel=conv1_kernel,
        stride=conv1_stride,
        # NOTE(review): pad stays 3 whatever conv1_kernel is -- confirm.
        pad=3,
        no_bias=no_bias
    )

    bn_blob = brew.spatial_bn(
        model,
        conv_blob,
        'conv1_spatbn_relu',
        num_filters[0],
        epsilon=bn_epsilon,
        momentum=bn_momentum,
        is_test=is_test
    )
    relu_blob = brew.relu(model, bn_blob, bn_blob)
    max_pool = brew.max_pool(model, relu_blob, 'pool1', kernel=3, stride=2, pad=1)

    # Residual blocks...
    # Forward the caller's batch norm settings so the residual blocks use the
    # same epsilon/momentum as the stem instead of fixed 1e-5 / 0.9.
    builder = ResNetBuilder(model, max_pool, no_bias=no_bias,
                            is_test=is_test, bn_epsilon=bn_epsilon,
                            bn_momentum=bn_momentum)

    inner_dim = num_groups * num_width_per_group

    # 4 different kinds of residual blocks
    for residual_idx in range(4):
        residual_num = num_blocks[residual_idx]
        residual_stride = strides[residual_idx]
        dim_in = num_filters[residual_idx]

        for blk_idx in range(residual_num):
            dim_in = builder.add_bottleneck(
                dim_in,
                inner_dim,
                num_filters[residual_idx + 1],  # dim out
                # Only the first block of a stage applies the stage stride.
                stride=residual_stride if blk_idx == 0 else 1,
                group=num_groups,
            )

        inner_dim *= 2

    # Final layers
    # Final dimension of the "image" is reduced to 7x7 at this point
    # (presumably for the default input size -- TODO confirm).
    final_avg = brew.average_pool(
        model,
        builder.prev_blob,
        'final_avg',
        kernel=final_avg_kernel,
        stride=1,
        global_pooling=True,
    )

    last_out = brew.fc(
        model, final_avg, 'last_out_L{}'.format(num_labels), num_features, num_labels
    )

    if no_loss:
        return last_out

    # If we create model for training, use softmax-with-loss
    if label is not None:
        (softmax, loss) = model.SoftmaxWithLoss(
            [last_out, label],
            ["softmax", "loss"],
        )
        return (softmax, loss)

    # For inference, we just return softmax
    return brew.softmax(model, last_out, "softmax")
# The conv1 and final_avg kernel/stride args provide a basic mechanism for
# adapting resnet50 for different sizes of input images.
def create_resnet50(
    model,
    data,
    num_input_channels,
    num_labels,
    label=None,
    is_test=False,
    no_loss=False,
    no_bias=0,
    conv1_kernel=7,
    conv1_stride=2,
    final_avg_kernel=7,
):
    '''
    Build ResNet-50 by delegating to create_resnext.

    The depth is fixed to 50 with a single group of width 64; every other
    argument is handed through unchanged.  Note that no_bias defaults to 0
    here.  Returns whatever create_resnext returns.
    '''
    # resnet50 is a special case for ResNeXt50-1x64d
    resnext_options = dict(
        num_layers=50,
        num_groups=1,
        num_width_per_group=64,
        label=label,
        is_test=is_test,
        no_loss=no_loss,
        no_bias=no_bias,
        conv1_kernel=conv1_kernel,
        conv1_stride=conv1_stride,
        final_avg_kernel=final_avg_kernel,
    )
    return create_resnext(
        model, data, num_input_channels, num_labels, **resnext_options
    )
|