| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124 |
- # @package homotopy_weight
- # Module caffe2.fb.python.layers.homotopy_weight
- from caffe2.python import core, schema
- from caffe2.python.layers.layers import ModelLayer
- import numpy as np
- import logging
- logger = logging.getLogger(__name__)
- '''
- Homotopy Weighting between two weights x, y by doing:
- alpha x + beta y
- where alpha is a decreasing scalar parameter ranging from [min, max] (default,
- [0, 1]), and alpha + beta = max + min, which means that beta is increasing in
- the range [min, max];
- Homotopy methods first solve an "easy" problem (one to which the solution is
- well known), which is then gradually transformed into the target problem
- '''
class HomotopyWeight(ModelLayer):
    """Blend two input blobs as ``alpha * x + beta * y`` with scheduled weights.

    ``x`` and ``y`` are the two field blobs of ``input_record``. Both weights
    are derived from a scalar ``lr`` blob produced by a ``LearningRate`` op
    with ``policy='inv'`` and ``base_lr=1.0``, driven by an iteration counter:

        alpha = (max_weight - min_weight) * lr       + min_weight
        beta  = (max_weight - min_weight) * (1 - lr) + min_weight

    so ``alpha + beta == max_weight + min_weight`` at every step.

    Args:
        model: layer model helper that owns this layer.
        input_record: record whose ``field_blobs()`` yields exactly two blobs.
        name: layer name; also used as the prefix of the created blobs/params.
        min_weight: lower bound of the weight range.
        max_weight: upper bound of the weight range; must exceed min_weight.
        half_life: positive schedule parameter passed to
            ``solve_inv_lr_params``.
        quad_life: schedule parameter passed to ``solve_inv_lr_params``; must
            be greater than ``2 * half_life``.
        atomic_iter: optional externally owned iteration-counter blob. When
            given, this layer reads it but never increments it; when ``None``
            the layer creates and increments its own counter.
        **kwargs: forwarded to ``ModelLayer.__init__``.

    Raises:
        AssertionError: if the input record does not hold exactly two blobs,
            or if the weight/life arguments violate the constraints above.
    """

    def __init__(
        self,
        model,
        input_record,
        name='homotopy_weight',
        min_weight=0.,
        max_weight=1.,
        half_life=1e6,
        quad_life=3e6,
        atomic_iter=None,
        **kwargs
    ):
        super(HomotopyWeight,
              self).__init__(model, name, input_record, **kwargs)
        self.output_schema = schema.Scalar(
            np.float32, self.get_next_blob_reference('homotopy_weight')
        )
        data = self.input_record.field_blobs()
        # Explicit check instead of a bare `assert` so the validation is not
        # stripped under `python -O`; the exception type is kept.
        if len(data) != 2:
            raise AssertionError(
                'HomotopyWeight expects exactly 2 input blobs, got %d' %
                len(data)
            )
        self.x = data[0]
        self.y = data[1]
        # TODO: currently model building does not have access to iter counter
        # or learning rate; it's added at optimization time;
        self.use_external_iter = (atomic_iter is not None)
        self.atomic_iter = (
            atomic_iter if self.use_external_iter else self.create_atomic_iter()
        )
        # to map lr to [min, max]; alpha = scale * lr + offset
        if not (max_weight > min_weight):
            raise AssertionError(
                'max_weight (%r) must be greater than min_weight (%r)' %
                (max_weight, min_weight)
            )
        self.scale = float(max_weight - min_weight)
        self.offset = self.model.add_global_constant(
            '%s_offset_1dfloat' % self.name, float(min_weight)
        )
        self.gamma, self.power = self.solve_inv_lr_params(half_life, quad_life)

    def solve_inv_lr_params(self, half_life, quad_life):
        """Derive ``(gamma, power)`` for the 'inv' LearningRate policy.

        With ``t = quad_life / half_life`` the method computes

            x     = t * (1 + sqrt(2)) / 2 - sqrt(2)
            gamma = (x - 1) / half_life
            power = log(2) / log(x)

        By construction ``1 + gamma * half_life == x`` and ``x ** power == 2``.

        NOTE(review): assuming the 'inv' policy evaluates
        ``base_lr * (1 + gamma * iter) ** (-power)``, the schedule equals 1/2
        at ``iter == half_life``. It is not evident from this formula that it
        equals exactly 1/4 at ``iter == quad_life`` -- confirm the intended
        definition of ``quad_life``.

        Args:
            half_life: must be > 0.
            quad_life: must be > 2 * half_life (gives x > 1, hence a finite
                positive ``power``).

        Returns:
            Tuple ``(gamma, power)``.

        Raises:
            AssertionError: if either constraint is violated.
        """
        # ensure that the gamma, power is solvable
        if not (half_life > 0):
            raise AssertionError(
                'half_life must be positive, got %r' % (half_life, )
            )
        # convex monotonically decreasing
        if not (quad_life > 2 * half_life):
            raise AssertionError(
                'quad_life (%r) must be greater than 2 * half_life (%r)' %
                (quad_life, half_life)
            )
        t = float(quad_life) / float(half_life)
        x = t * (1.0 + np.sqrt(2.0)) / 2.0 - np.sqrt(2.0)
        gamma = (x - 1.0) / float(half_life)
        power = np.log(2.0) / np.log(x)
        # Lazy logging arguments: the message is only formatted when the
        # record is actually emitted.
        logger.info(
            'homotopy_weighting: found lr param: gamma=%g, power=%g',
            gamma, power
        )
        return gamma, power

    def create_atomic_iter(self):
        """Create the layer-owned mutex and INT64 iteration counter params.

        Both params are registered with ``self.model.NoOptim``. The counter is
        a shape-[1] blob filled with 0. The counter is stored on
        ``self.atomic_iter`` and also returned.
        """
        self.mutex = self.create_param(
            param_name=('%s_mutex' % self.name),
            shape=None,
            initializer=('CreateMutex', ),
            optimizer=self.model.NoOptim,
        )
        self.atomic_iter = self.create_param(
            param_name=('%s_atomic_iter' % self.name),
            shape=[1],
            initializer=(
                'ConstantFill', {
                    'value': 0,
                    'dtype': core.DataType.INT64
                }
            ),
            optimizer=self.model.NoOptim,
        )
        return self.atomic_iter

    def update_weight(self, net):
        """Add the ops that compute the current ``alpha`` and ``beta`` blobs.

        Args:
            net: net to which the ops are appended.

        Returns:
            Tuple ``(alpha, beta)`` of blob references.
        """
        alpha = net.NextScopedBlob('alpha')
        beta = net.NextScopedBlob('beta')
        lr = net.NextScopedBlob('lr')
        comp_lr = net.NextScopedBlob('complementary_lr')
        scaled_lr = net.NextScopedBlob('scaled_lr')
        scaled_comp_lr = net.NextScopedBlob('scaled_complementary_lr')
        # Only a counter owned by this layer is advanced here; an externally
        # supplied counter is read as-is.
        if not self.use_external_iter:
            net.AtomicIter([self.mutex, self.atomic_iter], [self.atomic_iter])
        net.LearningRate(
            [self.atomic_iter],
            [lr],
            policy='inv',
            gamma=self.gamma,
            power=self.power,
            base_lr=1.0,
        )
        # comp_lr = ONE - lr
        net.Sub([self.model.global_constants['ONE'], lr], [comp_lr])
        # Map lr and its complement onto the weight range:
        # scale * value + offset.
        net.Scale([lr], [scaled_lr], scale=self.scale)
        net.Scale([comp_lr], [scaled_comp_lr], scale=self.scale)
        net.Add([scaled_lr, self.offset], [alpha])
        net.Add([scaled_comp_lr, self.offset], [beta])
        return alpha, beta

    def add_ops(self, net):
        """Append the weight-update ops and the final weighted sum to ``net``."""
        alpha, beta = self.update_weight(net)
        # alpha x + beta y
        net.WeightedSum([self.x, alpha, self.y, beta], self.output_schema())
|