| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699 |
- # Copyright (c) 2016-present, Facebook, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- ##############################################################################
- ## @package convnet_benchmarks
- # Module caffe2.experiments.python.convnet_benchmarks
- """
- Benchmark for common convnets.
- (NOTE: The numbers below were measured before the parameter-update step was added; TODO: update them.)
- Speed on Titan X, with 10 warmup steps and 10 main steps and with different
- versions of cudnn, are as follows (time reported below is per-batch time,
- forward / forward+backward):
- CuDNN V3 CuDNN v4
- AlexNet 32.5 / 108.0 27.4 / 90.1
- OverFeat 113.0 / 342.3 91.7 / 276.5
- Inception 134.5 / 485.8 125.7 / 450.6
- VGG (batch 64) 200.8 / 650.0 164.1 / 551.7
- Speed on Inception with varied batch sizes and CuDNN v4 is as follows:
- Batch Size Speed per batch Speed per image
- 16 22.8 / 72.7 1.43 / 4.54
- 32 38.0 / 127.5 1.19 / 3.98
- 64 67.2 / 233.6 1.05 / 3.65
- 128 125.7 / 450.6 0.98 / 3.52
- Speed on Tesla M40, with 10 warmup steps and 10 main steps and with cudnn
- v4, is as follows:
- AlexNet 68.4 / 218.1
- OverFeat 210.5 / 630.3
- Inception 300.2 / 1122.2
- VGG (batch 64) 405.8 / 1327.7
- (Note that these numbers involve a "full" backprop, i.e. the gradient
- with respect to the input image is also computed.)
- To get the numbers, simply run:
- for MODEL in AlexNet OverFeat Inception; do
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size 128 --model $MODEL --forward_only
- done
- for MODEL in AlexNet OverFeat Inception; do
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size 128 --model $MODEL
- done
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size 64 --model VGGA --forward_only
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size 64 --model VGGA
- for BS in 16 32 64 128; do
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size $BS --model Inception --forward_only
- PYTHONPATH=../gen:$PYTHONPATH python convnet_benchmarks.py \
- --batch_size $BS --model Inception
- done
- Note that VGG needs to be run at batch 64 due to memory limit on the backward
- pass.
- """
- import argparse
- import time
- from caffe2.python import cnn, workspace, core
- import caffe2.python.SparseTransformer as SparseTransformer # type: ignore[import]
def MLP(order):
    """Build a deep multi-branch fully connected benchmark network.

    Three parallel branches each stack twenty 256-wide FC layers; the first
    layer of every branch reads the "data" blob. The branch outputs are
    summed, fed to a final 1000-way FC layer, and a cross-entropy loss
    against the "label" blob is attached.

    Args:
        order: Accepted for signature parity with the other model builders;
            it is not used here.

    Returns:
        A (model, input_width) tuple where input_width is 256.
    """
    model = cnn.CNNModelHelper()
    hidden_dim = 256
    num_layers = 20
    num_branches = 3

    def blob_name(layer, branch):
        # Name of the blob produced for `branch` after `layer` FC layers.
        return "fc_{}_{}".format(layer, branch)

    for layer in range(num_layers):
        for branch in range(num_branches):
            # Layer 0 of every branch consumes the shared input blob.
            source = blob_name(layer, branch) if layer > 0 else "data"
            target = blob_name(layer + 1, branch)
            model.FC(
                source, target,
                dim_in=hidden_dim, dim_out=hidden_dim,
                weight_init=model.XavierInit,
                bias_init=model.XavierInit)

    branch_outputs = [
        blob_name(num_layers, branch) for branch in range(num_branches)
    ]
    model.Sum(branch_outputs, ["sum"])
    model.FC(
        "sum", "last",
        dim_in=hidden_dim, dim_out=1000,
        weight_init=model.XavierInit,
        bias_init=model.XavierInit)
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, hidden_dim
def AlexNet(order):
    """Build the AlexNet benchmark network.

    Five convolution stages (each followed by an in-name ReLU, three of them
    by a max-pool) feed three FC layers, a softmax, and a cross-entropy loss
    against the "label" blob.

    Args:
        order: Passed through as the first argument of cnn.CNNModelHelper.

    Returns:
        A (model, 224) tuple; Benchmark uses the second element as the square
        input image size.
    """
    model = cnn.CNNModelHelper(
        order,
        name="alexnet",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
    )

    def conv_relu(blob_in, name, dim_in, dim_out, kernel, **conv_args):
        # Convolution followed by a ReLU whose output blob reuses `name`.
        conv = model.Conv(
            blob_in, name, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_args
        )
        return model.Relu(conv, name)

    def fully_connected(blob_in, name, dim_in, dim_out):
        # FC layer using the same fill specs as the convolutions.
        return model.FC(
            blob_in, name, dim_in, dim_out,
            ('XavierFill', {}), ('ConstantFill', {})
        )

    # Convolutional trunk.
    blob = conv_relu("data", "conv1", 3, 64, 11, stride=4, pad=2)
    blob = model.MaxPool(blob, "pool1", kernel=3, stride=2)
    blob = conv_relu(blob, "conv2", 64, 192, 5, pad=2)
    blob = model.MaxPool(blob, "pool2", kernel=3, stride=2)
    blob = conv_relu(blob, "conv3", 192, 384, 3, pad=1)
    blob = conv_relu(blob, "conv4", 384, 256, 3, pad=1)
    blob = conv_relu(blob, "conv5", 256, 256, 3, pad=1)
    blob = model.MaxPool(blob, "pool5", kernel=3, stride=2)

    # Classifier head.
    blob = fully_connected(blob, "fc6", 256 * 6 * 6, 4096)
    blob = model.Relu(blob, "fc6")
    blob = fully_connected(blob, "fc7", 4096, 4096)
    blob = model.Relu(blob, "fc7")
    blob = fully_connected(blob, "fc8", 4096, 1000)

    pred = model.Softmax(blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
def OverFeat(order):
    """Build the OverFeat benchmark network.

    Five convolution stages (each followed by an in-name ReLU, three of them
    by a 2x2 max-pool) feed three FC layers, a softmax, and a cross-entropy
    loss against the "label" blob.

    Args:
        order: Passed through as the first argument of cnn.CNNModelHelper.

    Returns:
        A (model, 231) tuple; Benchmark uses the second element as the square
        input image size.
    """
    model = cnn.CNNModelHelper(
        order,
        name="overfeat",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
    )

    def conv_relu(blob_in, name, dim_in, dim_out, kernel, **conv_args):
        # Convolution followed by a ReLU whose output blob reuses `name`.
        conv = model.Conv(
            blob_in, name, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_args
        )
        return model.Relu(conv, name)

    def fully_connected(blob_in, name, dim_in, dim_out):
        # FC layer using the same fill specs as the convolutions.
        return model.FC(
            blob_in, name, dim_in, dim_out,
            ('XavierFill', {}), ('ConstantFill', {})
        )

    # Convolutional trunk; conv1 and conv2 are unpadded.
    blob = conv_relu("data", "conv1", 3, 96, 11, stride=4)
    blob = model.MaxPool(blob, "pool1", kernel=2, stride=2)
    blob = conv_relu(blob, "conv2", 96, 256, 5)
    blob = model.MaxPool(blob, "pool2", kernel=2, stride=2)
    blob = conv_relu(blob, "conv3", 256, 512, 3, pad=1)
    blob = conv_relu(blob, "conv4", 512, 1024, 3, pad=1)
    blob = conv_relu(blob, "conv5", 1024, 1024, 3, pad=1)
    blob = model.MaxPool(blob, "pool5", kernel=2, stride=2)

    # Classifier head.
    blob = fully_connected(blob, "fc6", 1024 * 6 * 6, 3072)
    blob = model.Relu(blob, "fc6")
    blob = fully_connected(blob, "fc7", 3072, 4096)
    blob = model.Relu(blob, "fc7")
    blob = fully_connected(blob, "fc8", 4096, 1000)

    pred = model.Softmax(blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231
def VGGA(order):
    """Build the VGG-A benchmark network.

    Eight 3x3, pad-1 convolutions (each followed by an in-name ReLU, five of
    them by a 2x2 max-pool) feed three FC layers, a softmax, and a
    cross-entropy loss against the "label" blob.

    Args:
        order: Passed through as the first argument of cnn.CNNModelHelper.

    Returns:
        A (model, 231) tuple; Benchmark uses the second element as the square
        input image size.
    """
    model = cnn.CNNModelHelper(
        order,
        name='vgg-a',
        use_cudnn=True,
        cudnn_exhaustive_search=True,
    )

    # (conv blob name, input channels, output channels,
    #  name of the max-pool that follows the ReLU, or None)
    conv_stages = [
        ("conv1", 3, 64, "pool1"),
        ("conv2", 64, 128, "pool2"),
        ("conv3", 128, 256, None),
        ("conv4", 256, 256, "pool4"),
        ("conv5", 256, 512, None),
        ("conv6", 512, 512, "pool6"),
        ("conv7", 512, 512, None),
        ("conv8", 512, 512, "pool8"),
    ]

    blob = "data"
    for conv_name, dim_in, dim_out, pool_name in conv_stages:
        blob = model.Conv(
            blob, conv_name, dim_in, dim_out, 3,
            ('XavierFill', {}), ('ConstantFill', {}),
            pad=1
        )
        blob = model.Relu(blob, conv_name)
        if pool_name is not None:
            blob = model.MaxPool(blob, pool_name, kernel=2, stride=2)

    # Classifier head: two FC+ReLU layers, then the 1000-way output layer.
    blob = model.FC(
        blob, "fcix", 512 * 7 * 7, 4096,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    blob = model.Relu(blob, "fcix")
    blob = model.FC(
        blob, "fcx", 4096, 4096,
        ('XavierFill', {}), ('ConstantFill', {})
    )
    blob = model.Relu(blob, "fcx")
    blob = model.FC(
        blob, "fcxi", 4096, 1000,
        ('XavierFill', {}), ('ConstantFill', {})
    )

    pred = model.Softmax(blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 231
def net_DAG_Builder(model):
    """Print a banner and return SparseTransformer.netbuilder(model).

    Args:
        model: The model handed unchanged to SparseTransformer.netbuilder.

    Returns:
        Whatever SparseTransformer.netbuilder returns for `model`.
    """
    rule = "===================================================="
    for banner_line in (rule, " Start Building DAG ", rule):
        print(banner_line)
    return SparseTransformer.netbuilder(model)
def _InceptionModule(
    model, input_blob, input_depth, output_name, conv1_depth, conv3_depths,
    conv5_depths, pool_depth
):
    """Add one four-path inception module to `model`.

    The paths are: a 1x1 conv; a 1x1 reduce followed by a 3x3 conv; a 1x1
    reduce followed by a 5x5 conv; and a 3x3 max-pool followed by a 1x1
    projection. Every convolution is followed by an in-place ReLU, and the
    four path outputs are concatenated into `output_name`.

    Args:
        model: Model helper the operators are added to.
        input_blob: Blob consumed by all four paths.
        input_depth: Input channel count given to the convs reading
            `input_blob` (and to the pool projection).
        output_name: Name of the concatenated output; also the prefix of all
            intermediate blob names.
        conv1_depth: Output channels of the 1x1 path.
        conv3_depths: Two-item sequence: reduce depth, then 3x3 output depth.
        conv5_depths: Two-item sequence: reduce depth, then 5x5 output depth.
        pool_depth: Output channels of the pool projection.

    Returns:
        The result of model.Concat over the four path outputs.
    """
    def conv_relu(blob_in, suffix, dim_in, dim_out, kernel, **conv_args):
        # Convolution named "<output_name><suffix>" plus an in-place ReLU.
        conv = model.Conv(
            blob_in, output_name + suffix, dim_in, dim_out, kernel,
            ('XavierFill', {}), ('ConstantFill', {}),
            **conv_args
        )
        return model.Relu(conv, conv)

    # path 1: 1x1 conv
    branch_1x1 = conv_relu(input_blob, ":conv1", input_depth, conv1_depth, 1)

    # path 2: 1x1 conv + 3x3 conv
    reduce_3x3 = conv_relu(
        input_blob, ":conv3_reduce", input_depth, conv3_depths[0], 1
    )
    branch_3x3 = conv_relu(
        reduce_3x3, ":conv3", conv3_depths[0], conv3_depths[1], 3, pad=1
    )

    # path 3: 1x1 conv + 5x5 conv
    reduce_5x5 = conv_relu(
        input_blob, ":conv5_reduce", input_depth, conv5_depths[0], 1
    )
    branch_5x5 = conv_relu(
        reduce_5x5, ":conv5", conv5_depths[0], conv5_depths[1], 5, pad=2
    )

    # path 4: pool + 1x1 conv
    pooled = model.MaxPool(
        input_blob, output_name + ":pool", kernel=3, stride=1, pad=1
    )
    branch_pool = conv_relu(pooled, ":pool_proj", input_depth, pool_depth, 1)

    return model.Concat(
        [branch_1x1, branch_3x3, branch_5x5, branch_pool], output_name
    )
def Inception(order):
    """Build the Inception benchmark network.

    A two-convolution stem is followed by nine inception modules (with
    max-pools after "inc4" and "inc9"), a 7x7 average pool, a 1000-way FC
    layer, a softmax, and a cross-entropy loss against the "label" blob.

    Args:
        order: Passed through as the first argument of cnn.CNNModelHelper.

    Returns:
        A (model, 224) tuple; Benchmark uses the second element as the square
        input image size.
    """
    model = cnn.CNNModelHelper(
        order,
        name="inception",
        use_cudnn=True,
        cudnn_exhaustive_search=True,
    )

    # Stem.
    blob = model.Conv(
        "data", "conv1", 3, 64, 7,
        ('XavierFill', {}), ('ConstantFill', {}),
        stride=2, pad=3
    )
    blob = model.Relu(blob, "conv1")
    blob = model.MaxPool(blob, "pool1", kernel=3, stride=2, pad=1)
    blob = model.Conv(
        blob, "conv2a", 64, 64, 1, ('XavierFill', {}), ('ConstantFill', {})
    )
    blob = model.Relu(blob, blob)
    blob = model.Conv(
        blob, "conv2", 64, 192, 3,
        ('XavierFill', {}), ('ConstantFill', {}),
        pad=1
    )
    blob = model.Relu(blob, "conv2")
    blob = model.MaxPool(blob, "pool2", kernel=3, stride=2, pad=1)

    # Inception modules:
    # (name, input depth, 1x1 depth, 3x3 depths, 5x5 depths, pool depth,
    #  name of the max-pool that follows the module, or None)
    module_specs = [
        ("inc3", 192, 64, [96, 128], [16, 32], 32, None),
        ("inc4", 256, 128, [128, 192], [32, 96], 64, "pool5"),
        ("inc5", 480, 192, [96, 208], [16, 48], 64, None),
        ("inc6", 512, 160, [112, 224], [24, 64], 64, None),
        ("inc7", 512, 128, [128, 256], [24, 64], 64, None),
        ("inc8", 512, 112, [144, 288], [32, 64], 64, None),
        ("inc9", 528, 256, [160, 320], [32, 128], 128, "pool9"),
        ("inc10", 832, 256, [160, 320], [32, 128], 128, None),
        ("inc11", 832, 384, [192, 384], [48, 128], 128, None),
    ]
    for (name, in_depth, depth_1x1, depths_3x3, depths_5x5, depth_pool,
         trailing_pool) in module_specs:
        blob = _InceptionModule(
            model, blob, in_depth, name, depth_1x1, depths_3x3, depths_5x5,
            depth_pool
        )
        if trailing_pool is not None:
            blob = model.MaxPool(
                blob, trailing_pool, kernel=3, stride=2, pad=1
            )

    blob = model.AveragePool(blob, "pool11", kernel=7, stride=1)
    blob = model.FC(
        blob, "fc", 1024, 1000, ('XavierFill', {}), ('ConstantFill', {})
    )
    # It seems that Soumith's benchmark does not have softmax on top
    # for Inception. We will add it anyway so we can have a proper
    # backward pass.
    pred = model.Softmax(blob, "pred")
    xent = model.LabelCrossEntropy([pred, "label"], "xent")
    model.AveragedLoss(xent, "loss")
    return model, 224
def AddInput(model, batch_size, db, db_type):
    """Add the database-backed data input operators to `model`.

    Reads "data_uint8" and "label" through TensorProtosDBInput, casts the
    data to float, applies NHWC2NCHW, scales it by 1/256 in place, and stops
    gradients from flowing into it.

    Args:
        model: Model helper the operators are added to.
        batch_size: Forwarded to TensorProtosDBInput.
        db: Forwarded to TensorProtosDBInput.
        db_type: Forwarded to TensorProtosDBInput.

    Returns:
        A (data, label) tuple of the resulting blobs.
    """
    raw_data, label = model.TensorProtosDBInput(
        [],
        ["data_uint8", "label"],
        batch_size=batch_size,
        db=db,
        db_type=db_type
    )
    float_data = model.Cast(raw_data, "data_nhwc", to=core.DataType.FLOAT)
    data = model.NHWC2NCHW(float_data, "data")
    # The remaining operators overwrite the "data" blob in place.
    data = model.Scale(data, data, scale=float(1. / 256))
    data = model.StopGradient(data, data)
    return data, label
def AddParameterUpdate(model):
    """Add a plain SGD-style update for every parameter of `model`.

    The learning-rate settings are placeholders for benchmarking; they are
    not tuned to actually train the models.

    Args:
        model: Model helper whose `params` and `param_to_grad` are read; the
            gradient mapping must already be populated.
    """
    iteration = model.Iter("iter")
    learning_rate = model.LearningRate(
        iteration, "LR",
        base_lr=-1e-8, policy="step", stepsize=10000, gamma=0.999
    )
    unit_weight = model.param_init_net.ConstantFill(
        [], "ONE", shape=[1], value=1.0
    )
    for param in model.params:
        gradient = model.param_to_grad[param]
        # Combine the parameter (weight ONE) with its gradient (weight LR)
        # and write the result back into the parameter blob.
        model.WeightedSum(
            [param, unit_weight, gradient, learning_rate], param
        )
def Benchmark(model_gen, arg):
    """Build a model, run it, and print the average time per iteration.

    Args:
        model_gen: Callable taking the storage order and returning a
            (model, input_size) tuple.
        arg: Parsed options. The attributes read here are order, net_type,
            num_workers, batch_size, model, forward_only, cpu, dump_model,
            warmup_iterations, iterations and layer_wise_benchmark.
    """
    model, input_size = model_gen(arg.order)
    model.Proto().type = arg.net_type
    model.Proto().num_workers = arg.num_workers

    # In order to be able to run everything without feeding more stuff, let's
    # add the data and label blobs to the parameter initialization net as well.
    if arg.model == "MLP":
        # The MLP consumes flat feature vectors rather than images.
        input_shape = [arg.batch_size, input_size]
    elif arg.order == "NCHW":
        input_shape = [arg.batch_size, 3, input_size, input_size]
    else:
        input_shape = [arg.batch_size, input_size, input_size, 3]

    init_net = model.param_init_net
    init_net.GaussianFill(
        [], "data", shape=input_shape, mean=0.0, std=1.0
    )
    init_net.UniformIntFill(
        [], "label", shape=[arg.batch_size, ], min=0, max=999
    )

    if arg.forward_only:
        print('{}: running forward only.'.format(arg.model))
    else:
        print('{}: running forward-backward.'.format(arg.model))
        model.AddGradientOperators(["loss"])
        AddParameterUpdate(model)
        if arg.order == 'NHWC':
            print(
                '==WARNING==\n'
                'NHWC order with CuDNN may not be supported yet, so I might\n'
                'exit suddenly.'
            )

    if not arg.cpu:
        model.param_init_net.RunAllOnGPU()
        model.net.RunAllOnGPU()

    if arg.dump_model:
        # Writes out the pbtxt for benchmarks on e.g. Android
        init_path = "{0}_init_batch_{1}.pbtxt".format(
            arg.model, arg.batch_size
        )
        with open(init_path, "w") as fid:
            fid.write(str(model.param_init_net.Proto()))
        net_path = "{0}.pbtxt".format(arg.model)
        with open(net_path, "w") as fid:
            fid.write(str(model.net.Proto()))

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    # Warm-up runs are executed before the timed plan and are not measured.
    for _ in range(arg.warmup_iterations):
        workspace.RunNet(model.net.Proto().name)

    plan = core.Plan("plan")
    plan.AddStep(core.ExecutionStep("run", model.net, arg.iterations))
    started_at = time.time()
    workspace.RunPlan(plan)
    seconds_per_iteration = (time.time() - started_at) / arg.iterations
    print('Spent: {}'.format(seconds_per_iteration))

    if arg.layer_wise_benchmark:
        print('Layer-wise benchmark.')
        workspace.BenchmarkNet(model.net.Proto().name, 1, arg.iterations, True)
def GetArgumentParser():
    """Create the command-line parser for the benchmark script.

    The boolean options (--forward_only, --layer_wise_benchmark, --cpu,
    --dump_model) are switches: they take no value and default to False.

    Returns:
        An argparse.ArgumentParser with all benchmark options registered.
    """
    parser = argparse.ArgumentParser(description="Caffe2 benchmark.")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=128,
        help="The batch size."
    )
    parser.add_argument("--model", type=str, help="The model to benchmark.")
    parser.add_argument(
        "--order",
        type=str,
        default="NCHW",
        help="The order to evaluate."
    )
    parser.add_argument(
        "--cudnn_ws",
        type=int,
        default=-1,
        help="The cudnn workspace size."
    )
    parser.add_argument(
        "--iterations",
        type=int,
        default=10,
        help="Number of iterations to run the network."
    )
    parser.add_argument(
        "--warmup_iterations",
        type=int,
        default=10,
        help="Number of warm-up iterations before benchmarking."
    )
    parser.add_argument(
        "--forward_only",
        action='store_true',
        help="If set, only run the forward pass."
    )
    parser.add_argument(
        "--layer_wise_benchmark",
        action='store_true',
        help="If set, run the layer-wise benchmark as well."
    )
    parser.add_argument(
        "--cpu",
        action='store_true',
        help="If set, run testing on CPU instead of GPU."
    )
    parser.add_argument(
        "--dump_model",
        action='store_true',
        help="If set, dump the model prototxts to disk."
    )
    parser.add_argument(
        "--net_type",
        type=str,
        default="dag",
        help="Value assigned to the type field of the model's net proto."
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=2,
        help="Value assigned to the num_workers field of the model's net "
             "proto."
    )
    return parser
if __name__ == '__main__':
    # Maps each accepted --model value to the function that builds it.
    model_map = {
        'AlexNet': AlexNet,
        'OverFeat': OverFeat,
        'VGGA': VGGA,
        'Inception': Inception,
        'MLP': MLP,
    }

    parser = GetArgumentParser()
    args = parser.parse_args()
    if (
        not args.batch_size or not args.model or not args.order or
        not args.cudnn_ws
    ):
        # A required setting is missing or zero: show the usage and stop,
        # instead of continuing into a KeyError on the model lookup below.
        parser.print_help()
        raise SystemExit(1)
    if args.model not in model_map:
        # Report an unknown model as a usage error listing the valid names.
        parser.error(
            "unknown model '{}'; choose one of: {}".format(
                args.model, ", ".join(sorted(model_map))
            )
        )

    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    Benchmark(model_map[args.model], args)
|