| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726 |
- # Module caffe2.python.examples.resnet50_trainer
- import argparse
- import logging
- import numpy as np
- import time
- import os
- from caffe2.python import core, workspace, experiment_util, data_parallel_model
- from caffe2.python import dyndep, optimizer
- from caffe2.python import timeout_guard, model_helper, brew
- from caffe2.proto import caffe2_pb2
- import caffe2.python.models.resnet as resnet
- import caffe2.python.models.shufflenet as shufflenet
- from caffe2.python.modeling.initializers import Initializer, PseudoFP16Initializer
- import caffe2.python.predictor.predictor_exporter as pred_exp
- import caffe2.python.predictor.predictor_py_utils as pred_utils
- from caffe2.python.predictor_constants import predictor_constants
- '''
- Parallelized multi-GPU distributed trainer for Resne(X)t & Shufflenet.
- Can be used to train on imagenet data, for example.
- The default parameters can train a standard Resnet-50 (1x64d), and parameters
- can be provided to train ResNe(X)t models (e.g., ResNeXt-101 32x4d).
- To run the trainer in single-machine multi-gpu mode by setting num_shards = 1.
- To run the trainer in multi-machine multi-gpu mode with M machines,
- run the same program on all machines, specifying num_shards = M, and
- shard_id = a unique integer in the set [0, M-1].
- For rendezvous (the trainer processes have to know about each other),
- you can either use a directory path that is visible to all processes
- (e.g. NFS directory), or use a Redis instance. Use the former by
- passing the `file_store_path` argument. Use the latter by passing the
- `redis_host` and `redis_port` arguments.
- '''
- logging.basicConfig()
- log = logging.getLogger("Imagenet_trainer")
- log.setLevel(logging.DEBUG)
- dyndep.InitOpsLibrary('@/caffe2/caffe2/distributed:file_store_handler_ops')
- dyndep.InitOpsLibrary('@/caffe2/caffe2/distributed:redis_store_handler_ops')
- def AddImageInput(
- model,
- reader,
- batch_size,
- img_size,
- dtype,
- is_test,
- mean_per_channel=None,
- std_per_channel=None,
- ):
- '''
- The image input operator loads image and label data from the reader and
- applies transformations to the images (random cropping, mirroring, ...).
- '''
- data, label = brew.image_input(
- model,
- reader, ["data", "label"],
- batch_size=batch_size,
- output_type=dtype,
- use_gpu_transform=True if core.IsGPUDeviceType(model._device_type) else False,
- use_caffe_datum=True,
- mean_per_channel=mean_per_channel,
- std_per_channel=std_per_channel,
- # mean_per_channel takes precedence over mean
- mean=128.,
- std=128.,
- scale=256,
- crop=img_size,
- mirror=1,
- is_test=is_test,
- )
- data = model.StopGradient(data, data)
- def AddNullInput(model, reader, batch_size, img_size, dtype):
- '''
- The null input function uses a gaussian fill operator to emulate real image
- input. A label blob is hardcoded to a single value. This is useful if you
- want to test compute throughput or don't have a dataset available.
- '''
- suffix = "_fp16" if dtype == "float16" else ""
- model.param_init_net.GaussianFill(
- [],
- ["data" + suffix],
- shape=[batch_size, 3, img_size, img_size],
- )
- if dtype == "float16":
- model.param_init_net.FloatToHalf("data" + suffix, "data")
- model.param_init_net.ConstantFill(
- [],
- ["label"],
- shape=[batch_size],
- value=1,
- dtype=core.DataType.INT32,
- )
- def SaveModel(args, train_model, epoch, use_ideep):
- prefix = "[]_{}".format(train_model._device_prefix, train_model._devices[0])
- predictor_export_meta = pred_exp.PredictorExportMeta(
- predict_net=train_model.net.Proto(),
- parameters=data_parallel_model.GetCheckpointParams(train_model),
- inputs=[prefix + "/data"],
- outputs=[prefix + "/softmax"],
- shapes={
- prefix + "/softmax": (1, args.num_labels),
- prefix + "/data": (args.num_channels, args.image_size, args.image_size)
- }
- )
- # save the train_model for the current epoch
- model_path = "%s/%s_%d.mdl" % (
- args.file_store_path,
- args.save_model_name,
- epoch,
- )
- # set db_type to be "minidb" instead of "log_file_db", which breaks
- # the serialization in save_to_db. Need to switch back to log_file_db
- # after migration
- pred_exp.save_to_db(
- db_type="minidb",
- db_destination=model_path,
- predictor_export_meta=predictor_export_meta,
- use_ideep=use_ideep
- )
- def LoadModel(path, model, use_ideep):
- '''
- Load pretrained model from file
- '''
- log.info("Loading path: {}".format(path))
- meta_net_def = pred_exp.load_from_db(path, 'minidb')
- init_net = core.Net(pred_utils.GetNet(
- meta_net_def, predictor_constants.GLOBAL_INIT_NET_TYPE))
- predict_init_net = core.Net(pred_utils.GetNet(
- meta_net_def, predictor_constants.PREDICT_INIT_NET_TYPE))
- if use_ideep:
- predict_init_net.RunAllOnIDEEP()
- else:
- predict_init_net.RunAllOnGPU()
- if use_ideep:
- init_net.RunAllOnIDEEP()
- else:
- init_net.RunAllOnGPU()
- assert workspace.RunNetOnce(predict_init_net)
- assert workspace.RunNetOnce(init_net)
- # Hack: fix iteration counter which is in CUDA context after load model
- itercnt = workspace.FetchBlob("optimizer_iteration")
- workspace.FeedBlob(
- "optimizer_iteration",
- itercnt,
- device_option=core.DeviceOption(caffe2_pb2.CPU, 0)
- )
- def RunEpoch(
- args,
- epoch,
- train_model,
- test_model,
- total_batch_size,
- num_shards,
- expname,
- explog,
- ):
- '''
- Run one epoch of the trainer.
- TODO: add checkpointing here.
- '''
- # TODO: add loading from checkpoint
- log.info("Starting epoch {}/{}".format(epoch, args.num_epochs))
- epoch_iters = int(args.epoch_size / total_batch_size / num_shards)
- test_epoch_iters = int(args.test_epoch_size / total_batch_size / num_shards)
- for i in range(epoch_iters):
- # This timeout is required (temporarily) since CUDA-NCCL
- # operators might deadlock when synchronizing between GPUs.
- timeout = args.first_iter_timeout if i == 0 else args.timeout
- with timeout_guard.CompleteInTimeOrDie(timeout):
- t1 = time.time()
- workspace.RunNet(train_model.net.Proto().name)
- t2 = time.time()
- dt = t2 - t1
- fmt = "Finished iteration {}/{} of epoch {} ({:.2f} images/sec)"
- log.info(fmt.format(i + 1, epoch_iters, epoch, total_batch_size / dt))
- prefix = "{}_{}".format(
- train_model._device_prefix,
- train_model._devices[0])
- accuracy = workspace.FetchBlob(prefix + '/accuracy')
- loss = workspace.FetchBlob(prefix + '/loss')
- train_fmt = "Training loss: {}, accuracy: {}"
- log.info(train_fmt.format(loss, accuracy))
- num_images = epoch * epoch_iters * total_batch_size
- prefix = "{}_{}".format(train_model._device_prefix, train_model._devices[0])
- accuracy = workspace.FetchBlob(prefix + '/accuracy')
- loss = workspace.FetchBlob(prefix + '/loss')
- learning_rate = workspace.FetchBlob(
- data_parallel_model.GetLearningRateBlobNames(train_model)[0]
- )
- test_accuracy = 0
- test_accuracy_top5 = 0
- if test_model is not None:
- # Run 100 iters of testing
- ntests = 0
- for _ in range(test_epoch_iters):
- workspace.RunNet(test_model.net.Proto().name)
- for g in test_model._devices:
- test_accuracy += np.asscalar(workspace.FetchBlob(
- "{}_{}".format(test_model._device_prefix, g) + '/accuracy'
- ))
- test_accuracy_top5 += np.asscalar(workspace.FetchBlob(
- "{}_{}".format(test_model._device_prefix, g) + '/accuracy_top5'
- ))
- ntests += 1
- test_accuracy /= ntests
- test_accuracy_top5 /= ntests
- else:
- test_accuracy = (-1)
- test_accuracy_top5 = (-1)
- explog.log(
- input_count=num_images,
- batch_count=(i + epoch * epoch_iters),
- additional_values={
- 'accuracy': accuracy,
- 'loss': loss,
- 'learning_rate': learning_rate,
- 'epoch': epoch,
- 'top1_test_accuracy': test_accuracy,
- 'top5_test_accuracy': test_accuracy_top5,
- }
- )
- assert loss < 40, "Exploded gradients :("
- # TODO: add checkpointing
- return epoch + 1
- def Train(args):
- if args.model == "resnext":
- model_name = "resnext" + str(args.num_layers)
- elif args.model == "shufflenet":
- model_name = "shufflenet"
- # Either use specified device list or generate one
- if args.gpus is not None:
- gpus = [int(x) for x in args.gpus.split(',')]
- num_gpus = len(gpus)
- else:
- gpus = list(range(args.num_gpus))
- num_gpus = args.num_gpus
- log.info("Running on GPUs: {}".format(gpus))
- # Verify valid batch size
- total_batch_size = args.batch_size
- batch_per_device = total_batch_size // num_gpus
- assert \
- total_batch_size % num_gpus == 0, \
- "Number of GPUs must divide batch size"
- # Verify valid image mean/std per channel
- if args.image_mean_per_channel:
- assert \
- len(args.image_mean_per_channel) == args.num_channels, \
- "The number of channels of image mean doesn't match input"
- if args.image_std_per_channel:
- assert \
- len(args.image_std_per_channel) == args.num_channels, \
- "The number of channels of image std doesn't match input"
- # Round down epoch size to closest multiple of batch size across machines
- global_batch_size = total_batch_size * args.num_shards
- epoch_iters = int(args.epoch_size / global_batch_size)
- assert \
- epoch_iters > 0, \
- "Epoch size must be larger than batch size times shard count"
- args.epoch_size = epoch_iters * global_batch_size
- log.info("Using epoch size: {}".format(args.epoch_size))
- # Create ModelHelper object
- if args.use_ideep:
- train_arg_scope = {
- 'use_cudnn': False,
- 'cudnn_exhaustive_search': False,
- 'training_mode': 1
- }
- else:
- train_arg_scope = {
- 'order': 'NCHW',
- 'use_cudnn': True,
- 'cudnn_exhaustive_search': True,
- 'ws_nbytes_limit': (args.cudnn_workspace_limit_mb * 1024 * 1024),
- }
- train_model = model_helper.ModelHelper(
- name=model_name, arg_scope=train_arg_scope
- )
- num_shards = args.num_shards
- shard_id = args.shard_id
- # Expect interfaces to be comma separated.
- # Use of multiple network interfaces is not yet complete,
- # so simply use the first one in the list.
- interfaces = args.distributed_interfaces.split(",")
- # Rendezvous using MPI when run with mpirun
- if os.getenv("OMPI_COMM_WORLD_SIZE") is not None:
- num_shards = int(os.getenv("OMPI_COMM_WORLD_SIZE", 1))
- shard_id = int(os.getenv("OMPI_COMM_WORLD_RANK", 0))
- if num_shards > 1:
- rendezvous = dict(
- kv_handler=None,
- num_shards=num_shards,
- shard_id=shard_id,
- engine="GLOO",
- transport=args.distributed_transport,
- interface=interfaces[0],
- mpi_rendezvous=True,
- exit_nets=None)
- elif num_shards > 1:
- # Create rendezvous for distributed computation
- store_handler = "store_handler"
- if args.redis_host is not None:
- # Use Redis for rendezvous if Redis host is specified
- workspace.RunOperatorOnce(
- core.CreateOperator(
- "RedisStoreHandlerCreate", [], [store_handler],
- host=args.redis_host,
- port=args.redis_port,
- prefix=args.run_id,
- )
- )
- else:
- # Use filesystem for rendezvous otherwise
- workspace.RunOperatorOnce(
- core.CreateOperator(
- "FileStoreHandlerCreate", [], [store_handler],
- path=args.file_store_path,
- prefix=args.run_id,
- )
- )
- rendezvous = dict(
- kv_handler=store_handler,
- shard_id=shard_id,
- num_shards=num_shards,
- engine="GLOO",
- transport=args.distributed_transport,
- interface=interfaces[0],
- exit_nets=None)
- else:
- rendezvous = None
- # Model building functions
- def create_resnext_model_ops(model, loss_scale):
- initializer = (PseudoFP16Initializer if args.dtype == 'float16'
- else Initializer)
- with brew.arg_scope([brew.conv, brew.fc],
- WeightInitializer=initializer,
- BiasInitializer=initializer,
- enable_tensor_core=args.enable_tensor_core,
- float16_compute=args.float16_compute):
- pred = resnet.create_resnext(
- model,
- "data",
- num_input_channels=args.num_channels,
- num_labels=args.num_labels,
- num_layers=args.num_layers,
- num_groups=args.resnext_num_groups,
- num_width_per_group=args.resnext_width_per_group,
- no_bias=True,
- no_loss=True,
- )
- if args.dtype == 'float16':
- pred = model.net.HalfToFloat(pred, pred + '_fp32')
- softmax, loss = model.SoftmaxWithLoss([pred, 'label'],
- ['softmax', 'loss'])
- loss = model.Scale(loss, scale=loss_scale)
- brew.accuracy(model, [softmax, "label"], "accuracy", top_k=1)
- brew.accuracy(model, [softmax, "label"], "accuracy_top5", top_k=5)
- return [loss]
- def create_shufflenet_model_ops(model, loss_scale):
- initializer = (PseudoFP16Initializer if args.dtype == 'float16'
- else Initializer)
- with brew.arg_scope([brew.conv, brew.fc],
- WeightInitializer=initializer,
- BiasInitializer=initializer,
- enable_tensor_core=args.enable_tensor_core,
- float16_compute=args.float16_compute):
- pred = shufflenet.create_shufflenet(
- model,
- "data",
- num_input_channels=args.num_channels,
- num_labels=args.num_labels,
- no_loss=True,
- )
- if args.dtype == 'float16':
- pred = model.net.HalfToFloat(pred, pred + '_fp32')
- softmax, loss = model.SoftmaxWithLoss([pred, 'label'],
- ['softmax', 'loss'])
- loss = model.Scale(loss, scale=loss_scale)
- brew.accuracy(model, [softmax, "label"], "accuracy", top_k=1)
- brew.accuracy(model, [softmax, "label"], "accuracy_top5", top_k=5)
- return [loss]
- def add_optimizer(model):
- stepsz = int(30 * args.epoch_size / total_batch_size / num_shards)
- if args.float16_compute:
- # TODO: merge with multi-precision optimizer
- opt = optimizer.build_fp16_sgd(
- model,
- args.base_learning_rate,
- momentum=0.9,
- nesterov=1,
- weight_decay=args.weight_decay, # weight decay included
- policy="step",
- stepsize=stepsz,
- gamma=0.1
- )
- else:
- optimizer.add_weight_decay(model, args.weight_decay)
- opt = optimizer.build_multi_precision_sgd(
- model,
- args.base_learning_rate,
- momentum=0.9,
- nesterov=1,
- policy="step",
- stepsize=stepsz,
- gamma=0.1
- )
- return opt
- # Define add_image_input function.
- # Depends on the "train_data" argument.
- # Note that the reader will be shared with between all GPUS.
- if args.train_data == "null":
- def add_image_input(model):
- AddNullInput(
- model,
- None,
- batch_size=batch_per_device,
- img_size=args.image_size,
- dtype=args.dtype,
- )
- else:
- reader = train_model.CreateDB(
- "reader",
- db=args.train_data,
- db_type=args.db_type,
- num_shards=num_shards,
- shard_id=shard_id,
- )
- def add_image_input(model):
- AddImageInput(
- model,
- reader,
- batch_size=batch_per_device,
- img_size=args.image_size,
- dtype=args.dtype,
- is_test=False,
- mean_per_channel=args.image_mean_per_channel,
- std_per_channel=args.image_std_per_channel,
- )
- def add_post_sync_ops(model):
- """Add ops applied after initial parameter sync."""
- for param_info in model.GetOptimizationParamInfo(model.GetParams()):
- if param_info.blob_copy is not None:
- model.param_init_net.HalfToFloat(
- param_info.blob,
- param_info.blob_copy[core.DataType.FLOAT]
- )
- data_parallel_model.Parallelize(
- train_model,
- input_builder_fun=add_image_input,
- forward_pass_builder_fun=create_resnext_model_ops
- if args.model == "resnext" else create_shufflenet_model_ops,
- optimizer_builder_fun=add_optimizer,
- post_sync_builder_fun=add_post_sync_ops,
- devices=gpus,
- rendezvous=rendezvous,
- optimize_gradient_memory=False,
- use_nccl=args.use_nccl,
- cpu_device=args.use_cpu,
- ideep=args.use_ideep,
- shared_model=args.use_cpu,
- combine_spatial_bn=args.use_cpu,
- )
- data_parallel_model.OptimizeGradientMemory(train_model, {}, set(), False)
- workspace.RunNetOnce(train_model.param_init_net)
- workspace.CreateNet(train_model.net)
- # Add test model, if specified
- test_model = None
- if (args.test_data is not None):
- log.info("----- Create test net ----")
- if args.use_ideep:
- test_arg_scope = {
- 'use_cudnn': False,
- 'cudnn_exhaustive_search': False,
- }
- else:
- test_arg_scope = {
- 'order': "NCHW",
- 'use_cudnn': True,
- 'cudnn_exhaustive_search': True,
- }
- test_model = model_helper.ModelHelper(
- name=model_name + "_test",
- arg_scope=test_arg_scope,
- init_params=False,
- )
- test_reader = test_model.CreateDB(
- "test_reader",
- db=args.test_data,
- db_type=args.db_type,
- )
- def test_input_fn(model):
- AddImageInput(
- model,
- test_reader,
- batch_size=batch_per_device,
- img_size=args.image_size,
- dtype=args.dtype,
- is_test=True,
- mean_per_channel=args.image_mean_per_channel,
- std_per_channel=args.image_std_per_channel,
- )
- data_parallel_model.Parallelize(
- test_model,
- input_builder_fun=test_input_fn,
- forward_pass_builder_fun=create_resnext_model_ops
- if args.model == "resnext" else create_shufflenet_model_ops,
- post_sync_builder_fun=add_post_sync_ops,
- param_update_builder_fun=None,
- devices=gpus,
- use_nccl=args.use_nccl,
- cpu_device=args.use_cpu,
- )
- workspace.RunNetOnce(test_model.param_init_net)
- workspace.CreateNet(test_model.net)
- epoch = 0
- # load the pre-trained model and reset epoch
- if args.load_model_path is not None:
- LoadModel(args.load_model_path, train_model, args.use_ideep)
- # Sync the model params
- data_parallel_model.FinalizeAfterCheckpoint(train_model)
- # reset epoch. load_model_path should end with *_X.mdl,
- # where X is the epoch number
- last_str = args.load_model_path.split('_')[-1]
- if last_str.endswith('.mdl'):
- epoch = int(last_str[:-4])
- log.info("Reset epoch to {}".format(epoch))
- else:
- log.warning("The format of load_model_path doesn't match!")
- expname = "%s_gpu%d_b%d_L%d_lr%.2f_v2" % (
- model_name,
- args.num_gpus,
- total_batch_size,
- args.num_labels,
- args.base_learning_rate,
- )
- explog = experiment_util.ModelTrainerLog(expname, args)
- # Run the training one epoch a time
- while epoch < args.num_epochs:
- epoch = RunEpoch(
- args,
- epoch,
- train_model,
- test_model,
- total_batch_size,
- num_shards,
- expname,
- explog
- )
- # Save the model for each epoch
- SaveModel(args, train_model, epoch, args.use_ideep)
- model_path = "%s/%s_" % (
- args.file_store_path,
- args.save_model_name
- )
- # remove the saved model from the previous epoch if it exists
- if os.path.isfile(model_path + str(epoch - 1) + ".mdl"):
- os.remove(model_path + str(epoch - 1) + ".mdl")
- def main():
- # TODO: use argv
- parser = argparse.ArgumentParser(
- description="Caffe2: ImageNet Trainer"
- )
- parser.add_argument("--train_data", type=str, default=None, required=True,
- help="Path to training data (or 'null' to simulate)")
- parser.add_argument("--num_layers", type=int, default=50,
- help="The number of layers in ResNe(X)t model")
- parser.add_argument("--resnext_num_groups", type=int, default=1,
- help="The cardinality of resnext")
- parser.add_argument("--resnext_width_per_group", type=int, default=64,
- help="The cardinality of resnext")
- parser.add_argument("--test_data", type=str, default=None,
- help="Path to test data")
- parser.add_argument("--image_mean_per_channel", type=float, nargs='+',
- help="The per channel mean for the images")
- parser.add_argument("--image_std_per_channel", type=float, nargs='+',
- help="The per channel standard deviation for the images")
- parser.add_argument("--test_epoch_size", type=int, default=50000,
- help="Number of test images")
- parser.add_argument("--db_type", type=str, default="lmdb",
- help="Database type (such as lmdb or leveldb)")
- parser.add_argument("--gpus", type=str,
- help="Comma separated list of GPU devices to use")
- parser.add_argument("--num_gpus", type=int, default=1,
- help="Number of GPU devices (instead of --gpus)")
- parser.add_argument("--num_channels", type=int, default=3,
- help="Number of color channels")
- parser.add_argument("--image_size", type=int, default=224,
- help="Input image size (to crop to)")
- parser.add_argument("--num_labels", type=int, default=1000,
- help="Number of labels")
- parser.add_argument("--batch_size", type=int, default=32,
- help="Batch size, total over all GPUs")
- parser.add_argument("--epoch_size", type=int, default=1500000,
- help="Number of images/epoch, total over all machines")
- parser.add_argument("--num_epochs", type=int, default=1000,
- help="Num epochs.")
- parser.add_argument("--base_learning_rate", type=float, default=0.1,
- help="Initial learning rate.")
- parser.add_argument("--weight_decay", type=float, default=1e-4,
- help="Weight decay (L2 regularization)")
- parser.add_argument("--cudnn_workspace_limit_mb", type=int, default=64,
- help="CuDNN workspace limit in MBs")
- parser.add_argument("--num_shards", type=int, default=1,
- help="Number of machines in distributed run")
- parser.add_argument("--shard_id", type=int, default=0,
- help="Shard id.")
- parser.add_argument("--run_id", type=str,
- help="Unique run identifier (e.g. uuid)")
- parser.add_argument("--redis_host", type=str,
- help="Host of Redis server (for rendezvous)")
- parser.add_argument("--redis_port", type=int, default=6379,
- help="Port of Redis server (for rendezvous)")
- parser.add_argument("--file_store_path", type=str, default="/tmp",
- help="Path to directory to use for rendezvous")
- parser.add_argument("--save_model_name", type=str, default="resnext_model",
- help="Save the trained model to a given name")
- parser.add_argument("--load_model_path", type=str, default=None,
- help="Load previously saved model to continue training")
- parser.add_argument("--use_cpu", action="store_true",
- help="Use CPU instead of GPU")
- parser.add_argument("--use_nccl", action="store_true",
- help="Use nccl for inter-GPU collectives")
- parser.add_argument("--use_ideep", type=bool, default=False,
- help="Use ideep")
- parser.add_argument('--dtype', default='float',
- choices=['float', 'float16'],
- help='Data type used for training')
- parser.add_argument('--float16_compute', action='store_true',
- help="Use float 16 compute, if available")
- parser.add_argument('--enable_tensor_core', action='store_true',
- help='Enable Tensor Core math for Conv and FC ops')
- parser.add_argument("--distributed_transport", type=str, default="tcp",
- help="Transport to use for distributed run [tcp|ibverbs]")
- parser.add_argument("--distributed_interfaces", type=str, default="",
- help="Network interfaces to use for distributed run")
- parser.add_argument("--first_iter_timeout", type=int, default=1200,
- help="Timeout (secs) of the first iteration "
- "(default: %(default)s)")
- parser.add_argument("--timeout", type=int, default=60,
- help="Timeout (secs) of each (except the first) iteration "
- "(default: %(default)s)")
- parser.add_argument("--model",
- default="resnext", const="resnext", nargs="?",
- choices=["shufflenet", "resnext"],
- help="List of models which can be run")
- args = parser.parse_args()
- Train(args)
- if __name__ == '__main__':
- workspace.GlobalInit(['caffe2', '--caffe2_log_level=2'])
- main()
|