""" |
|
A script to benchmark builtin models. |
|
|
|
Note: this script has an extra dependency of psutil. |
|
""" |
import itertools
import logging
import psutil
import torch
import tqdm
from fvcore.common.timer import Timer
from torch.nn.parallel import DistributedDataParallel

from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, get_cfg, instantiate
from detectron2.data import (
    DatasetFromList,
    build_detection_test_loader,
    build_detection_train_loader,
)
from detectron2.data.benchmark import DataLoaderBenchmark
from detectron2.engine import AMPTrainer, SimpleTrainer, default_argument_parser, hooks, launch
from detectron2.modeling import build_model
from detectron2.solver import build_optimizer
from detectron2.utils import comm
from detectron2.utils.collect_env import collect_env_info
from detectron2.utils.events import CommonMetricPrinter
from detectron2.utils.logger import setup_logger

logger = logging.getLogger("detectron2")
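
# Build a config from the command-line arguments; both yaml (CfgNode) and
# LazyConfig (.py) config files are supported.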
def setup(args):
    if args.config_file.endswith(".yaml"):
        cfg = get_cfg()
        cfg.merge_from_file(args.config_file)
        cfg.SOLVER.BASE_LR = 0.001  # avoid NaNs; the exact LR does not matter when benchmarking
        cfg.merge_from_list(args.opts)
        cfg.freeze()
    else:
        cfg = LazyConfig.load(args.config_file)
        cfg = LazyConfig.apply_overrides(cfg, args.opts)
    setup_logger(distributed_rank=comm.get_rank())
    return cfg
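
# Build a benchmark object for the configured training dataloader by swapping its
# instantiation target to DataLoaderBenchmark, keeping the dataset/mapper/sampler settings.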
def create_data_benchmark(cfg, args):
    if args.config_file.endswith(".py"):
        dl_cfg = cfg.dataloader.train
        dl_cfg._target_ = DataLoaderBenchmark
        return instantiate(dl_cfg)
    else:
        kwargs = build_detection_train_loader.from_config(cfg)
        kwargs.pop("aspect_ratio_grouping", None)
        kwargs["_target_"] = DataLoaderBenchmark
        return instantiate(kwargs)
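
# Report current system RAM usage, used to track memory consumption of the data benchmarks.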
def RAM_msg():
    vram = psutil.virtual_memory()
    return "RAM Usage: {:.2f}/{:.2f} GB".format(
        (vram.total - vram.available) / 1024**3, vram.total / 1024**3
    )
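
# Benchmark the training dataloader end-to-end (benchmark_distributed), logging RAM usage
# after each round so that memory growth across iterations is visible.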
def benchmark_data(args):
    cfg = setup(args)
    logger.info("After spawning " + RAM_msg())

    benchmark = create_data_benchmark(cfg, args)
    benchmark.benchmark_distributed(250, 10)

    for k in range(10):
        logger.info(f"Iteration {k} " + RAM_msg())
        benchmark.benchmark_distributed(250, 1)
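
# Benchmark the dataloader stage by stage (raw dataset, mapper, workers, IPC, and the
# distributed loader) to help locate performance bottlenecks.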
def benchmark_data_advanced(args):
    cfg = setup(args)
    benchmark = create_data_benchmark(cfg, args)

    if comm.get_rank() == 0:
        benchmark.benchmark_dataset(100)
        benchmark.benchmark_mapper(100)
        benchmark.benchmark_workers(100, warmup=10)
        benchmark.benchmark_IPC(100, warmup=10)
    if comm.get_world_size() > 1:
        benchmark.benchmark_distributed(100)
        logger.info("Rerun ...")
        benchmark.benchmark_distributed(100)
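
# Benchmark training speed: 100 batches are cached in memory up front, so the measured
# iterations reflect forward/backward and optimizer time rather than data loading.
# This path reads yaml-style keys (cfg.MODEL, cfg.SOLVER, cfg.DATALOADER).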
def benchmark_train(args):
    cfg = setup(args)
    model = build_model(cfg)
    logger.info("Model:\n{}".format(model))
    if comm.get_world_size() > 1:
        model = DistributedDataParallel(
            model, device_ids=[comm.get_local_rank()], broadcast_buffers=False
        )
    optimizer = build_optimizer(cfg, model)
    checkpointer = DetectionCheckpointer(model, optimizer=optimizer)
    checkpointer.load(cfg.MODEL.WEIGHTS)

    cfg.defrost()
    cfg.DATALOADER.NUM_WORKERS = 2
    data_loader = build_detection_train_loader(cfg)
    dummy_data = list(itertools.islice(data_loader, 100))

    def f():
        # an infinite stream that cycles through the 100 cached batches
        data = DatasetFromList(dummy_data, copy=False, serialize=False)
        while True:
            yield from data

    max_iter = 400
    trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(model, f(), optimizer)
    trainer.register_hooks(
        [
            hooks.IterationTimer(),
            hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]),
            hooks.TorchProfiler(
                lambda trainer: trainer.iter == max_iter - 1, cfg.OUTPUT_DIR, save_tensorboard=True
            ),
        ]
    )
    trainer.train(1, max_iter)
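
# Benchmark inference speed: cache 100 inputs from the test dataloader, run a short
# warmup, then time 300 forward passes under torch.no_grad().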
@torch.no_grad()
def benchmark_eval(args):
    cfg = setup(args)
    if args.config_file.endswith(".yaml"):
        model = build_model(cfg)
        DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)

        cfg.defrost()
        cfg.DATALOADER.NUM_WORKERS = 0
        data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0])
    else:
        model = instantiate(cfg.model)
        model.to(cfg.train.device)
        DetectionCheckpointer(model).load(cfg.train.init_checkpoint)

        cfg.dataloader.test.num_workers = 0  # exclude dataloading overhead from the measurement
        data_loader = instantiate(cfg.dataloader.test)

    model.eval()
    logger.info("Model:\n{}".format(model))
    dummy_data = DatasetFromList(list(itertools.islice(data_loader, 100)), copy=False)

    def f():
        while True:
            yield from dummy_data

    for k in range(5):  # warmup
        model(dummy_data[k])

    max_iter = 300
    timer = Timer()
    with tqdm.tqdm(total=max_iter) as pbar:
        for idx, d in enumerate(f()):
            if idx == max_iter:
                break
            model(d)
            pbar.update()
    logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds()))
if __name__ == "__main__":
    parser = default_argument_parser()
    parser.add_argument("--task", choices=["train", "eval", "data", "data_advanced"], required=True)
    args = parser.parse_args()
    assert not args.eval_only

    logger.info("Environment info:\n" + collect_env_info())
    if "data" in args.task:
        print("Initial " + RAM_msg())
        if args.task == "data":
            f = benchmark_data
        if args.task == "data_advanced":
            f = benchmark_data_advanced
    elif args.task == "train":
        """
        Note: training speed may not be representative.
        The training cost of an R-CNN model varies with the content of the data
        and the quality of the model.
        """
        f = benchmark_train
    elif args.task == "eval":
        f = benchmark_eval
        # inference is only benchmarked on a single GPU
        assert args.num_gpus == 1 and args.num_machines == 1
    launch(f, args.num_gpus, args.num_machines, args.machine_rank, args.dist_url, args=(args,))