Spaces:
Runtime error
Runtime error
# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
"""
Train a YOLOv5 classifier model on a classification dataset

Usage - Single-GPU training:
    $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224

Usage - Multi-GPU DDP training:
    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 2022 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3

Datasets:           --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
YOLOv5-cls models:  --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
"""
import argparse | |
import os | |
import subprocess | |
import sys | |
import time | |
from copy import deepcopy | |
from datetime import datetime | |
from pathlib import Path | |
import torch | |
import torch.distributed as dist | |
import torch.hub as hub | |
import torch.optim.lr_scheduler as lr_scheduler | |
import torchvision | |
from torch.cuda import amp | |
from tqdm import tqdm | |
# Locate this file and the directory two levels up, and make that directory importable.
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if sys.path.count(str(ROOT)) == 0:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, os.getcwd()))  # re-express ROOT relative to the working directory
from classify import val as validate | |
from models.experimental import attempt_load | |
from models.yolo import ClassificationModel, DetectionModel | |
from utils.dataloaders import create_classification_dataloader | |
from utils.general import ( | |
DATASETS_DIR, | |
LOGGER, | |
TQDM_BAR_FORMAT, | |
WorkingDirectory, | |
check_git_info, | |
check_git_status, | |
check_requirements, | |
colorstr, | |
download, | |
increment_path, | |
init_seeds, | |
print_args, | |
yaml_save, | |
) | |
from utils.loggers import GenericLogger | |
from utils.plots import imshow_cls | |
from utils.torch_utils import ( | |
ModelEMA, | |
model_info, | |
reshape_classifier_output, | |
select_device, | |
smart_DDP, | |
smart_optimizer, | |
smartCrossEntropyLoss, | |
torch_distributed_zero_first, | |
) | |
# Distributed-launch settings read from the environment, with single-process defaults.
# https://pytorch.org/docs/stable/elastic/run.html
LOCAL_RANK = int(os.environ.get("LOCAL_RANK", -1))
RANK = int(os.environ.get("RANK", -1))
WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))
GIT_INFO = check_git_info()  # stored in every checkpoint under the "git" key
def train(opt, device):
    """Train a classification model and write weights, plots and logs under ``opt.save_dir``.

    The function resolves (and if necessary downloads) the dataset, builds the
    train/test dataloaders, loads the model named by ``opt.model``, then runs
    ``opt.epochs`` epochs of AMP training with gradient clipping and an EMA copy
    of the weights. Validation, metric logging and checkpointing happen only on
    the primary process (``RANK`` in ``{-1, 0}``).

    Args:
        opt: Parsed options (see ``parse_opt``); ``opt.save_dir`` must already be
            set, as ``main`` does before calling this function.
        device: Device the model and batches are moved to. ``device.type`` decides
            whether autocast/grad scaling and DDP wrapping are enabled.

    Raises:
        ModuleNotFoundError: If ``opt.model`` is neither an existing file / ``*.pt``
            name nor an entry of ``torchvision.models``.
    """
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    save_dir = opt.save_dir
    data = Path(opt.data)
    bs = opt.batch_size
    epochs = opt.epochs
    # os.cpu_count() may return None when the count cannot be determined;
    # fall back to 1 so the subtraction cannot raise TypeError.
    nw = min((os.cpu_count() or 1) - 1, opt.workers)
    imgsz = opt.imgsz
    pretrained = str(opt.pretrained).lower() == "true"
    cuda = device.type != "cpu"

    # Directories
    wdir = save_dir / "weights"
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / "last.pt", wdir / "best.pt"

    # Save run settings
    yaml_save(save_dir / "opt.yaml", vars(opt))

    # Logger (primary process only)
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(
                f"\nDataset not found ⚠️, missing path {data_dir}, attempting download..."
            )
            t = time.time()
            if str(data) == "imagenet":
                # argv list instead of a shell string, so a ROOT containing spaces
                # or shell metacharacters is passed to bash unchanged.
                subprocess.run(
                    ["bash", str(ROOT / "data/scripts/get_imagenet.sh")],
                    check=True,
                )
            else:
                url = f"https://github.com/ultralytics/yolov5/releases/download/v1.0/{data}.zip"
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    # Each sub-directory of <data_dir>/train counts as one class.
    nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(
        path=data_dir / "train",
        imgsz=imgsz,
        batch_size=bs // WORLD_SIZE,
        augment=True,
        cache=opt.cache,
        rank=LOCAL_RANK,
        workers=nw,
    )

    test_dir = (
        data_dir / "test" if (data_dir / "test").exists() else data_dir / "val"
    )  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(
            path=test_dir,
            imgsz=imgsz,
            batch_size=bs // WORLD_SIZE * 2,
            augment=False,
            cache=opt.cache,
            rank=-1,
            workers=nw,
        )

    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith(".pt"):
            model = attempt_load(opt.model, device="cpu", fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](
                weights="IMAGENET1K_V1" if pretrained else None
            )
        else:
            m = hub.list("ultralytics/yolov5")  # + hub.list('pytorch/vision') # models
            raise ModuleNotFoundError(
                f"--model {opt.model} not found. Available models are: \n" + "\n".join(m)
            )
        if isinstance(model, DetectionModel):
            LOGGER.warning(
                "WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'"
            )
            model = ClassificationModel(
                model=model, nc=nc, cutoff=opt.cutoff or 10
            )  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, "reset_parameters"):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(
            images[:25],
            labels[:25],
            names=model.names,
            f=save_dir / "train_images.jpg",
        )
        logger.log_images(file, name="Train Examples")
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler
    lrf = 0.01  # final lr (fraction of lr0)
    # lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf # cosine
    lf = lambda x: (1 - x / epochs) * (1 - lrf) + lrf  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
    # final_div_factor=1 / 25 / lrf)

    # EMA (primary process only)
    ema = ModelEMA(model) if RANK in {-1, 0} else None

    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    # Bound up front: it is read after the loop, which may execute zero times.
    final_epoch = False
    LOGGER.info(
        f"Image sizes {imgsz} train, {imgsz} test\n"
        f"Using {nw * WORLD_SIZE} dataloader workers\n"
        f"Logging results to {colorstr('bold', save_dir)}\n"
        f"Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n"
        f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}"
    )
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(
                enumerate(trainloader),
                total=len(trainloader),
                bar_format=TQDM_BAR_FORMAT,
            )
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)

            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = "%.3gG" % (
                    torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0
                )  # (GB)
                pbar.desc = (
                    f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + " " * 36
                )

                # Test: validate the EMA weights once, on the last batch of the epoch
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(
                        model=ema.ema,
                        dataloader=testloader,
                        criterion=criterion,
                        pbar=pbar,
                    )  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                "train/loss": tloss,
                f"{val}/loss": vloss,
                "metrics/accuracy_top1": top1,
                "metrics/accuracy_top5": top5,
                "lr/0": optimizer.param_groups[0]["lr"],
            }  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    "epoch": epoch,
                    "best_fitness": best_fitness,
                    "model": deepcopy(ema.ema).half(),  # deepcopy(de_parallel(model)).half(),
                    "ema": None,  # deepcopy(ema.ema).half(),
                    "updates": ema.updates,
                    "optimizer": None,  # optimizer.state_dict(),
                    "opt": vars(opt),
                    "git": GIT_INFO,  # {remote, branch, commit} if a git repo
                    "date": datetime.now().isoformat(),
                }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt

    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(
            f"\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)"
            f"\nResults saved to {colorstr('bold', save_dir)}"
            f"\nPredict: python classify/predict.py --weights {best} --source im.jpg"
            f"\nValidate: python classify/val.py --weights {best} --data {data_dir}"
            f"\nExport: python export.py --weights {best} --include onnx"
            f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
            f"\nVisualize: https://netron.app\n"
        )

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(
            images,
            labels,
            pred,
            model.names,
            verbose=False,
            f=save_dir / "test_images.jpg",
        )

        # Log results
        meta = {
            "epochs": epochs,
            "top1_acc": best_fitness,
            "date": datetime.now().isoformat(),
        }
        logger.log_images(file, name="Test Examples (true-predicted)", epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)
def parse_opt(known=False):
    """Define the training command-line interface and parse the process arguments.

    Args:
        known: If true, parse with ``parse_known_args`` and return only the
            recognised options; otherwise parse with ``parse_args``.

    Returns:
        argparse.Namespace holding one attribute per option defined below.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # short alias; every option is registered on the same parser

    # Model, data and schedule
    add("--model", type=str, default="yolov5s-cls.pt", help="initial weights path")
    add("--data", type=str, default="imagenette160", help="cifar10, cifar100, mnist, imagenet, ...")
    add("--epochs", type=int, default=10, help="total training epochs")
    add("--batch-size", type=int, default=64, help="total batch size for all GPUs")
    add("--imgsz", "--img", "--img-size", type=int, default=224, help="train, val image size (pixels)")

    # Run-time behaviour and output location
    add("--nosave", action="store_true", help="only save final checkpoint")
    add("--cache", type=str, nargs="?", const="ram", help='--cache images in "ram" (default) or "disk"')
    add("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    add("--workers", type=int, default=8, help="max dataloader workers (per RANK in DDP mode)")
    add("--project", default=ROOT / "runs/train-cls", help="save to project/name")
    add("--name", default="exp", help="save to project/name")
    add("--exist-ok", action="store_true", help="existing project/name ok, do not increment")

    # Optimisation hyper-parameters
    add("--pretrained", nargs="?", const=True, default=True, help="start from i.e. --pretrained False")
    add("--optimizer", choices=["SGD", "Adam", "AdamW", "RMSProp"], default="Adam", help="optimizer")
    add("--lr0", type=float, default=0.001, help="initial learning rate")
    add("--decay", type=float, default=5e-5, help="weight decay")
    add("--label-smoothing", type=float, default=0.1, help="Label smoothing epsilon")
    add("--cutoff", type=int, default=None, help="Model layer cutoff index for Classify() head")
    add("--dropout", type=float, default=None, help="Dropout (fraction)")

    # Miscellaneous
    add("--verbose", action="store_true", help="Verbose mode")
    add("--seed", type=int, default=0, help="Global training seed")
    add("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")

    if known:
        namespace, _unrecognised = parser.parse_known_args()
        return namespace
    return parser.parse_args()
def main(opt):
    """Run pre-flight checks, pick the device, set ``opt.save_dir`` and start training.

    Args:
        opt: Parsed options namespace; ``opt.save_dir`` is assigned here before
            ``train`` is called.
    """
    # Checks (primary process only)
    is_primary = RANK in {-1, 0}
    if is_primary:
        print_args(vars(opt))
        check_git_status()
        check_requirements()

    device = select_device(opt.device, batch_size=opt.batch_size)

    # DDP mode: bind this process to its own GPU and join the process group
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, \
            "AutoBatch is coming soon for classification, please pass a valid --batch-size"
        assert opt.batch_size % WORLD_SIZE == 0, \
            f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
        assert torch.cuda.device_count() > LOCAL_RANK, \
            "insufficient CUDA devices for DDP command"
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device("cuda", LOCAL_RANK)
        backend = "nccl" if dist.is_nccl_available() else "gloo"
        dist.init_process_group(backend=backend)

    # Parameters
    run_dir = Path(opt.project) / opt.name
    opt.save_dir = increment_path(run_dir, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)
def run(**kwargs):
    """Programmatic entry point: parse known CLI options, override them, then train.

    Usage: from yolov5 import classify; classify.train.run(data=mnist, imgsz=320, model='yolov5m')

    Args:
        **kwargs: Option names and values set as attributes on the parsed
            namespace, replacing any parsed or default value of the same name.

    Returns:
        The options namespace that was passed to ``main``.
    """
    opt = parse_opt(True)
    for name, value in kwargs.items():
        setattr(opt, name, value)
    main(opt)
    return opt
if __name__ == "__main__":
    # Script entry point: parse the command line strictly, then launch training.
    opt = parse_opt()
    main(opt)