# workshop / train_full_SSL.py
# qiushuocheng's picture
# Initial upload
# a39be45 verified
"""
Copyright 2023 LINE Corporation
LINE Corporation licenses this file to you under the Apache License,
version 2.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at:
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations
under the License.
"""
from __future__ import print_function
import argparse
import inspect
import os
import pdb
import pickle
import random
import re
import shutil
import time
from collections import *
import ipdb
import numpy as np
# torch
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import yaml
from einops import rearrange, reduce, repeat
from evaluation.classificationMAP import getClassificationMAP as cmAP
from evaluation.detectionMAP import getSingleStreamDetectionMAP as dsmAP
from feeders.tools import collate_with_padding_multi_joint
from model.losses import cross_entropy_loss, mvl_loss
from sklearn.metrics import f1_score
# Custom
from tensorboardX import SummaryWriter
from torch.autograd import Variable
from torch.optim.lr_scheduler import _LRScheduler
from tqdm import tqdm
from utils.logger import Logger
def remove_prefix_from_state_dict(state_dict, prefix):
    """Return a copy of ``state_dict`` with ``prefix`` stripped from its keys.

    Keys that start with ``prefix`` lose exactly that leading substring; all
    other keys are copied unchanged. Values are carried over as-is (no copy),
    and the input mapping is not modified. If two keys collapse to the same
    name, the one that comes later in iteration order wins.

    Args:
        state_dict: Mapping of parameter names to values.
        prefix: Leading substring to remove from matching keys.

    Returns:
        A new plain ``dict`` in the iteration order of ``state_dict``.
    """
    cut = len(prefix)
    return {
        (name[cut:] if name.startswith(prefix) else name): value
        for name, value in state_dict.items()
    }
def init_seed(seed):
    """Seed every random number generator this script relies on.

    Seeds Python's ``random``, NumPy's global generator and PyTorch (CPU and
    all CUDA devices), then switches cuDNN to deterministic mode with
    benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Host-side generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # cuDNN settings applied by the original script alongside the seeds.
    cudnn_backend = torch.backends.cudnn
    cudnn_backend.deterministic = True
    cudnn_backend.benchmark = False
def get_parser():
    """Build the command-line parser for the training / evaluation script.

    Parameter priority is: command line > config file > the defaults declared
    here. The config file is applied by the caller through
    ``parser.set_defaults`` after a first parse, so every key in the config
    must match one of the destinations defined below.

    Returns:
        argparse.ArgumentParser: the fully populated parser.
    """
    # parameter priority: command line > config > default
    parser = argparse.ArgumentParser(
        description="Spatial Temporal Graph Convolution Network"
    )
    parser.add_argument(
        "--work-dir",
        default="./work_dir/temp",
        help="the work folder for storing results",
    )
    # Single-dash spelling kept as-is for compatibility with existing commands.
    parser.add_argument("-model_saved_name", default="")
    parser.add_argument(
        "--config",
        default="./config/nturgbd-cross-view/test_bone.yaml",
        help="path to the configuration file",
    )

    # processor
    parser.add_argument("--phase", default="train", help="must be train or test")

    # visualize and debug
    parser.add_argument("--seed", type=int, default=5, help="random seed for pytorch")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="the interval for printing messages (#iteration)",
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=2,
        # The training loop applies this to (epoch + 1), i.e. it is in epochs.
        help="the interval for storing models (#epoch)",
    )
    parser.add_argument(
        "--eval-interval",
        type=int,
        default=5,
        help="the interval for evaluating models (#iteration)",
    )
    parser.add_argument(
        "--print-log", type=str2bool, default=True, help="print logging or not"
    )
    parser.add_argument(
        "--show-topk",
        type=int,
        default=[1, 5],
        nargs="+",
        help="which Top K accuracy will be shown",
    )

    # feeder
    parser.add_argument(
        "--feeder", default="feeder.feeder", help="data loader will be used"
    )
    parser.add_argument(
        "--num-worker",
        type=int,
        default=32,
        help="the number of worker for data loader",
    )
    parser.add_argument(
        "--train-feeder-args",
        default=dict(),
        help="the arguments of data loader for training",
    )
    parser.add_argument(
        "--test-feeder-args",
        default=dict(),
        help="the arguments of data loader for test",
    )

    # model
    parser.add_argument("--model", default=None, help="the model will be used")
    # NOTE(review): ``type=dict`` cannot convert a command-line string, so in
    # practice this option is only settable from the config file.
    parser.add_argument(
        "--model-args", type=dict, default=dict(), help="the arguments of model"
    )
    parser.add_argument(
        "--weights", default=None, help="the weights for network initialization"
    )
    parser.add_argument(
        "--ignore-weights",
        type=str,
        default=[],
        nargs="+",
        help="the name of weights which will be ignored in the initialization",
    )

    # optim
    parser.add_argument(
        "--base-lr", type=float, default=0.01, help="initial learning rate"
    )
    parser.add_argument(
        "--step",
        type=int,
        default=[200],
        nargs="+",
        help="the epoch where optimizer reduce the learning rate",
    )

    # training
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        nargs="+",
        help="the indexes of GPUs for training or testing",
    )
    parser.add_argument("--optimizer", default="SGD", help="type of optimizer")
    parser.add_argument(
        "--nesterov", type=str2bool, default=False, help="use nesterov or not"
    )
    parser.add_argument(
        "--batch-size", type=int, default=256, help="training batch size"
    )
    parser.add_argument(
        "--test-batch-size", type=int, default=256, help="test batch size"
    )
    parser.add_argument(
        "--start-epoch", type=int, default=0, help="start training from which epoch"
    )
    parser.add_argument(
        "--num-epoch", type=int, default=80, help="stop training in which epoch"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=0.0005, help="weight decay for optimizer"
    )

    # loss
    parser.add_argument("--loss", type=str, default="CE", help="loss type(CE or focal)")
    parser.add_argument(
        "--label_count_path",
        default=None,
        type=str,
        help="Path to label counts (used in loss weighting)",
    )
    # The original flag was spelled with three dashes ("---beta"). Accept the
    # conventional "--beta" as well; the destination stays ``beta`` either way.
    parser.add_argument(
        "--beta",
        "---beta",
        type=float,
        default=0.9999,
        help="Hyperparameter for Class balanced loss",
    )
    parser.add_argument(
        "--gamma", type=float, default=2.0, help="Hyperparameter for Focal loss"
    )
    # These options are compared with booleans / integers downstream, so they
    # need explicit converters: without them a command-line value is a string
    # ("False" is truthy, and "10" cannot be compared with an int epoch).
    parser.add_argument("--only_train_part", type=str2bool, default=False)
    parser.add_argument("--only_train_epoch", type=int, default=0)
    parser.add_argument("--warm_up_epoch", type=int, default=10)
    parser.add_argument(
        "--lambda-mil",
        type=float,
        default=1.0,
        help="balancing hyper-parameter of mil branch",
    )
    parser.add_argument(
        "--class-threshold",
        type=float,
        default=0.1,
        help="class threshold for rejection",
    )
    parser.add_argument(
        "--start-threshold",
        type=float,
        default=0.03,
        help="start threshold for action localization",
    )
    parser.add_argument(
        "--end-threshold",
        type=float,
        default=0.055,
        help="end threshold for action localization",
    )
    parser.add_argument(
        "--threshold-interval",
        type=float,
        default=0.005,
        help="threshold interval for action localization",
    )
    return parser
class Processor:
    """
    Processor for Skeleton-based Action Recognition

    Owns the model, optimizer, data loaders and loggers for one run and
    drives the train / eval loops according to ``arg.phase``.
    """

    def __init__(self, arg):
        """Set up logging, model, optimizer and data loaders.

        Args:
            arg: Parsed argument namespace (see ``get_parser``).

        Side effects:
            Writes the config to ``arg.work_dir``; in the non-debug train
            phase an existing ``arg.model_saved_name`` directory is deleted
            before the TensorBoard writers are created.
        """
        self.arg = arg
        self.save_arg()
        if arg.phase == "train":
            if not arg.train_feeder_args["debug"]:
                if os.path.isdir(arg.model_saved_name):
                    print("log_dir: ", arg.model_saved_name, "already exist")
                    # The interactive confirmation was disabled in this
                    # script, so an existing log directory is always removed.
                    print("Deleting dir...")
                    shutil.rmtree(arg.model_saved_name)
                    print("Dir removed: ", arg.model_saved_name)
                self.train_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "train"), "train"
                )
                self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "val"), "val"
                )
            else:
                # Debug runs share a single writer for train and validation.
                self.train_writer = self.val_writer = SummaryWriter(
                    os.path.join(arg.model_saved_name, "test"), "test"
                )
        self.global_step = 0
        self.load_model()
        self.load_optimizer()
        self.load_data()
        self.lr = self.arg.base_lr
        self.best_acc = 0
        self.best_per_class_acc = 0
        self.loss_nce = torch.nn.BCELoss()
        self.my_logger = Logger(
            os.path.join(arg.model_saved_name, "log.txt"), title="SWTAL"
        )
        self.my_logger.set_names(
            ["Step", "cmap"] + [f"map_0.{i}" for i in range(1, 6)] + ["avg"]
        )

    def load_data(self):
        """Create ``self.data_loader`` with a "test" loader and, in the train
        phase, a shuffled "train" loader that drops the last partial batch."""
        Feeder = import_class(self.arg.feeder)
        self.data_loader = dict()
        if self.arg.phase == "train":
            self.data_loader["train"] = torch.utils.data.DataLoader(
                dataset=Feeder(**self.arg.train_feeder_args),
                batch_size=self.arg.batch_size,
                shuffle=True,
                num_workers=self.arg.num_worker,
                drop_last=True,
                collate_fn=collate_with_padding_multi_joint,
            )
        self.data_loader["test"] = torch.utils.data.DataLoader(
            dataset=Feeder(**self.arg.test_feeder_args),
            batch_size=self.arg.test_batch_size,
            shuffle=False,
            num_workers=self.arg.num_worker,
            drop_last=False,
            collate_fn=collate_with_padding_multi_joint,
        )

    def load_model(self):
        """Instantiate the model on the output device and optionally load weights.

        The model source file is copied into ``arg.work_dir``. When
        ``arg.weights`` is set, the checkpoint is loaded, ``module.`` and
        ``encoder_q.agcn.`` key prefixes are stripped, keys matching the
        ignore list are dropped, and whatever remains is loaded into the
        model (missing entries keep their freshly initialised values).
        """
        if isinstance(self.arg.device, list):
            output_device = self.arg.device[0]
        else:
            output_device = self.arg.device
        self.output_device = output_device

        Model = import_class(self.arg.model)
        shutil.copy2(inspect.getfile(Model), self.arg.work_dir)
        self.model = Model(**self.arg.model_args).cuda(output_device)
        # Read from self.arg rather than a module-level ``arg`` so the class
        # also works when it is imported instead of run as a script.
        self.loss_type = self.arg.loss

        if self.arg.weights:
            self.print_log("Load weights from {}.".format(self.arg.weights))
            if ".pkl" in self.arg.weights:
                # Pickle data is binary: text mode fails on Python 3.
                # NOTE: unpickling executes arbitrary code; only load
                # checkpoints from trusted sources.
                with open(self.arg.weights, "rb") as f:
                    weights = pickle.load(f)
            else:
                weights = torch.load(self.arg.weights)

            weights = OrderedDict(
                (k.split("module.")[-1], v.cuda(output_device))
                for k, v in weights.items()
            )
            weights = remove_prefix_from_state_dict(weights, "encoder_q.agcn.")

            # NOTE(review): this hard-coded list replaces whatever was passed
            # via --ignore-weights; kept because it is the existing behavior.
            self.arg.ignore_weights = [
                "data_bn",
                "fc",
                "encoder_q",
                "encoder_k",
                "queue",
                "queue_ptr",
                "value_transform",
            ]
            for key in list(weights.keys()):
                if any(w in key for w in self.arg.ignore_weights):
                    weights.pop(key)

            try:
                self.model.load_state_dict(weights)
            except (RuntimeError, KeyError):
                # Partial checkpoint: report what is missing, then overlay the
                # available weights on the model's current state.
                state = self.model.state_dict()
                diff = list(set(state.keys()).difference(set(weights.keys())))
                print("Can not find these weights:")
                for d in diff:
                    print("  " + d)
                state.update(weights)
                self.model.load_state_dict(state)

        if isinstance(self.arg.device, list) and len(self.arg.device) > 1:
            self.model = nn.DataParallel(
                self.model, device_ids=self.arg.device, output_device=output_device
            )

    def load_optimizer(self):
        """Create ``self.optimizer`` ("SGD" or "Adam") over the model parameters.

        Raises:
            ValueError: if ``arg.optimizer`` is neither "SGD" nor "Adam".
        """
        if self.arg.optimizer == "SGD":
            self.optimizer = optim.SGD(
                self.model.parameters(),
                lr=self.arg.base_lr,
                momentum=0.9,
                nesterov=self.arg.nesterov,
                weight_decay=self.arg.weight_decay,
            )
        elif self.arg.optimizer == "Adam":
            self.optimizer = optim.Adam(
                self.model.parameters(),
                lr=self.arg.base_lr,
                weight_decay=self.arg.weight_decay,
            )
        else:
            raise ValueError(
                "Unsupported optimizer: {!r}".format(self.arg.optimizer)
            )

    def save_arg(self):
        """Dump the argument namespace to ``<work_dir>/config.yaml``."""
        arg_dict = vars(self.arg)
        os.makedirs(self.arg.work_dir, exist_ok=True)
        with open("{}/config.yaml".format(self.arg.work_dir), "w") as f:
            yaml.dump(arg_dict, f)

    def adjust_learning_rate(self, epoch):
        """Set and return the learning rate for ``epoch``.

        During the first ``warm_up_epoch`` epochs the rate ramps linearly up
        to ``base_lr``; afterwards it is ``base_lr`` times 0.1 for every
        entry of ``arg.step`` that ``epoch`` has reached.

        Raises:
            ValueError: if the optimizer is neither "SGD" nor "Adam".
        """
        if self.arg.optimizer not in ("SGD", "Adam"):
            raise ValueError(
                "Unsupported optimizer: {!r}".format(self.arg.optimizer)
            )
        # int() tolerates a value that arrived as a string from the CLI.
        warm_up_epoch = int(self.arg.warm_up_epoch)
        if epoch < warm_up_epoch:
            lr = self.arg.base_lr * (epoch + 1) / warm_up_epoch
        else:
            lr = self.arg.base_lr * (
                0.1 ** np.sum(epoch >= np.array(self.arg.step))
            )
        for param_group in self.optimizer.param_groups:
            param_group["lr"] = lr
        return lr

    def print_time(self):
        """Log the current local time."""
        localtime = time.asctime(time.localtime(time.time()))
        self.print_log("Local current time :  " + localtime)

    def print_log(self, str, print_time=True):
        """Print a message and, if ``arg.print_log`` is set, append it to
        ``<work_dir>/print_log.txt``.

        Args:
            str: Message text (name kept for keyword-argument compatibility).
            print_time: Prefix the message with the local time when True.
        """
        if print_time:
            localtime = time.asctime(time.localtime(time.time()))
            str = "[ " + localtime + " ] " + str
        print(str)
        if self.arg.print_log:
            with open("{}/print_log.txt".format(self.arg.work_dir), "a") as f:
                print(str, file=f)

    def record_time(self):
        """Store and return the current timestamp."""
        self.cur_time = time.time()
        return self.cur_time

    def split_time(self):
        """Return seconds since the last recorded timestamp and reset it."""
        split_time = time.time() - self.cur_time
        self.record_time()
        return split_time

    def train(self, epoch, wb_dict, save_model=False):
        """Run one fully-supervised training epoch.

        Each batch is scored frame by frame (scores are rearranged from
        ``(n, t, c)`` to ``(n*t, c)``) and optimised with
        ``cross_entropy_loss`` against the one-hot frame targets. The
        per-clip frame probabilities, cut to each clip's mask length, are
        handed to the dataset's ``label_update`` at the end of the epoch.

        Args:
            epoch: Zero-based epoch index.
            wb_dict: Metrics dict; "train loss" and "train acc" are set.
            save_model: When True, save the weights to
                ``model_saved_name + str(epoch) + ".pt"``.

        Returns:
            The updated ``wb_dict``.
        """
        self.model.train()
        self.print_log("Training epoch: {}".format(epoch + 1))
        loader = self.data_loader["train"]
        self.adjust_learning_rate(epoch)
        loss_value = []
        self.train_writer.add_scalar("epoch", epoch, self.global_step)
        self.record_time()
        timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
        process = tqdm(loader)

        if self.arg.only_train_part:
            # Parameters whose name contains "PA" are frozen until the epoch
            # index exceeds only_train_epoch.
            train_pa = epoch > int(self.arg.only_train_epoch)
            if train_pa:
                print("only train part, require grad")
            else:
                print("only train part, do not require grad")
            for key, value in self.model.named_parameters():
                if "PA" in key:
                    value.requires_grad = train_pa

        results = []
        indexs = []
        # Full supervision: the feeder (via collate_with_padding_multi_joint)
        # yields frame-level targets alongside the padded data and mask.
        for data, _label, target, mask, index, _soft_label in process:
            self.global_step += 1
            data = data.float().cuda(self.output_device)
            target = target.cuda(self.output_device)
            timer["dataloader"] += self.split_time()

            # One-hot depth is fixed at 5; presumably the number of
            # frame-level classes — TODO confirm against the model config.
            one_hot_ground_truth = F.one_hot(target.view(-1), num_classes=5)
            indexs.extend(index.cpu().numpy().tolist())

            # forward
            frm_scrs = self.model(data)
            frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
            loss = cross_entropy_loss(frm_scrs_re, one_hot_ground_truth)

            # Collect per-clip probabilities without the padded frames. One
            # device-to-host copy per batch instead of one per clip.
            frm_probs = F.softmax(frm_scrs, -1).detach().cpu().numpy()
            mask_np = mask.cpu().numpy()
            for i in range(data.size(0)):
                vid_len = int(mask_np[i].sum())
                results.append(frm_probs[i][:vid_len].copy())

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            loss_value.append(loss.item())
            timer["model"] += self.split_time()

        loader.dataset.label_update(results, indexs)

        # Video-level classification mAP is disabled in this script.
        cmap = 0
        mean_loss = np.mean(loss_value)
        self.train_writer.add_scalar("acc", cmap, self.global_step)
        self.train_writer.add_scalar("loss", mean_loss, self.global_step)

        # statistics
        self.lr = self.optimizer.param_groups[0]["lr"]
        self.train_writer.add_scalar("lr", self.lr, self.global_step)
        timer["statistics"] += self.split_time()

        self.print_log("\tMean training loss: {:.4f}.".format(mean_loss))
        self.print_log("\tAcc score: {:.3f}%".format(cmap))

        wb_dict["train loss"] = mean_loss
        wb_dict["train acc"] = cmap

        if save_model:
            state_dict = self.model.state_dict()
            # Strip the DataParallel "module." prefix and move to CPU.
            weights = OrderedDict(
                (k.split("module.")[-1], v.cpu()) for k, v in state_dict.items()
            )
            torch.save(
                weights,
                self.arg.model_saved_name + str(epoch) + ".pt",
            )
        return wb_dict

    @torch.no_grad()
    def eval(
        self,
        epoch,
        wb_dict,
        loader_name=("test",),
    ):
        """Evaluate the model on each named loader.

        Computes the mean frame-level loss and the detection mAP returned by
        ``dsmAP``, appends a row to ``self.my_logger`` and records
        "val loss" / "val acc" in ``wb_dict``.

        Args:
            epoch: Zero-based epoch index (used for logging).
            wb_dict: Metrics dict to update.
            loader_name: Iterable of keys into ``self.data_loader``.

        Returns:
            The updated ``wb_dict``.
        """
        self.model.eval()
        self.print_log("Eval epoch: {}".format(epoch + 1))
        for ln in loader_name:
            # Fresh accumulators per loader: they are rebound to arrays
            # below, so sharing them across loaders would break ``append``.
            vid_preds = []
            frm_preds = []
            vid_lens = []
            loss_value = []
            process = tqdm(self.data_loader[ln])
            for data, _label, target, mask, _index, _soft_label in process:
                data = data.float().cuda(self.output_device)
                target = target.cuda(self.output_device)

                # forward
                frm_scrs = self.model(data)

                # One-hot depth is fixed at 5; presumably the number of
                # frame-level classes — TODO confirm.
                one_hot_ground_truth = F.one_hot(target.view(-1), num_classes=5)
                frm_scrs_re = rearrange(frm_scrs, "n t c -> (n t) c")
                loss = cross_entropy_loss(frm_scrs_re, one_hot_ground_truth)
                loss_value.append(loss.item())

                frm_probs = F.softmax(frm_scrs, -1).cpu().numpy()
                mask_np = mask.cpu().numpy()
                for i in range(data.size(0)):
                    vid_len = int(mask_np[i].sum())
                    # Video-level prediction is disabled; 0 is a placeholder.
                    vid_preds.append(0)
                    frm_preds.append(frm_probs[i][:vid_len].copy())
                    vid_lens.append(vid_len)

            vid_preds = np.array(vid_preds)
            # NOTE(review): when clips differ in length this is a ragged
            # conversion, which recent NumPy releases reject unless
            # dtype=object is given — confirm what dsmAP expects.
            frm_preds = np.array(frm_preds)
            vid_lens = np.array(vid_lens)

            # Video-level classification mAP is disabled in this script.
            cmap = 0
            score = cmap
            loss = np.mean(loss_value)
            dmap, iou = dsmAP(
                vid_preds,
                frm_preds,
                vid_lens,
                self.arg.test_feeder_args["data_path"],
                self.arg,
                multi=True,
            )
            print("Classification map %f" % cmap)
            for item in list(zip(iou, dmap)):
                print("Detection map @ %f = %f" % (item[0], item[1]))
            self.my_logger.append([epoch + 1, cmap] + dmap + [np.mean(dmap)])

            wb_dict["val loss"] = loss
            wb_dict["val acc"] = score
            if score > self.best_acc:
                self.best_acc = score
            print("Acc score: ", score, " model: ", self.arg.model_saved_name)
            if self.arg.phase == "train":
                self.val_writer.add_scalar("loss", loss, self.global_step)
                self.val_writer.add_scalar("acc", score, self.global_step)
            self.print_log(
                "\tMean {} loss of {} batches: {}.".format(
                    ln, len(self.data_loader[ln]), np.mean(loss_value)
                )
            )
            self.print_log("\tAcc score: {:.3f}%".format(score))
        return wb_dict

    def start(self):
        """Run the configured phase.

        "train": train and evaluate every epoch from ``start_epoch`` to
        ``num_epoch``, saving weights every ``save_interval`` epochs and on
        the last epoch. "test": evaluate once on the "test" loader.

        Raises:
            ValueError: in the test phase when ``arg.weights`` is not set.
        """
        wb_dict = {}
        if self.arg.phase == "train":
            self.print_log("Parameters:\n{}\n".format(str(vars(self.arg))))
            # len(DataLoader) is already the number of batches per epoch, so
            # the resume step is simply epochs * batches (and stays an int).
            self.global_step = self.arg.start_epoch * len(self.data_loader["train"])
            for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
                save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
                    epoch + 1 == self.arg.num_epoch
                )
                wb_dict = {"lr": self.lr}

                # Train
                wb_dict = self.train(epoch, wb_dict, save_model=save_model)

                # Eval. on val set (every epoch)
                wb_dict = self.eval(epoch, wb_dict, loader_name=["test"])

                # Log stats. for this epoch
                print("Epoch: {0}\nMetrics: {1}".format(epoch, wb_dict))
            print(
                "best accuracy: ",
                self.best_acc,
                " model_name: ",
                self.arg.model_saved_name,
            )
        elif self.arg.phase == "test":
            if self.arg.weights is None:
                raise ValueError("Please appoint --weights.")
            self.arg.print_log = False
            self.print_log("Model:   {}.".format(self.arg.model))
            self.print_log("Weights: {}.".format(self.arg.weights))
            # eval() takes no wrong_file / result_file parameters; passing
            # them raised TypeError, so they are no longer forwarded.
            wb_dict = self.eval(
                epoch=0,
                wb_dict=wb_dict,
                loader_name=["test"],
            )
            print("Inference metrics: ", wb_dict)
            self.print_log("Done.\n")
def str2bool(v):
    """Convert a textual flag value to a bool for use as an argparse ``type``.

    Matching is case-insensitive. "yes", "true", "t", "y" and "1" map to
    True; "no", "false", "f", "n" and "0" map to False.

    Raises:
        argparse.ArgumentTypeError: for any other value.
    """
    truthy = ("yes", "true", "t", "y", "1")
    falsy = ("no", "false", "f", "n", "0")

    lowered = v.lower()
    if lowered in truthy:
        return True
    if lowered in falsy:
        return False
    raise argparse.ArgumentTypeError("Boolean value expected.")
def import_class(name):
    """Resolve a dotted path such as ``"pkg.module.ClassName"`` to an object.

    The first component is imported as a module and each following component
    is looked up as an attribute of the previous result. If an attribute is
    missing, the dotted path so far is tried as a sub-module import, so
    sub-modules that the parent package does not import itself still resolve.

    Args:
        name: Dotted import path.

    Returns:
        The module, class or other object the path refers to.

    Raises:
        ImportError: if the top-level module cannot be imported.
        AttributeError: if a later component is neither an attribute nor an
            importable sub-module.
    """
    import importlib

    components = name.split(".")
    dotted = components[0]
    target = importlib.import_module(dotted)
    for comp in components[1:]:
        dotted = "{}.{}".format(dotted, comp)
        if hasattr(target, comp):
            target = getattr(target, comp)
            continue
        try:
            # Not exposed on the parent yet: try it as a sub-module.
            target = importlib.import_module(dotted)
        except ImportError as err:
            # Keep the exception type the attribute-walk always raised.
            raise AttributeError(
                "cannot resolve {!r} while importing {!r}".format(dotted, name)
            ) from err
    return target
if __name__ == "__main__":
    parser = get_parser()

    # Load arguments from the config file. The first parse is only needed to
    # find --config; its values then become parser defaults so that explicit
    # command-line options still win on the second parse.
    p = parser.parse_args()
    if p.config is not None:
        with open(p.config, "r") as f:
            # An empty YAML file loads as None; treat it as "no overrides".
            default_arg = yaml.safe_load(f) or {}
        known_keys = vars(p).keys()
        unknown_keys = [k for k in default_arg.keys() if k not in known_keys]
        if unknown_keys:
            for k in unknown_keys:
                print("WRONG ARG: {}".format(k))
            # Raise explicitly: an assert would be stripped under ``python -O``
            # and unknown keys would then be accepted silently.
            raise ValueError(
                "Unknown config keys in {}: {}".format(p.config, unknown_keys)
            )
        parser.set_defaults(**default_arg)

    arg = parser.parse_args()
    print("BABEL Action Recognition")
    print("Config: ", arg)
    init_seed(arg.seed)
    processor = Processor(arg)
    processor.start()