Spaces:
Sleeping
Sleeping
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. | |
# Set up custom environment before nearly anything else is imported | |
# NOTE: this should be the first import (do not reorder)
from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip | |
import argparse | |
import os | |
import torch | |
from maskrcnn_benchmark.config import cfg | |
from maskrcnn_benchmark.data import make_data_loader | |
from maskrcnn_benchmark.modeling.detector import build_detection_model | |
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer | |
from maskrcnn_benchmark.utils.collect_env import collect_env_info | |
from maskrcnn_benchmark.utils.comm import synchronize, get_rank, is_main_process | |
from maskrcnn_benchmark.utils.logger import setup_logger | |
from maskrcnn_benchmark.utils.miscellaneous import mkdir | |
from maskrcnn_benchmark.utils.stats import get_model_complexity_info | |
import os | |
import functools | |
import io | |
import os | |
import datetime | |
import wandb | |
import torch | |
import torch.distributed as dist | |
import pdb | |
from pprint import pprint | |
def init_distributed_mode(args):
    """Set up the torch.distributed process group from environment variables.

    Two environment layouts are recognised:

    * ``RANK`` and ``WORLD_SIZE`` are both set: ``args.rank``,
      ``args.world_size`` and ``args.gpu`` are read from ``RANK``,
      ``WORLD_SIZE`` and ``LOCAL_RANK`` respectively.
    * ``SLURM_PROCID`` is set: ``args.rank`` is read from it and ``args.gpu``
      is the rank modulo the number of visible CUDA devices.
      ``args.world_size`` is left as the caller provided it.

    When neither layout is present, ``args.distributed`` is set to ``False``
    and the function returns without touching CUDA or torch.distributed.

    Args:
        args: Mutable namespace. ``args.dist_url`` (and, on the SLURM path,
            ``args.world_size``) must already be set by the caller.
    """
    env = os.environ
    has_rank_env = "RANK" in env and "WORLD_SIZE" in env

    # Guard clause: nothing to initialise outside a distributed launch.
    if not has_rank_env and "SLURM_PROCID" not in env:
        print("Not using distributed mode")
        args.distributed = False
        return

    if has_rank_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env["WORLD_SIZE"])
        args.gpu = int(env["LOCAL_RANK"])
    else:
        args.rank = int(env["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()

    # NOTE: args.distributed is intentionally not set to True here
    # (the assignment was disabled in the original code).

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print("| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True)

    process_group_options = dict(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        timeout=datetime.timedelta(seconds=72000),
    )
    dist.init_process_group(**process_group_options)
    dist.barrier()

    # Only rank 0 keeps printing by default after this call.
    setup_for_distributed(args.rank == 0)
def setup_for_distributed(is_master):
    """Replace ``builtins.print`` with a version that is silent off-master.

    After this call, ``print(...)`` produces output only when ``is_master``
    is true or the call passes ``force=True``. The ``force`` keyword is
    consumed here and never forwarded to the underlying print.

    Args:
        is_master: Whether the current process should print by default.
    """
    import builtins

    # Capture whatever print is installed right now; the wrapper delegates to it.
    wrapped_print = builtins.print

    def gated_print(*values, **options):
        forced = options.pop("force", False)
        if not (is_master or forced):
            return
        wrapped_print(*values, **options)

    builtins.print = gated_print
def _build_arg_parser():
    """Create the command-line parser for the evaluation script."""
    parser = argparse.ArgumentParser(description="PyTorch Detection to Grounding Inference")
    parser.add_argument(
        "--config-file",
        default="configs/grounding/e2e_dyhead_SwinT_S_FPN_1x_od_grounding_eval.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weight",
        default=None,
        metavar="FILE",
        # Previously a copy-paste of the --config-file help text.
        help="path to the checkpoint file to evaluate",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER
    )
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training")
    parser.add_argument("--task_config", default=None)
    parser.add_argument("--eval_negative", action="store_true")
    parser.add_argument("--wandb_project_name", default="haroldli/language_det_eval")
    return parser


def _weight_iteration(weight_path):
    """Return the integer suffix of a checkpoint file name, or 1 as a fallback.

    The file name without directory and extension is split on ``"_"`` and the
    last piece is parsed with ``int``. ``1`` is returned when ``weight_path``
    is empty/``None`` or when that piece is not an integer.
    """
    if not weight_path:
        return 1
    stem = os.path.splitext(os.path.basename(weight_path))[0]
    try:
        return int(stem.split("_")[-1])
    except ValueError:
        # Only a non-numeric suffix is expected here; anything else should surface.
        return 1


def _finished_run_history(project_name, run_name):
    """Collect history rows of the first finished W&B run called ``run_name``.

    Runs of ``project_name`` are scanned in the order the W&B API returns
    them. For the first run whose name matches and whose state is
    ``"finished"``, every history row is copied without the ``_runtime`` and
    ``_timestamp`` keys and without ``None`` values; rows left with at most
    one key are dropped. Returns an empty list when no run matches.
    """
    skipped_keys = ("_runtime", "_timestamp")
    history = []
    api = wandb.Api()
    for run in api.runs(project_name):
        if run.name == run_name and str(run._state) == "finished":
            print("run found", run.name)
            print(run.summary)
            for stat in run.scan_history():
                row = {k: v for k, v in stat.items() if k not in skipped_keys and v is not None}
                if len(row) > 1:
                    history.append(row)
            # Only the first matching run is used.
            break
    return history


def _evaluate_datasets(eval_cfg, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history):
    """Run ``inference_function`` on every dataset in ``eval_cfg.DATASETS.TEST``.

    Per-dataset output folders are created under ``<log_dir>/inference`` when
    ``log_dir`` is non-empty; otherwise ``None`` is passed as the output
    folder. ``synchronize()`` is called after each dataset.
    """
    iou_types = ("bbox",)
    if eval_cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if eval_cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)

    dataset_names = eval_cfg.DATASETS.TEST
    # DATASETS.TEST may be a list of groups; flatten it to a list of names.
    if isinstance(dataset_names[0], (list, tuple)):
        dataset_names = [dataset for group in dataset_names for dataset in group]

    output_folders = [None] * len(dataset_names)
    if log_dir:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(log_dir, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(eval_cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference_function(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=eval_cfg.MODEL.RPN_ONLY
            and (eval_cfg.MODEL.RPN_ARCHITECTURE == "RPN" or eval_cfg.DATASETS.CLASS_AGNOSTIC),
            device=eval_cfg.MODEL.DEVICE,
            expected_results=eval_cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=eval_cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            cfg=eval_cfg,
            wandb_run=wandb_run,
            weight_iter=weight_iter,
            history=history,
        )
        synchronize()


def main():
    """Evaluate a detection/grounding checkpoint and log the run to W&B.

    Steps, in order:

    1. Parse the command line and, when ``WORLD_SIZE`` > 1, initialise
       distributed mode.
    2. Merge the config file and command-line overrides into the global
       ``cfg`` and freeze it.
    3. Build the model and load ``--weight`` (or ``cfg.MODEL.WEIGHT``).
    4. On the main process (unless the output directory is named
       ``__test__``), fetch the history of a previous finished W&B run with
       the same evaluation name and start a new W&B run.
    5. Run inference on the test datasets, once per comma-separated
       ``--task_config`` file if given, otherwise once with the base config.
    """
    args = _build_arg_parser().parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        init_distributed_mode(args)
        print("Passed distributed init")

    cfg.local_rank = args.local_rank
    cfg.num_gpus = num_gpus
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # Logs for an explicit checkpoint go to <OUTPUT_DIR>/eval/<checkpoint stem>.
    log_dir = cfg.OUTPUT_DIR
    if args.weight:
        log_dir = os.path.join(log_dir, "eval", os.path.splitext(os.path.basename(args.weight))[0])
    if log_dir:
        mkdir(log_dir)

    logger = setup_logger("maskrcnn_benchmark", log_dir, get_rank())
    logger.info(args)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    # Model complexity (FLOPs / parameter count) reporting is currently disabled.

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    if args.weight:
        checkpointer.load(args.weight, force=True)
    else:
        checkpointer.load(cfg.MODEL.WEIGHT)

    weight_iter = _weight_iteration(args.weight)

    # W&B run names are derived from the training output directory name.
    train_wandb_name = os.path.basename(cfg.OUTPUT_DIR)
    eval_wandb_name = train_wandb_name + "_eval" + "_Fixed{}_Chunk{}".format(
        not cfg.DATASETS.LVIS_USE_NORMAL_AP, cfg.TEST.CHUNKED_EVALUATION
    )

    if args.eval_negative:
        from maskrcnn_benchmark.engine.inference_contrastive import inference as inference_function
    else:
        from maskrcnn_benchmark.engine.inference import inference as inference_function

    if is_main_process() and train_wandb_name != "__test__":
        history = _finished_run_history(args.wandb_project_name, eval_wandb_name)
        # NOTE(review): the lookup above uses --wandb_project_name, while the new
        # run is created under the hard-coded project below; confirm this is intended.
        wandb_run = wandb.init(
            project="language_det_eval",
            job_type="evaluate",
            name=eval_wandb_name,
        )
    else:
        wandb_run = None
        history = None

    print("weight_iter: ", weight_iter)
    print("train_wandb_name: ", train_wandb_name)
    print("eval_wandb_name: ", eval_wandb_name)

    if args.task_config:
        for task_config in args.task_config.split(","):
            # Each task starts from the base config, then applies the task
            # file and the command-line overrides on top.
            task_cfg = cfg.clone()
            task_cfg.defrost()
            task_cfg.merge_from_file(task_config)
            task_cfg.merge_from_list(args.opts)
            _evaluate_datasets(
                task_cfg, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history
            )
    else:
        _evaluate_datasets(cfg, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history)
# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()