desco / tools /test_grounding_net.py
zdou0830's picture
desco
749745d
raw
history blame
11.7 kB
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# Set up custom environment before nearly anything else is imported
# NOTE: this should be the first import (do not reorder)
from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip
import argparse
import os
import torch
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.data import make_data_loader
from maskrcnn_benchmark.modeling.detector import build_detection_model
from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
from maskrcnn_benchmark.utils.collect_env import collect_env_info
from maskrcnn_benchmark.utils.comm import synchronize, get_rank, is_main_process
from maskrcnn_benchmark.utils.logger import setup_logger
from maskrcnn_benchmark.utils.miscellaneous import mkdir
from maskrcnn_benchmark.utils.stats import get_model_complexity_info
import os
import functools
import io
import os
import datetime
import wandb
import torch
import torch.distributed as dist
import pdb
from pprint import pprint
def init_distributed_mode(args):
    """Set up the torch.distributed process group from launcher environment variables.

    Rank information is taken from ``RANK``/``WORLD_SIZE``/``LOCAL_RANK`` when the
    first two are present, otherwise from ``SLURM_PROCID``. When neither launcher
    is detected, ``args.distributed`` is set to ``False`` and the function returns
    without touching torch. Note that ``args.distributed`` is only written on that
    non-distributed path.

    Args:
        args: argument namespace; ``rank``, ``gpu`` and ``dist_backend`` are written
            to it, and ``dist_url`` (plus ``world_size`` in the SLURM case) is read
            from it.
    """
    env = os.environ
    launched_with_env = "RANK" in env and "WORLD_SIZE" in env
    launched_with_slurm = "SLURM_PROCID" in env

    if not (launched_with_env or launched_with_slurm):
        print("Not using distributed mode")
        args.distributed = False
        return

    if launched_with_env:
        args.rank = int(env["RANK"])
        args.world_size = int(env["WORLD_SIZE"])
        args.gpu = int(env["LOCAL_RANK"])
    else:
        # SLURM path: args.world_size is left as whatever the caller provided.
        args.rank = int(env["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"

    banner = "| distributed init (rank {}): {}".format(args.rank, args.dist_url)
    print(banner, flush=True)

    # 72000 seconds = 20 hours before collective operations time out.
    dist.init_process_group(
        backend=args.dist_backend,
        init_method=args.dist_url,
        world_size=args.world_size,
        rank=args.rank,
        timeout=datetime.timedelta(seconds=72000),
    )
    dist.barrier()

    # From here on, only rank 0 prints unless force=True is passed to print().
    setup_for_distributed(args.rank == 0)
def setup_for_distributed(is_master):
    """
    Replace the built-in ``print`` so that, by default, only the master process prints.

    Any process can still emit output by passing ``force=True`` to ``print``.
    """
    import builtins

    original_print = builtins.print

    def print(*args, **kwargs):
        # "force" is consumed here so it never reaches the wrapped print().
        forced = kwargs.pop("force", False)
        if not (is_master or forced):
            return
        original_print(*args, **kwargs)

    builtins.print = print
def _evaluate_datasets(eval_cfg, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history):
    """Run ``inference_function`` once per dataset listed in ``eval_cfg.DATASETS.TEST``.

    Args:
        eval_cfg: config node used to build the data loaders and forwarded to
            ``inference_function`` as ``cfg``.
        model: detection model to evaluate.
        inference_function: callable invoked once per (data loader, dataset name) pair.
        log_dir: base output directory. When falsy, no per-dataset folder is
            created and ``output_folder=None`` is passed to inference.
        distributed: forwarded to ``make_data_loader`` as ``is_distributed``.
        wandb_run: forwarded unchanged to ``inference_function``.
        weight_iter: forwarded unchanged to ``inference_function``.
        history: forwarded unchanged to ``inference_function``.
    """
    iou_types = ("bbox",)
    if eval_cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if eval_cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)

    dataset_names = eval_cfg.DATASETS.TEST
    # DATASETS.TEST may be a sequence of groups; flatten it into a flat list of names.
    if isinstance(dataset_names[0], (list, tuple)):
        dataset_names = [dataset for group in dataset_names for dataset in group]

    output_folders = [None] * len(dataset_names)
    if log_dir:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(log_dir, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(eval_cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference_function(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=eval_cfg.MODEL.RPN_ONLY
            and (eval_cfg.MODEL.RPN_ARCHITECTURE == "RPN" or eval_cfg.DATASETS.CLASS_AGNOSTIC),
            device=eval_cfg.MODEL.DEVICE,
            expected_results=eval_cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=eval_cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            cfg=eval_cfg,
            wandb_run=wandb_run,
            weight_iter=weight_iter,
            history=history,
        )
        # Wait for all processes before moving on to the next dataset.
        synchronize()


def main():
    """Parse the command line, load the model and evaluate it on the test datasets.

    Steps, in order:
      1. Parse arguments and, when ``WORLD_SIZE`` > 1, initialise distributed mode.
      2. Merge the config file and command-line ``opts`` into the global ``cfg``.
      3. Build the model and load either ``--weight`` or ``cfg.MODEL.WEIGHT``.
      4. On the main process (unless the output dir is named ``__test__``), look up
         a previous finished wandb eval run with the same name, collect its
         history, and start a new wandb run.
      5. Run inference for each ``--task_config`` file, or for ``cfg`` itself when
         no task config is given.
    """
    parser = argparse.ArgumentParser(description="PyTorch Detection to Grounding Inference")
    parser.add_argument(
        "--config-file",
        default="configs/grounding/e2e_dyhead_SwinT_S_FPN_1x_od_grounding_eval.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--weight",
        default=None,
        metavar="FILE",
        help="path to the checkpoint to evaluate (cfg.MODEL.WEIGHT is loaded when omitted)",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts", help="Modify config options using the command-line", default=None, nargs=argparse.REMAINDER
    )
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", help="url used to set up distributed training")
    parser.add_argument(
        "--task_config",
        default=None,
        help="comma-separated list of task config files, each evaluated in turn",
    )
    parser.add_argument(
        "--eval_negative",
        action="store_true",
        help="use the inference function from engine.inference_contrastive",
    )
    parser.add_argument(
        "--wandb_project_name",
        default="haroldli/language_det_eval",
        help="wandb project path searched for a previous finished eval run",
    )
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        init_distributed_mode(args)
        print("Passed distributed init")

    cfg.local_rank = args.local_rank
    cfg.num_gpus = num_gpus
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # File name of the checkpoint without directory or extension, e.g. "model_0090000".
    weight_stem = os.path.splitext(os.path.basename(args.weight))[0] if args.weight else None

    log_dir = cfg.OUTPUT_DIR
    if args.weight:
        log_dir = os.path.join(log_dir, "eval", weight_stem)
    if log_dir:
        mkdir(log_dir)

    logger = setup_logger("maskrcnn_benchmark", log_dir, get_rank())
    logger.info(args)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    # Model-complexity (FLOPs / parameter count) reporting is currently disabled.

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.OUTPUT_DIR)
    if args.weight:
        _ = checkpointer.load(args.weight, force=True)
    else:
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

    # Use the trailing "_<int>" of the checkpoint name as the iteration; otherwise 1.
    weight_iter = 1
    if args.weight:
        try:
            weight_iter = int(weight_stem.split("_")[-1])
        except ValueError:
            # Suffix is not an integer (e.g. "model_final"); keep the default.
            weight_iter = 1

    # get the wandb name
    train_wandb_name = os.path.basename(cfg.OUTPUT_DIR)
    eval_wandb_name = train_wandb_name + "_eval" + "_Fixed{}_Chunk{}".format(
        not cfg.DATASETS.LVIS_USE_NORMAL_AP, cfg.TEST.CHUNKED_EVALUATION
    )

    if args.eval_negative:
        from maskrcnn_benchmark.engine.inference_contrastive import inference
    else:
        from maskrcnn_benchmark.engine.inference import inference
    inference_function = inference

    if is_main_process() and train_wandb_name != "__test__":
        api = wandb.Api()
        runs = api.runs(args.wandb_project_name)
        matched_run = None
        history = []
        exclude_keys = ["_runtime", "_timestamp"]
        for run in runs:
            if run.name == eval_wandb_name and str(run._state) == "finished":
                print("run found", run.name)
                print(run.summary)
                matched_run = run
                # Keep every logged row that has more than one non-None,
                # non-excluded key.
                for stat in matched_run.scan_history():
                    stat_i = {k: v for k, v in stat.items() if k not in exclude_keys and v is not None}
                    if len(stat_i) > 1:
                        history.append(stat_i)
                # Only the first matching finished run is used.
                break

        # NOTE(review): the project here is hard-coded while the lookup above uses
        # --wandb_project_name; confirm whether the two are meant to stay in sync.
        wandb_run = wandb.init(
            project="language_det_eval",
            job_type="evaluate",
            name=eval_wandb_name,
        )
    else:
        wandb_run = None
        history = None

    print("weight_iter: ", weight_iter)
    print("train_wandb_name: ", train_wandb_name)
    print("eval_wandb_name: ", eval_wandb_name)

    if args.task_config:
        for task_config in args.task_config.split(","):
            # Each task starts from the base config, then applies its own file and
            # finally the command-line overrides again.
            cfg_ = cfg.clone()
            cfg_.defrost()
            cfg_.merge_from_file(task_config)
            cfg_.merge_from_list(args.opts)
            _evaluate_datasets(
                cfg_, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history
            )
    else:
        _evaluate_datasets(
            cfg, model, inference_function, log_dir, distributed, wandb_run, weight_iter, history
        )
# Script entry point: run the evaluation only when this file is executed directly.
if __name__ == "__main__":
    main()