#!/usr/bin/env python3 -u
# Copyright 2022 The OFA-Sys Team.
# All rights reserved.
# This source code is licensed under the Apache 2.0 license
# found in the LICENSE file in the root directory.

import logging
import os
import sys

import numpy as np
import torch
from fairseq import distributed_utils, options, tasks, utils
from fairseq.dataclass.utils import convert_namespace_to_omegaconf
from fairseq.logging import progress_bar
from fairseq.utils import reset_logging
from omegaconf import DictConfig

from utils import checkpoint_utils
from utils.eval_utils import eval_step, merge_results
logging.basicConfig(
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=os.environ.get("LOGLEVEL", "INFO").upper(),
    stream=sys.stdout,
)
logger = logging.getLogger("ofa.evaluate")
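

# Cast float32 tensors to float16; applied to each tensor in a sample via
# utils.apply_to_sample when evaluating with --fp16.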
def apply_half(t):
    if t.dtype is torch.float32:
        return t.to(dtype=torch.half)
    return t


def main(cfg: DictConfig, **kwargs):
    utils.import_user_module(cfg.common)

    reset_logging()
    logger.info(cfg)

    assert (
        cfg.dataset.max_tokens is not None or cfg.dataset.batch_size is not None
    ), "Must specify batch size either with --max-tokens or --batch-size"

    # Fix seed for stochastic decoding
    if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
        np.random.seed(cfg.common.seed)
        utils.set_torch_seed(cfg.common.seed)

    use_fp16 = cfg.common.fp16
    use_cuda = torch.cuda.is_available() and not cfg.common.cpu

    if use_cuda:
        torch.cuda.set_device(cfg.distributed_training.device_id)

    # Load ensemble
    overrides = eval(cfg.common_eval.model_overrides)
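    # Note: --model-overrides is evaluated here as Python source;
    # ast.literal_eval would be safer for untrusted input.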
    # Deal with beam-search / all-candidate VQA eval
    if cfg.task._name == "vqa_gen":
        overrides['val_inference_type'] = "beamsearch" if kwargs['beam_search_vqa_eval'] else "allcand"
    logger.info("loading model(s) from {}".format(cfg.common_eval.path))
    if kwargs["zero_shot"]:
        task = tasks.setup_task(cfg.task)
        models, saved_cfg = checkpoint_utils.load_model_ensemble(
            utils.split_paths(cfg.common_eval.path),
            arg_overrides=overrides,
            task=task,
            suffix=cfg.checkpoint.checkpoint_suffix,
            strict=(cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=cfg.checkpoint.checkpoint_shard_count,
        )
    else:
        models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
            utils.split_paths(cfg.common_eval.path),
            arg_overrides=overrides,
            suffix=cfg.checkpoint.checkpoint_suffix,
            strict=(cfg.checkpoint.checkpoint_shard_count == 1),
            num_shards=cfg.checkpoint.checkpoint_shard_count,
        )

    # Loading the dataset must happen after the checkpoint has been loaded,
    # so that the saved task config can be passed along.
    task.load_dataset(cfg.dataset.gen_subset, task_cfg=saved_cfg.task)

    # Optionally load EMA weights, then prepare each model for inference
    for model, ckpt_path in zip(models, utils.split_paths(cfg.common_eval.path)):
        if kwargs['ema_eval']:
            logger.info("loading EMA weights from {}".format(ckpt_path))
            model.load_state_dict(checkpoint_utils.load_ema_from_checkpoint(ckpt_path)['model'])
        model.eval()
        if use_fp16:
            model.half()
        if use_cuda and not cfg.distributed_training.pipeline_model_parallel:
            model.cuda()
        model.prepare_for_inference_(cfg)

    # Load dataset (possibly sharded)
    itr = task.get_batch_iterator(
        dataset=task.dataset(cfg.dataset.gen_subset),
        max_tokens=cfg.dataset.max_tokens,
        max_sentences=cfg.dataset.batch_size,
        max_positions=utils.resolve_max_positions(
            task.max_positions(), *[m.max_positions() for m in models]
        ),
        ignore_invalid_inputs=cfg.dataset.skip_invalid_size_inputs_valid_test,
        required_batch_size_multiple=cfg.dataset.required_batch_size_multiple,
        seed=cfg.common.seed,
        num_shards=cfg.distributed_training.distributed_world_size,
        shard_id=cfg.distributed_training.distributed_rank,
        num_workers=cfg.dataset.num_workers,
        data_buffer_size=cfg.dataset.data_buffer_size,
    ).next_epoch_itr(shuffle=False)
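    # The iterator is sharded by distributed rank, so each worker evaluates
    # its own slice of the dataset; shuffle=False keeps evaluation order fixed.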
    progress = progress_bar.progress_bar(
        itr,
        log_format=cfg.common.log_format,
        log_interval=cfg.common.log_interval,
        default_log_format=("tqdm" if not cfg.common.no_progress_bar else "simple"),
    )

    # Initialize generator
    generator = task.build_generator(models, cfg.generation)
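    # build_generator configures decoding from cfg.generation
    # (beam size, max length, etc.).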

    results = []
    prec_list = [.5, .6, .7, .8, .9]
    prec_score_sum = [torch.FloatTensor([0]).cuda() for _ in prec_list]
    f_score_sum = torch.FloatTensor([0]).cuda()
    ap_det_score_sum = torch.FloatTensor([0]).cuda()
    score_sum = torch.FloatTensor([0]).cuda()
    score_cnt = torch.FloatTensor([0]).cuda()
    cum_I_sum = torch.FloatTensor([0]).cuda()
    cum_U_sum = torch.FloatTensor([0]).cuda()
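    # Accumulate the per-batch metrics returned by eval_step:
    #   prec_score_sum      - samples whose score clears each threshold in prec_list
    #   score_sum/score_cnt - running score total and sample count (for the mean)
    #   cum_I_sum/cum_U_sum - names suggest cumulative intersection/union sums
    #   f_score_sum, ap_det_score_sum - running F-score and detection-AP totals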
    for sample in progress:
        if "net_input" not in sample:
            continue
        sample = utils.move_to_cuda(sample) if use_cuda else sample
        sample = utils.apply_to_sample(apply_half, sample) if cfg.common.fp16 else sample
        with torch.no_grad():
            result, scores, f_scores, ap_scores, cum_I, cum_U = eval_step(task, generator, models, sample, **kwargs)
        results += result
        for prec_score, prec in zip(prec_score_sum, prec_list):
            prec_score += sum(scores >= prec) if scores is not None else 0
        cum_I_sum += sum(cum_I) if scores is not None else 0
        cum_U_sum += sum(cum_U) if scores is not None else 0
        score_sum += sum(scores) if scores is not None else 0
        f_score_sum += sum(f_scores) if scores is not None else 0
        ap_det_score_sum += sum(ap_scores) if scores is not None else 0
        score_cnt += len(scores) if scores is not None else 0
        progress.log({"sentences": sample["nsentences"]})
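
    # merge_results (utils/eval_utils.py) is expected to reduce these
    # per-shard partial sums across ranks and report the final metrics.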
    merge_results(task, cfg, logger, score_cnt, score_sum, f_score_sum, ap_det_score_sum,
                  prec_score_sum, cum_I_sum, cum_U_sum, results)


def cli_main():
    parser = options.get_generation_parser()
    parser.add_argument("--ema-eval", action='store_true', help="Use EMA weights for evaluation.")
    parser.add_argument("--beam-search-vqa-eval", action='store_true',
                        help="Use beam search for VQA evaluation (faster inference but sub-optimal results). "
                             "If not specified, a score is computed for every answer in the candidate set, "
                             "which is slower but yields the best results.")
    parser.add_argument("--zero-shot", action='store_true',
                        help="Set up the task from the command-line config instead of the checkpoint's saved task config.")
    parser.add_argument("--vis_dir", type=str, default=None)
    parser.add_argument("--result_dir", type=str, default=None)
    parser.add_argument("--vis", action='store_true', default=False)
    args = options.parse_args_and_arch(parser)
    cfg = convert_namespace_to_omegaconf(args)
    if args.result_dir is None:
        args.result_dir = args.vis_dir
    distributed_utils.call_main(
        cfg, main, ema_eval=args.ema_eval, beam_search_vqa_eval=args.beam_search_vqa_eval, zero_shot=args.zero_shot,
        vis_dir=args.vis_dir, vis=args.vis, result_dir=args.result_dir
    )


if __name__ == "__main__":
    cli_main()
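

# Example invocation (illustrative only; the data path, task name, and
# checkpoint below are placeholders, not values shipped with this script):
#
#   python3 evaluate.py /path/to/data \
#       --path /path/to/checkpoint.pt \
#       --task refcoco --batch-size 16 --beam 5 --gen-subset test \
#       --fp16 --ema-eval --result_dir ./results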