import os
import pprint
import logging
from time import time

import numpy as np
from tqdm import tqdm

import torch
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader

from config.config import TestOptions
from model.conquer import CONQUER
from data_loader.second_stage_start_end_dataset import StartEndDataset as StartEndEvalDataset
from utils.inference_utils import get_submission_top_n, post_processing_vcmr_nms
from utils.basic_utils import save_json, load_config
from utils.tensor_utils import find_max_triples_from_upper_triangle_product
from utils.model_utils import move_cuda, start_end_collate, VERY_NEGATIVE_NUMBER
from standalone_eval.eval import eval_retrieval
from ndcg_iou_topk import calculate_ndcg_iou

logger = logging.getLogger(__name__)
logging.basicConfig(format="%(asctime)s.%(msecs)03d:%(levelname)s:%(name)s - %(message)s",
                    datefmt="%Y-%m-%d %H:%M:%S",
                    level=logging.INFO)


def generate_min_max_length_mask(array_shape, min_l, max_l):
    """ The last two dimensions denote an upper-triangle matrix whose upper-right corner is masked;
    below is the case for 4x4 (min_l=1, max_l=3):
    [[0, 1, 1, 0],
     [0, 0, 1, 1],
     [0, 0, 0, 1],
     [0, 0, 0, 0]]

    Args:
        array_shape: tuple of ints; the last two dimensions should be equal, i.e. (..., L, L)
        min_l: int, minimum length of predicted span
        max_l: int, maximum length of predicted span

    Returns:
        np.ndarray of shape (1, ..., 1, L, L), a float32 mask that keeps positions
        where min_l <= (ed_index - st_index) < max_l
    """
    single_dims = (1, ) * (len(array_shape) - 2)
    mask_shape = single_dims + array_shape[-2:]
    extra_length_mask_array = np.ones(mask_shape, dtype=np.float32)
    # keep entries at least min_l above the diagonal ...
    mask_triu = np.triu(extra_length_mask_array, k=min_l)
    # ... but strictly less than max_l above it
    mask_triu_reversed = 1 - np.triu(extra_length_mask_array, k=max_l)
    final_prob_mask = mask_triu * mask_triu_reversed
    return final_prob_mask
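
# A minimal usage sketch (illustrative values): min_l=1, max_l=3 reproduces the
# 4x4 mask shown in the docstring:
#   generate_min_max_length_mask((1, 4, 4), min_l=1, max_l=3)
#   -> array([[[0., 1., 1., 0.],
#              [0., 0., 1., 1.],
#              [0., 0., 0., 1.],
#              [0., 0., 0., 0.]]], dtype=float32)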


def get_svmr_res_from_st_ed_probs_disjoint(svmr_gt_st_probs, svmr_gt_ed_probs, query_metas, video2idx,
                                           clip_length, min_pred_l, max_pred_l, max_before_nms):
    """
    Args:
        svmr_gt_st_probs: np.ndarray (N_queries, L), value range [0, 1]
        svmr_gt_ed_probs: np.ndarray (N_queries, L), value range [0, 1]
        query_metas: list(dict), one per query, with keys "vid_name", "query_id" and "desc"
        video2idx: dict, {vid_name: video index}
        clip_length: float, how long each clip is in seconds
        min_pred_l: int, minimum number of clips
        max_pred_l: int, maximum number of clips
        max_before_nms: int, keep top-max_before_nms predictions for each query

    Returns:
        list(dict), one entry per query with keys "query_id", "desc" and "predictions",
        where each prediction is [video_idx, st (seconds), ed (seconds), score]
    """
    svmr_res = []
    query_vid_names = [e["vid_name"] for e in query_metas]

    # disjoint score: score(st, ed) = b_st + e_ed, formed by broadcasting
    # (N_q, L, 1) + (N_q, 1, L) -> (N_q, L, L)
    _st_ed_scores = np.expand_dims(svmr_gt_st_probs, axis=2) + np.expand_dims(svmr_gt_ed_probs, axis=1)

    _N_q = _st_ed_scores.shape[0]

    # positions outside the allowed span-length range are invalid
    _invalid_prob_mask = np.logical_not(generate_min_max_length_mask(
        _st_ed_scores.shape, min_l=min_pred_l, max_l=max_pred_l).astype(bool))

    invalid_prob_mask = np.tile(_invalid_prob_mask, (_N_q, 1, 1))

    _st_ed_scores[invalid_prob_mask] = VERY_NEGATIVE_NUMBER

    batched_sorted_triples = find_max_triples_from_upper_triangle_product(
        _st_ed_scores, top_n=max_before_nms, prob_thd=None)
    for i, q_vid_name in tqdm(enumerate(query_vid_names),
                              desc="[SVMR] Loop over queries to generate predictions",
                              total=len(query_vid_names)):
        q_m = query_metas[i]
        video_idx = video2idx[q_vid_name]
        _sorted_triples = batched_sorted_triples[i]
        _sorted_triples[:, 1] += 1  # make the end index exclusive before converting to seconds
        _sorted_triples[:, :2] = _sorted_triples[:, :2] * clip_length

        cur_ranked_predictions = [[video_idx, ] + row for row in _sorted_triples.tolist()]
        cur_query_pred = dict(
            query_id=q_m["query_id"],
            desc=q_m["desc"],
            predictions=cur_ranked_predictions
        )
        svmr_res.append(cur_query_pred)
    return svmr_res
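
# Sketch (toy values): the broadcast above forms all pairwise sums b_st + e_ed, e.g.
#   st = np.array([[1., 2.]]); ed = np.array([[10., 20.]])
#   np.expand_dims(st, 2) + np.expand_dims(ed, 1)
#   -> array([[[11., 21.],
#              [12., 22.]]])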


def get_svmr_res_from_st_ed_probs(svmr_gt_st_probs, svmr_gt_ed_probs, query_metas, video2idx,
                                  clip_length, min_pred_l, max_pred_l, max_before_nms):
    """
    Args:
        svmr_gt_st_probs: np.ndarray (N_queries, L), value range [0, 1]
        svmr_gt_ed_probs: np.ndarray (N_queries, L), value range [0, 1]
        query_metas: list(dict), one per query, with keys "vid_name", "query_id" and "desc"
        video2idx: dict, {vid_name: video index}
        clip_length: float, how long each clip is in seconds
        min_pred_l: int, minimum number of clips
        max_pred_l: int, maximum number of clips
        max_before_nms: int, keep top-max_before_nms predictions for each query

    Returns:
        list(dict), same format as get_svmr_res_from_st_ed_probs_disjoint
    """
    svmr_res = []
    query_vid_names = [e["vid_name"] for e in query_metas]

    # joint score: score(st, ed) = b_st * e_ed, an outer product per query
    st_ed_prob_product = np.einsum("bm,bn->bmn", svmr_gt_st_probs, svmr_gt_ed_probs)

    valid_prob_mask = generate_min_max_length_mask(st_ed_prob_product.shape, min_l=min_pred_l, max_l=max_pred_l)
    st_ed_prob_product *= valid_prob_mask  # zero out spans outside the allowed length range

    batched_sorted_triples = find_max_triples_from_upper_triangle_product(
        st_ed_prob_product, top_n=max_before_nms, prob_thd=None)
    for i, q_vid_name in tqdm(enumerate(query_vid_names),
                              desc="[SVMR] Loop over queries to generate predictions",
                              total=len(query_vid_names)):
        q_m = query_metas[i]
        video_idx = video2idx[q_vid_name]
        _sorted_triples = batched_sorted_triples[i]
        _sorted_triples[:, 1] += 1  # make the end index exclusive before converting to seconds
        _sorted_triples[:, :2] = _sorted_triples[:, :2] * clip_length

        cur_ranked_predictions = [[video_idx, ] + row for row in _sorted_triples.tolist()]
        cur_query_pred = dict(
            query_id=q_m["query_id"],
            desc=q_m["desc"],
            predictions=cur_ranked_predictions
        )
        svmr_res.append(cur_query_pred)
    return svmr_res
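
# Sketch (toy values): the einsum above is a per-query outer product, e.g.
#   st = np.array([[0.6, 0.4]]); ed = np.array([[0.3, 0.7]])
#   np.einsum("bm,bn->bmn", st, ed)
#   -> array([[[0.18, 0.42],
#              [0.12, 0.28]]])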


def compute_query2ctx_info(model, eval_dataset, opt,
                           max_before_nms=200, max_n_videos=100, tasks=("SVMR",)):
    r"""
    Use the val set to do evaluation; remember to run with torch.no_grad().
    model: CONQUER
    eval_dataset: StartEndEvalDataset
    opt: TestOptions
    max_before_nms: max moment number before non-maximum suppression
    max_n_videos: max number of videos retrieved per query
    tasks: evaluation tasks

    general/exclusive scoring function: r * \hat{b}_i * \hat{e}_i
    """
    is_vr = "VR" in tasks
    is_vcmr = "VCMR" in tasks
    is_svmr = "SVMR" in tasks

    video2idx = eval_dataset.video2idx

    model.eval()
    query_eval_loader = DataLoader(eval_dataset,
                                   collate_fn=start_end_collate,
                                   batch_size=opt.eval_query_bsz,
                                   num_workers=opt.num_workers,
                                   shuffle=False,
                                   pin_memory=True)

    n_total_query = len(eval_dataset)
    bsz = opt.eval_query_bsz

    if is_vcmr:
        flat_st_ed_scores_sorted_indices = np.empty((n_total_query, max_before_nms), dtype=int)
        flat_st_ed_sorted_scores = np.zeros((n_total_query, max_before_nms), dtype=np.float32)

    if is_vr:
        if opt.use_interal_vr_scores:
            sorted_q2c_indices = np.tile(np.arange(max_n_videos, dtype=int),
                                         n_total_query).reshape(n_total_query, max_n_videos)
            sorted_q2c_scores = np.empty((n_total_query, max_n_videos), dtype=np.float32)
        else:
            sorted_q2c_indices = np.empty((n_total_query, max_n_videos), dtype=int)
            sorted_q2c_scores = np.empty((n_total_query, max_n_videos), dtype=np.float32)

    if is_svmr:
        svmr_gt_st_probs = np.zeros((n_total_query, opt.max_ctx_len), dtype=np.float32)
        svmr_gt_ed_probs = np.zeros((n_total_query, opt.max_ctx_len), dtype=np.float32)

    query_metas = []
    for idx, batch in tqdm(
            enumerate(query_eval_loader), desc="Computing q embedding", total=len(query_eval_loader)):

        query_metas.extend(batch["meta"])

        if opt.device.type == "cuda":
            model_inputs = move_cuda(batch["model_inputs"], opt.device)
        else:
            model_inputs = batch["model_inputs"]

        video_similarity_score, begin_score_distribution, end_score_distribution = \
            model.get_pred_from_raw_query(model_inputs)

        if is_svmr:
            # slot 0 of the video axis holds the query's ground-truth video
            # (hence the *_gt_* buffers); slots 1: are the retrieved candidates
            _svmr_st_probs = begin_score_distribution[:, 0]
            _svmr_ed_probs = end_score_distribution[:, 0]

            _svmr_st_probs = F.softmax(_svmr_st_probs, dim=-1)
            _svmr_ed_probs = F.softmax(_svmr_ed_probs, dim=-1)
            if opt.debug:
                print("svmr_st_probs: ", _svmr_st_probs)

            svmr_gt_st_probs[idx * bsz:(idx + 1) * bsz] = _svmr_st_probs.cpu().numpy()
            svmr_gt_ed_probs[idx * bsz:(idx + 1) * bsz] = _svmr_ed_probs.cpu().numpy()

        _vcmr_st_prob = begin_score_distribution[:, 1:]
        _vcmr_ed_prob = end_score_distribution[:, 1:]

        if not (is_vr or is_vcmr):
            continue

        if opt.use_interal_vr_scores:
            # use the video-retrieval scores provided with the inputs as-is
            bs = begin_score_distribution.size(0)
            _sorted_q2c_indices = torch.arange(max_n_videos).to(begin_score_distribution.device).repeat(bs, 1)
            _sorted_q2c_scores = model_inputs["inference_vr_scores"]
            if is_vr:
                sorted_q2c_scores[idx * bsz:(idx + 1) * bsz] = model_inputs["inference_vr_scores"].cpu().numpy()
        else:
            # re-rank candidate videos with the model's own similarity scores
            video_similarity_score = video_similarity_score[:, 1:]  # drop the ground-truth slot
            _query_context_scores = torch.softmax(video_similarity_score, dim=1)

            _sorted_q2c_scores, _sorted_q2c_indices = \
                torch.topk(_query_context_scores, max_n_videos, dim=1, largest=True)
            if is_vr:
                sorted_q2c_indices[idx * bsz:(idx + 1) * bsz] = _sorted_q2c_indices.cpu().numpy()
                sorted_q2c_scores[idx * bsz:(idx + 1) * bsz] = _sorted_q2c_scores.cpu().numpy()

        if not is_vcmr:
            continue

        _st_probs = F.softmax(_vcmr_st_prob, dim=-1)  # (N_q, N_videos, L)
        _ed_probs = F.softmax(_vcmr_ed_prob, dim=-1)

        # keep only the top max_n_videos candidates, in their sorted order
        row_indices = torch.arange(0, len(_st_probs), device=opt.device).unsqueeze(1)
        _st_probs = _st_probs[row_indices, _sorted_q2c_indices]
        _ed_probs = _ed_probs[row_indices, _sorted_q2c_indices]

        # fused score: score(q, v, st, ed) = st_prob * video_score * ed_prob
        _st_ed_scores = torch.einsum("qvm,qv,qvn->qvmn", _st_probs, _sorted_q2c_scores, _ed_probs)

        valid_prob_mask = generate_min_max_length_mask(
            _st_ed_scores.shape, min_l=opt.min_pred_l, max_l=opt.max_pred_l)

        _st_ed_scores *= torch.from_numpy(valid_prob_mask).to(_st_ed_scores.device)

        _n_q = _st_ed_scores.shape[0]

        # sort the flattened (video, st, ed) scores once per query and keep the top entries
        _flat_st_ed_scores = _st_ed_scores.reshape(_n_q, -1)
        _flat_st_ed_sorted_scores, _flat_st_ed_scores_sorted_indices = \
            torch.sort(_flat_st_ed_scores, dim=1, descending=True)

        flat_st_ed_sorted_scores[idx * bsz:(idx + 1) * bsz] = \
            _flat_st_ed_sorted_scores[:, :max_before_nms].detach().cpu().numpy()
        flat_st_ed_scores_sorted_indices[idx * bsz:(idx + 1) * bsz] = \
            _flat_st_ed_scores_sorted_indices[:, :max_before_nms].detach().cpu().numpy()

        if opt.debug:
            break
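
# Sketch of the fused VCMR score (toy sizes): for query q and candidate video v,
# score[q, v, m, n] = st_prob[q, v, m] * vr_score[q, v] * ed_prob[q, v, n], e.g.
#   st = torch.tensor([[[0.5, 0.5]]]); r = torch.tensor([[2.0]]); ed = torch.tensor([[[1.0, 3.0]]])
#   torch.einsum("qvm,qv,qvn->qvmn", st, r, ed)[0, 0]
#   -> tensor([[1., 3.],
#              [1., 3.]])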

    vr_res = []
    if is_vr:
        for i, (_sorted_q2c_scores_row, _sorted_q2c_indices_row) in tqdm(
                enumerate(zip(sorted_q2c_scores, sorted_q2c_indices)),
                desc="[VR] Loop over queries to generate predictions", total=n_total_query):
            cur_vr_predictions = []
            query_specific_video_metas = query_metas[i]["sample_vid_name_list"]
            for j, (v_score, v_meta_idx) in enumerate(zip(_sorted_q2c_scores_row, _sorted_q2c_indices_row)):
                video_idx = video2idx[query_specific_video_metas[v_meta_idx]]
                cur_vr_predictions.append([video_idx, 0, 0, float(v_score)])
            cur_query_pred = dict(
                query_id=query_metas[i]["query_id"],
                desc=query_metas[i]["desc"],
                predictions=cur_vr_predictions
            )
            vr_res.append(cur_query_pred)

    svmr_res = []
    if is_svmr:
        svmr_res = get_svmr_res_from_st_ed_probs(svmr_gt_st_probs, svmr_gt_ed_probs,
                                                 query_metas, video2idx,
                                                 clip_length=opt.clip_length,
                                                 min_pred_l=opt.min_pred_l,
                                                 max_pred_l=opt.max_pred_l,
                                                 max_before_nms=max_before_nms)

    vcmr_res = []
    if is_vcmr:
        for i, (_flat_st_ed_scores_sorted_indices, _flat_st_ed_sorted_scores) in tqdm(
                enumerate(zip(flat_st_ed_scores_sorted_indices, flat_st_ed_sorted_scores)),
                desc="[VCMR] Loop over queries to generate predictions", total=n_total_query):

            # map each flat index back to (candidate video, start clip, end clip)
            video_meta_indices_local, pred_st_indices, pred_ed_indices = \
                np.unravel_index(_flat_st_ed_scores_sorted_indices,
                                 shape=(max_n_videos, opt.max_ctx_len, opt.max_ctx_len))

            # translate local candidate ranks into dataset-level video indices
            video_meta_indices = sorted_q2c_indices[i, video_meta_indices_local]

            pred_st_in_seconds = pred_st_indices.astype(np.float32) * opt.clip_length
            pred_ed_in_seconds = pred_ed_indices.astype(np.float32) * opt.clip_length + opt.clip_length
            cur_vcmr_predictions = []
            query_specific_video_metas = query_metas[i]["sample_vid_name_list"]
            for j, (v_meta_idx, v_score) in enumerate(zip(video_meta_indices, _flat_st_ed_sorted_scores)):
                video_idx = video2idx[query_specific_video_metas[v_meta_idx]]
                cur_vcmr_predictions.append(
                    [video_idx, float(pred_st_in_seconds[j]), float(pred_ed_in_seconds[j]), float(v_score)])

            cur_query_pred = dict(
                query_id=query_metas[i]["query_id"],
                desc=query_metas[i]["desc"],
                predictions=cur_vcmr_predictions)
            vcmr_res.append(cur_query_pred)

    res = dict(VCMR=vcmr_res, SVMR=svmr_res, VR=vr_res)
    return {k: v for k, v in res.items() if len(v) != 0}
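
# Shape of the structure returned above (format follows the code):
#   {"VCMR": [{"query_id": ..., "desc": ...,
#              "predictions": [[video_idx, st_sec, ed_sec, score], ...]}, ...],
#    "SVMR": [...], "VR": [...]}   # tasks that were not requested are dropped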


def compute_query2ctx_info_disjoint(model, eval_dataset, opt,
                                    max_before_nms=200, max_n_videos=100, maxtopk=40):
    """Use the val set to do evaluation; remember to run with torch.no_grad().
    model: CONQUER
    eval_dataset: StartEndEvalDataset
    opt: TestOptions
    max_before_nms: max moment number before non-maximum suppression
    max_n_videos: max number of videos retrieved per query
    maxtopk: max number of moments kept per query in the returned dict

    disjoint scoring function: b_i + e_i
    """
    video2idx = eval_dataset.video2idx

    model.eval()
    query_eval_loader = DataLoader(eval_dataset, collate_fn=start_end_collate, batch_size=opt.eval_query_bsz,
                                   num_workers=opt.num_workers, shuffle=False, pin_memory=True)

    n_total_query = len(eval_dataset)
    bsz = opt.eval_query_bsz

    flat_st_ed_scores_sorted_indices = np.empty((n_total_query, max_before_nms), dtype=int)
    flat_st_ed_sorted_scores = np.zeros((n_total_query, max_before_nms), dtype=np.float32)

    query_metas = []
    for idx, batch in tqdm(
            enumerate(query_eval_loader), desc="Computing q embedding", total=len(query_eval_loader)):

        query_metas.extend(batch["meta"])
        if opt.device.type == "cuda":
            model_inputs = move_cuda(batch["model_inputs"], opt.device)
        else:
            model_inputs = batch["model_inputs"]

        _, begin_score_distribution, end_score_distribution = model.get_pred_from_raw_query(model_inputs)

        # drop the ground-truth slot; keep only the retrieved candidate videos
        begin_score_distribution = begin_score_distribution[:, 1:]
        end_score_distribution = end_score_distribution[:, 1:]

        # disjoint score: score(st, ed) = b_st + e_ed, formed by broadcasting
        # (N_q, N_videos, L, 1) + (N_q, N_videos, 1, L) -> (N_q, N_videos, L, L)
        _st_ed_scores = torch.unsqueeze(begin_score_distribution, 3) + torch.unsqueeze(end_score_distribution, 2)

        _n_q, total_n_videos = _st_ed_scores.size()[:2]

        # positions outside the allowed span-length range are invalid
        _invalid_prob_mask = np.logical_not(generate_min_max_length_mask(
            _st_ed_scores.shape, min_l=opt.min_pred_l, max_l=opt.max_pred_l).astype(bool))

        _invalid_prob_mask = torch.from_numpy(_invalid_prob_mask).to(_st_ed_scores.device)

        invalid_prob_mask = _invalid_prob_mask.repeat(_n_q, total_n_videos, 1, 1)

        _st_ed_scores[invalid_prob_mask] = VERY_NEGATIVE_NUMBER

        # sort the flattened (video, st, ed) scores once per query and keep the top entries
        _flat_st_ed_scores = _st_ed_scores.reshape(_n_q, -1)
        _flat_st_ed_sorted_scores, _flat_st_ed_scores_sorted_indices = \
            torch.sort(_flat_st_ed_scores, dim=1, descending=True)

        flat_st_ed_sorted_scores[idx * bsz:(idx + 1) * bsz] = \
            _flat_st_ed_sorted_scores[:, :max_before_nms].detach().cpu().numpy()
        flat_st_ed_scores_sorted_indices[idx * bsz:(idx + 1) * bsz] = \
            _flat_st_ed_scores_sorted_indices[:, :max_before_nms].detach().cpu().numpy()

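# Sketch (toy sizes): np.unravel_index maps a flat argsort index back to a
# (video, start_clip, end_clip) triple, e.g. with shape (2, 3, 3):
#   np.unravel_index(11, shape=(2, 3, 3)) -> (1, 0, 2)   # 11 = 1*9 + 0*3 + 2
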
    vcmr_res = {}
    for i, (_flat_st_ed_scores_sorted_indices, _flat_st_ed_sorted_scores) in tqdm(
            enumerate(zip(flat_st_ed_scores_sorted_indices, flat_st_ed_sorted_scores)),
            desc="[VCMR] Loop over queries to generate predictions", total=n_total_query):

        # map each flat index back to (candidate video, start clip, end clip)
        video_meta_indices_local, pred_st_indices, pred_ed_indices = \
            np.unravel_index(_flat_st_ed_scores_sorted_indices,
                             shape=(total_n_videos, opt.max_ctx_len, opt.max_ctx_len))

        pred_st_in_seconds = pred_st_indices.astype(np.float32) * opt.clip_length
        pred_ed_in_seconds = pred_ed_indices.astype(np.float32) * opt.clip_length + opt.clip_length
        cur_vcmr_predictions = []
        query_specific_video_metas = query_metas[i]["sample_vid_name_list"]
        for j, (v_meta_idx, v_score) in enumerate(zip(video_meta_indices_local, _flat_st_ed_sorted_scores)):
            cur_vcmr_predictions.append(
                {
                    "video_name": query_specific_video_metas[v_meta_idx],
                    "timestamp": [float(pred_st_in_seconds[j]), float(pred_ed_in_seconds[j])],
                    "model_scores": float(v_score)
                }
            )
        query_id = query_metas[i]["query_id"]
        vcmr_res[query_id] = cur_vcmr_predictions[:maxtopk]
    return vcmr_res
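
# Shape of the structure returned above (format follows the code):
#   {query_id: [{"video_name": ..., "timestamp": [st_sec, ed_sec], "model_scores": ...}, ...]}
# with at most `maxtopk` moments per query.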


def get_eval_res(model, eval_dataset, opt):
    """Compute query-to-context scores and assemble predictions for evaluation."""

    if opt.similarity_measure == "disjoint":
        eval_res = compute_query2ctx_info_disjoint(model, eval_dataset, opt,
                                                   max_before_nms=opt.max_before_nms,
                                                   max_n_videos=opt.max_vcmr_video)
    elif opt.similarity_measure in ["general", "exclusive"]:
        eval_res = compute_query2ctx_info(model, eval_dataset, opt,
                                          max_before_nms=opt.max_before_nms,
                                          max_n_videos=opt.max_vcmr_video,
                                          tasks=opt.tasks)
    else:
        raise ValueError("Unknown similarity_measure: {}".format(opt.similarity_measure))

    return eval_res


POST_PROCESSING_MMS_FUNC = {
    "SVMR": post_processing_vcmr_nms,
    "VCMR": post_processing_vcmr_nms
}


def get_prediction_top_n(list_dict_predictions, top_n):
    top_n_res = []
    for e in list_dict_predictions:
        e["predictions"] = e["predictions"][:top_n]
        top_n_res.append(e)
    return top_n_res


def eval_epoch(model, eval_dataset, opt, max_after_nms, iou_thds, topks):
    """Run inference on eval_dataset and score predictions with NDCG over IoU thresholds."""
    pred_data = get_eval_res(model, eval_dataset, opt)

    gt_data = eval_dataset.ground_truth
    average_ndcg = calculate_ndcg_iou(gt_data, pred_data, iou_thds, topks)
    return average_ndcg, pred_data


def setup_model(opt):
    """Load model from checkpoint and move to the specified device."""
    checkpoint = torch.load(opt.ckpt_filepath)
    loaded_model_cfg = checkpoint["model_cfg"]

    model = CONQUER(loaded_model_cfg,
                    visual_dim=opt.visual_dim,
                    text_dim=opt.text_dim,
                    query_dim=opt.query_dim,
                    hidden_dim=opt.hidden_dim,
                    video_len=opt.max_ctx_len,
                    ctx_mode=opt.ctx_mode,
                    no_output_moe_weight=opt.no_output_moe_weight,
                    similarity_measure=opt.similarity_measure,
                    use_debug=opt.debug)
    model.load_state_dict(checkpoint["model"])

    logger.info("Loaded model saved at epoch {} from checkpoint: {}"
                .format(checkpoint["epoch"], opt.ckpt_filepath))

    if opt.device.type == "cuda":
        logger.info("CUDA enabled.")
        model.to(opt.device)
        assert len(opt.device_ids) == 1

    return model
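
# Note (general PyTorch behavior, not specific to this repo): torch.load restores
# tensors to the devices they were saved from; pass map_location="cpu" to
# torch.load above if a GPU checkpoint must be loaded on a CPU-only machine.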


def start_inference():
    logger.info("Setup config, data and model...")
    opt = TestOptions().parse()
    cudnn.benchmark = False
    cudnn.deterministic = True

    data_config = load_config(opt.dataset_config)

    eval_dataset = StartEndEvalDataset(
        config=data_config,
        max_ctx_len=opt.max_ctx_len,
        max_desc_len=opt.max_desc_len,
        clip_length=opt.clip_length,
        ctx_mode=opt.ctx_mode,
        mode=opt.eval_split_name,
        data_ratio=opt.data_ratio,
        is_eval=True,
        inference_top_k=opt.max_vcmr_video)

    postfix = "_hero"
    model = setup_model(opt)
    save_submission_filename = "inference_{}_{}_{}_predictions_{}{}.json".format(
        opt.dset_name, opt.eval_split_name, opt.eval_id, "_".join(opt.tasks), postfix)
    logger.info(save_submission_filename)
    logger.info("Starting inference...")
    # NOTE: the IoU thresholds and top-k cutoffs below are illustrative defaults;
    # take them from your options/config if TestOptions exposes them.
    with torch.no_grad():
        average_ndcg, pred_data = eval_epoch(model, eval_dataset, opt,
                                             max_after_nms=100,
                                             iou_thds=(0.3, 0.5, 0.7),
                                             topks=(10, 20, 40))
    logger.info("average_ndcg \n{}".format(pprint.pformat(average_ndcg, indent=4)))
    save_json(pred_data, save_submission_filename)


if __name__ == '__main__':
    start_inference()