| | from __future__ import absolute_import |
| | from __future__ import division |
| | from __future__ import print_function |
| |
|
| | import os |
| | import sys |
| | import collections |
| | import torch |
| | import numpy as np |
| | import json |
| | from collections import OrderedDict |
| | from tqdm import tqdm |
| | from os.path import dirname, abspath |
| |
|
| | pdvc_dir = dirname(abspath(__file__)) |
| | sys.path.insert(0, pdvc_dir) |
| | sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3')) |
| | sys.path.insert(0, os.path.join(pdvc_dir, 'densevid_eval3/SODA')) |
| |
|
| |
|
| | from densevid_eval3.eval_soda import eval_soda |
| | from densevid_eval3.eval_para import eval_para |
| | from densevid_eval3.eval_dvc import eval_dvc |
| |
|
def calculate_avg_proposal_num(json_path):
    """Return the mean number of proposals per video in a DVC result file.

    Args:
        json_path: path to a JSON file whose top-level 'results' maps
            video name -> list of proposals.

    Returns:
        Mean proposal count, or 0.0 when 'results' is empty
        (np.mean([]) would warn and return NaN).
    """
    with open(json_path) as f:  # close the handle; original leaked it
        data = json.load(f)
    counts = [len(v) for v in data['results'].values()]
    if not counts:
        return 0.0
    return np.array(counts).mean()
| |
|
def convert_tapjson_to_dvcjson(tap_json, dvc_json):
    """Convert a temporal-action-proposal JSON into dense-video-captioning format.

    For each proposal: 'segment' -> 'timestamp', 'score' -> 'proposal_score',
    and 'sentence_score' defaults to 0 when absent. Every video id gains a
    'v_' prefix (ActivityNet naming convention).

    Args:
        tap_json: input path (TAP-style results).
        dvc_json: output path (DVC-style results).
    """
    with open(tap_json, 'r') as f:  # was leaked via bare open()
        data = json.load(f)
    data['version'] = "VERSION 1.0"
    # NOTE: the 'used:' key keeps its trailing colon for compatibility with
    # existing consumers of these files.
    data['external_data'] = {'used:': True, 'details': "C3D pretrained on Sports-1M"}

    # Snapshot the keys: we rename entries in-place while iterating.
    all_names = list(data['results'].keys())
    for video_name in all_names:
        for p_info in data['results'][video_name]:
            p_info['timestamp'] = p_info.pop('segment')
            p_info['proposal_score'] = p_info.pop('score')
            p_info['sentence_score'] = p_info.pop('sentence_score', 0)
        data['results']["v_" + video_name] = data['results'].pop(video_name)

    with open(dvc_json, 'w') as f:
        json.dump(data, f)
| |
|
| |
|
def convert_dvcjson_to_tapjson(dvc_json, tap_json):
    """Convert a dense-video-captioning JSON back into TAP format.

    Inverse of convert_tapjson_to_dvcjson: 'timestamp' -> 'segment',
    'proposal_score' -> 'score' (default 1.0), and the leading 'v_' prefix
    is stripped from every video id.

    Args:
        dvc_json: input path (DVC-style results).
        tap_json: output path (TAP-style results).
    """
    with open(dvc_json, 'r') as f:  # was leaked via bare open()
        data = json.load(f)['results']
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }

    for video_name, events in data.items():
        # Single pass over the events instead of three parallel traversals.
        video_info = [
            {
                'segment': e['timestamp'],
                'score': e.get('proposal_score', 1.0),
                'sentence': e['sentence'],
                'sentence_score': e.get('sentence_score', 0),
            }
            for e in events
        ]
        # Drop the first two chars ('v_') to recover the raw video id.
        out['results'][video_name[2:]] = video_info

    with open(tap_json, 'w') as f:
        json.dump(out, f)
| |
|
| |
|
def convert_gtjson_to_tapjson(gt_json, tap_json):
    """Convert a ground-truth annotation JSON into TAP format.

    Each GT video entry provides parallel 'timestamps' and 'sentences'
    lists; every event becomes a proposal with score 1.0. The leading
    'v_' prefix is stripped from every video id.

    Args:
        gt_json: input path (GT annotations keyed by video name).
        tap_json: output path (TAP-style results).
    """
    with open(gt_json, 'r') as f:  # was leaked via bare open()
        data = json.load(f)
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }

    for video_name, anno in data.items():
        video_info = [
            {'segment': ts, 'score': 1., 'sentence': sent}
            for ts, sent in zip(anno['timestamps'], anno['sentences'])
        ]
        # Drop the first two chars ('v_') to recover the raw video id.
        out['results'][video_name[2:]] = video_info

    with open(tap_json, 'w') as f:
        json.dump(out, f)
| |
|
| |
|
def get_topn_from_dvcjson(dvc_json, out_json, top_n=3, ranking_key='proposal_score', score_thres=-1e8):
    """Keep only the top-N highest-scoring proposals per video.

    Proposals are ranked by *ranking_key* (descending), those at or below
    *score_thres* are discarded, and at most *top_n* survive. Videos left
    with no proposals are omitted from the output entirely.

    Args:
        dvc_json: input path (DVC-style results).
        out_json: output path for the filtered results.
        top_n: maximum proposals to keep per video.
        ranking_key: proposal field to rank by.
        score_thres: strict lower bound on the ranking score.
    """
    with open(dvc_json, 'r') as f:  # was leaked via bare open()
        data = json.load(f)['results']
    out = {
        'version': "VERSION 1.0",
        'external_data': {'used:': True, 'details': "GT proposals"},
        'results': {},
    }
    num = 0
    bad_vid = 0
    all_names = list(data.keys())
    for video_name in all_names:
        ranked = sorted(data[video_name], key=lambda x: x[ranking_key], reverse=True)
        kept = [p for p in ranked if p[ranking_key] > score_thres][:top_n]
        num += len(kept)
        if kept:
            # Insert only non-empty lists (original added then popped empties).
            out['results'][video_name] = kept
        else:
            bad_vid += 1
    # Guard against an empty input file (ZeroDivisionError in the original).
    avg = num / len(all_names) if all_names else 0.0
    print('average proposal number: {}'.format(avg))  # fixed typo 'proosal'
    print('bad videos number: {}'.format(bad_vid))
    print('good videos number: {}'.format(len(out['results'])))
    with open(out_json, 'w') as f:
        json.dump(out, f)
| |
|
| |
|
def eval_metrics(dvc_filename, gt_filenames, para_gt_filenames, alpha=0.3, ranking_key='proposal_score', rerank=False, dvc_eval_version='2018', transformer_input_type='queries'):
    """Compute evaluation metrics for a DVC prediction file.

    With transformer_input_type == 'prior_proposals' only the paragraph
    metric is computed; otherwise dvc (averaged per metric), SODA and
    paragraph metrics are all collected. Missing keys in the returned
    mapping default to -1.

    Args:
        dvc_filename: path to the prediction JSON.
        gt_filenames: reference file list for dvc/SODA evaluation.
        para_gt_filenames: reference file list for paragraph evaluation.
        alpha: sentence-score weight used when reranking.
        ranking_key: unused here; kept for interface compatibility.
        rerank: when True, rerank predictions before scoring.
        dvc_eval_version: evaluator version string ('2018', ...).
        transformer_input_type: 'prior_proposals' or 'queries'.

    Returns:
        defaultdict(metric name -> value) with default value -1.
    """
    metrics = collections.defaultdict(lambda: -1)

    # Guard clause: prior-proposal mode only needs the paragraph metric.
    if transformer_input_type == 'prior_proposals':
        metrics.update(eval_para(dvc_filename, referneces=para_gt_filenames))
        return metrics

    pred_path = dvc_filename
    if rerank:
        pred_path = reranking(pred_path, alpha=alpha, temperature=2.0)

    raw = eval_dvc(json_path=pred_path, reference=gt_filenames, version=dvc_eval_version)
    collected = {name: sum(vals) / len(vals) for name, vals in raw.items()}
    collected.update(eval_soda(pred_path, ref_list=gt_filenames))
    collected.update(eval_para(pred_path, referneces=para_gt_filenames))
    metrics.update(collected)
    return metrics
| |
|
| |
|
def save_dvc_json(out_json, path):
    """Write a DVC result dict to *path*, stamping summary statistics.

    Mutates *out_json* in place, adding 'valid_video_num' (number of
    videos) and 'avg_proposal_num' (mean proposals per video) before
    serializing.

    Args:
        out_json: dict with a 'results' mapping video name -> proposals.
        path: output file path.
    """
    results = out_json['results']
    out_json['valid_video_num'] = len(results)
    counts = [len(v) for v in results.values()]
    # np.mean([]) warns and yields NaN, which json.dump would write as the
    # non-standard token 'NaN'; store 0.0 for an empty result set instead.
    out_json['avg_proposal_num'] = float(np.mean(counts)) if counts else 0.0
    with open(path, 'w') as f:
        json.dump(out_json, f)
| |
|
def reranking(p_src, alpha, temperature):
    """Rerank each video's proposals by a joint caption/proposal score.

    joint = alpha * sentence_score / (len(sentence)^temperature) + proposal_score.
    Proposals are sorted by this joint score, truncated to the model's
    predicted event count, then re-sorted chronologically. The reranked
    results are written next to *p_src* and that new path is returned.

    Args:
        p_src: path to the source prediction JSON.
        alpha: weight of the length-normalized sentence score.
        temperature: exponent penalizing longer captions.

    Returns:
        Path of the reranked JSON file.
    """
    print('alpha: {}, temp: {}'.format(alpha, temperature))
    with open(p_src) as f:  # was leaked via bare open()
        d = json.load(f)
    for vid, props in list(d['results'].items()):  # removed pointless 'if True:'
        # Length-normalize: longer captions are penalized by len**temperature.
        sent_scores = [p['sentence_score'] / (float(len(p['sentence'].split())) ** temperature + 1e-5)
                       for p in props]
        prop_scores = [p['proposal_score'] for p in props]
        joint_score = alpha * np.array(sent_scores) + np.array(prop_scores)
        for i, p in enumerate(props):
            # Cast np.float64 -> float so the value is a plain JSON number.
            p['joint_score'] = float(joint_score[i])
        ranked = sorted(props, key=lambda x: x['joint_score'], reverse=True)
        # Keep only as many events as the model predicted for this video.
        top_n = ranked[0]['pred_event_count']
        kept = sorted(ranked[:top_n], key=lambda x: x['timestamp'])
        d['results'][vid] = kept
    save_path = p_src + '_rerank_alpha{}_temp{}.json'.format(alpha, temperature)
    save_dvc_json(d, save_path)
    return save_path
| |
|
| |
|
def evaluate(model, criterion, postprocessors, loader, dvc_json_path, logger=None, args=None, score_threshold=0,
             alpha=0.3, dvc_eval_version='2018', device='cuda', debug=False, skip_lang_eval=False):
    """Run inference over *loader*, dump predictions to *dvc_json_path*, and score them.

    For every batch, the model's outputs are post-processed into per-video
    proposal lists (timestamp, caption, scores, ...) and accumulated into a
    DVC-format dict, which is saved to disk. Unless skip_lang_eval is set,
    the saved file is then evaluated via eval_metrics and the scores are
    merged back into the saved JSON.

    Returns:
        (scores, []) on full evaluation, or (None, None) when
        skip_lang_eval is True.
    """
    out_json = {'results': {},
                'version': "VERSION 1.0",
                'external_data': {'used:': True, 'details': None}}
    opt = loader.dataset.opt

    # NOTE(review): loss_sum is never populated below — appears vestigial.
    loss_sum = OrderedDict()
    with torch.set_grad_enabled(False):
        for dt in tqdm(loader, disable=opt.disable_tqdm):
            # Move every tensor in the batch dict to the target device.
            dt = {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in dt.items()}
            # Missing keys read as None downstream.
            dt = collections.defaultdict(lambda: None, dt)

            # Per-video target dicts get the same device treatment.
            dt['video_target'] = [
                {key: _.to(device) if isinstance(_, torch.Tensor) else _ for key, _ in vid_info.items()} for vid_info in
                dt['video_target']]

            output, _ = model(dt, criterion, contrastive_criterion=None, eval_mode=True)
            # presumably video_length column 1 holds the original (unpadded)
            # length used for box rescaling — TODO confirm against the dataset.
            orig_target_sizes = dt['video_length'][:, 1]

            # NOTE(review): weight_dict is unused here — likely left over
            # from a loss-logging path.
            weight_dict = criterion.weight_dict

            results = postprocessors['bbox'](output, orig_target_sizes, loader)

            batch_json = {}
            for idx, video_name in enumerate(dt['video_key']):
                segment = results[idx]['boxes'].cpu().numpy()
                raw_boxes = results[idx]['raw_boxes'].cpu().numpy()
                # One entry per proposal whose score clears score_threshold.
                batch_json[video_name] = [
                    {
                        "timestamp": segment[pid].tolist(),
                        "raw_box": raw_boxes[pid].tolist(),
                        "proposal_score": results[idx]['scores'][pid].item(),
                        "sentence": results[idx]['captions'][pid],
                        "sentence_score": results[idx]['caption_scores'][pid],
                        'query_id': results[idx]['query_id'][pid].item(),
                        'vid_duration': results[idx]['vid_duration'].item(),
                        'pred_event_count': results[idx]['pred_seq_len'].item(),
                    }
                    for pid in range(len(segment)) if results[idx]['scores'][pid].item() > score_threshold]
            out_json['results'].update(batch_json)
            # In debug mode, stop after a handful of videos to keep runs short.
            if debug and len(out_json['results']) > 5:
                break

    save_dvc_json(out_json, dvc_json_path)

    if skip_lang_eval:
        return None, None

    # Rerank only when the model was trained with an event-count loss.
    scores = eval_metrics(dvc_json_path,
                          gt_filenames=opt.gt_file_for_eval,
                          para_gt_filenames=opt.gt_file_for_para_eval,
                          alpha=alpha,
                          rerank=(opt.count_loss_coef > 0),
                          dvc_eval_version=dvc_eval_version,
                          transformer_input_type=opt.transformer_input_type
                          )

    # Persist the scores alongside the predictions.
    out_json.update(scores)
    save_dvc_json(out_json, dvc_json_path)
    return scores, []
| |
|