import sys
import os
import argparse
import io
import contextlib
import itertools
import logging
import warnings
from collections import OrderedDict

from natsort import natsorted
import numpy as np
import mmcv
from mmdet.datasets.api_wrappers import COCO, COCOeval

sys.path.append('/home/caduser/KOTORI/vin-ssl/source')
os.chdir('/home/caduser/KOTORI/vin-ssl/source')

from base_config_track import get_config
from mmdet_tools import mmdet_test


def print_log(msg, logger=None, **kwargs):
    # Logging is intentionally silenced; uncomment to debug.
    # print(msg)
    pass


def evaluate(dataset,
             results,
             metric='bbox',
             logger=None,
             jsonfile_prefix=None,
             classwise=False,
             proposal_nums=(100, 300, 1000),
             iou_thrs=None,
             metric_items=None):
    """Evaluation in COCO protocol.

    Args:
        results (list[list | tuple]): Testing results of the dataset.
        metric (str | list[str]): Metrics to be evaluated. Options are
            'bbox', 'segm', 'proposal', 'proposal_fast'.
        logger (logging.Logger | str | None): Logger used for printing
            related information during evaluation. Default: None.
        jsonfile_prefix (str | None): The prefix of json files. It includes
            the file path and the prefix of filename, e.g., "a/b/prefix".
            If not specified, a temp file will be created. Default: None.
        classwise (bool): Whether to evaluate the AP for each class.
        proposal_nums (Sequence[int]): Proposal number used for evaluating
            recalls, such as recall@100, recall@1000.
            Default: (100, 300, 1000).
        iou_thrs (Sequence[float], optional): IoU threshold used for
            evaluating recalls/mAPs. If set to a list, the average of all
            IoUs will also be computed. If not specified, [0.50, 0.55,
            0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] will be used.
            Default: None.
        metric_items (list[str] | str, optional): Metric items that will
            be returned. If not specified, ``['AR@100', 'AR@300',
            'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000']`` will be
            used when ``metric=='proposal'``, ``['mAP', 'mAP_50', 'mAP_75',
            'mAP_s', 'mAP_m', 'mAP_l']`` will be used when
            ``metric=='bbox' or metric=='segm'``.

    Returns:
        dict[str, float]: COCO style evaluation metric.
    """
    metrics = metric if isinstance(metric, list) else [metric]
    allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
    for metric in metrics:
        if metric not in allowed_metrics:
            raise KeyError(f'metric {metric} is not supported')
    if iou_thrs is None:
        iou_thrs = np.linspace(
            .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
    if metric_items is not None:
        if not isinstance(metric_items, list):
            metric_items = [metric_items]

    result_files, tmp_dir = dataset.format_results(results, jsonfile_prefix)

    eval_results = OrderedDict()
    cocoGt = dataset.coco
    results_per_category = []
    for metric in metrics:
        msg = f'Evaluating {metric}...'
        if logger is None:
            msg = '\n' + msg
        print_log(msg, logger=logger)

        if metric == 'proposal_fast':
            ar = dataset.fast_eval_recall(
                results, proposal_nums, iou_thrs, logger='silent')
            log_msg = []
            for i, num in enumerate(proposal_nums):
                eval_results[f'AR@{num}'] = ar[i]
                log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
            log_msg = ''.join(log_msg)
            print_log(log_msg, logger=logger)
            continue

        iou_type = 'bbox' if metric == 'proposal' else metric
        if metric not in result_files:
            raise KeyError(f'{metric} is not in results')
        try:
            predictions = mmcv.load(result_files[metric])
            if iou_type == 'segm':
                # Refer to https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L331  # noqa
                # When evaluating mask AP, if the results contain bbox,
                # cocoapi will use the box area instead of the mask area
                # for calculating the instance area.
                # Though the overall AP is not affected, this leads to
                # different small/medium/large mask AP results.
                for x in predictions:
                    x.pop('bbox')
                warnings.simplefilter('once')
                warnings.warn(
                    'The key "bbox" is deleted for more accurate mask AP '
                    'of small/medium/large instances since v2.12.0. This '
                    'does not change the overall mAP calculation.',
                    UserWarning)
            cocoDt = cocoGt.loadRes(predictions)
        except IndexError:
            print_log(
                'The testing results of the whole dataset is empty.',
                logger=logger,
                level=logging.ERROR)
            break

        cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
        cocoEval.params.catIds = dataset.cat_ids
        cocoEval.params.imgIds = dataset.img_ids
        cocoEval.params.maxDets = list(proposal_nums)
        cocoEval.params.iouThrs = iou_thrs
        # mapping of cocoEval.stats
        coco_metric_names = {
            'mAP': 0,
            'mAP_50': 1,
            'mAP_75': 2,
            'mAP_s': 3,
            'mAP_m': 4,
            'mAP_l': 5,
            'AR@100': 6,
            'AR@300': 7,
            'AR@1000': 8,
            'AR_s@1000': 9,
            'AR_m@1000': 10,
            'AR_l@1000': 11
        }
        if metric_items is not None:
            for metric_item in metric_items:
                if metric_item not in coco_metric_names:
                    raise KeyError(
                        f'metric item {metric_item} is not supported')

        if metric == 'proposal':
            cocoEval.params.useCats = 0
            cocoEval.evaluate()
            cocoEval.accumulate()

            # Save coco summarize print information to logger
            redirect_string = io.StringIO()
            with contextlib.redirect_stdout(redirect_string):
                cocoEval.summarize()
            print_log('\n' + redirect_string.getvalue(), logger=logger)

            if metric_items is None:
                metric_items = [
                    'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000', 'AR_m@1000',
                    'AR_l@1000'
                ]

            for item in metric_items:
                val = float(
                    f'{cocoEval.stats[coco_metric_names[item]]:.3f}')
                eval_results[item] = val
        else:
            cocoEval.evaluate()
            cocoEval.accumulate()

            # Save coco summarize print information to logger
            redirect_string = io.StringIO()
            with contextlib.redirect_stdout(redirect_string):
                cocoEval.summarize()
            print_log('\n' + redirect_string.getvalue(), logger=logger)

            if classwise:  # Compute per-category AP
                # from https://github.com/facebookresearch/detectron2/
                precisions = cocoEval.eval['precision']
                # precision: (iou, recall, cls, area range, max dets)
                assert len(dataset.cat_ids) == precisions.shape[2]

                for idx, catId in enumerate(dataset.cat_ids):
                    # area range index 0: all area ranges
                    # max dets index -1: typically 100 per image
                    nm = dataset.coco.loadCats(catId)[0]
                    precision = precisions[:, :, idx, 0, -1]
                    precision = precision[precision > -1]
                    if precision.size:
                        ap = np.mean(precision)
                    else:
                        ap = float('nan')
                    results_per_category.append(
                        (f'{nm["name"]}', float(ap)))

                # Table layout kept from the upstream mmdet implementation;
                # the table itself is not printed here.
                num_columns = min(6, len(results_per_category) * 2)
                results_flatten = list(
                    itertools.chain(*results_per_category))
                headers = ['category', 'AP'] * (num_columns // 2)
                results_2d = itertools.zip_longest(*[
                    results_flatten[i::num_columns]
                    for i in range(num_columns)
                ])

            if metric_items is None:
                metric_items = [
                    'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
                ]

            for metric_item in metric_items:
                key = f'{metric}_{metric_item}'
                val = float(
                    f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}')
                eval_results[key] = val
            ap = cocoEval.stats[:6]
            eval_results[f'{metric}_mAP_copypaste'] = (
                f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                f'{ap[4]:.3f} {ap[5]:.3f}')

    if tmp_dir is not None:
        tmp_dir.cleanup()
    return eval_results, results_per_category


def get_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description='Test trained object detection model')
    parser.add_argument(
        '--experiment_name', '-exp-name', type=str, default='no-exp',
        help='name of the folder that stores the checkpoint models')
    return parser.parse_args()
if __name__ == "__main__":
    args = get_args()
    experiment_name = args.experiment_name

    print("**********" * 3)
    print('Starting evaluation process')

    # Collect the saved epoch checkpoints for this experiment, newest last.
    checkpoints = os.listdir(os.path.join('../trained_weights', experiment_name))
    checkpoints = natsorted(checkpoints)
    checkpoints = [p for p in checkpoints if 'epoch_' in p]
    # checkpoint = os.path.join('../trained_weights', experiment_name, checkpoints[-1])
    # Change the slice here to evaluate more than the latest checkpoint.
    selected_checkpoints = checkpoints[-1:]

    dict_results = {}
    valid_dict_results = {}
    eval_on_valid = False

    for checkpoint_name in selected_checkpoints:
        print('-----' * 5)
        print('Processing checkpoint', checkpoint_name)
        checkpoint = os.path.join('../trained_weights', experiment_name,
                                  checkpoint_name)

        results = {}
        results_dir = 'results'
        os.makedirs(results_dir, exist_ok=True)

        results_avg = []
        results_avg_ar = []
        results_classwise = []

        cfg = get_config()
        if eval_on_valid:
            # Set eval_on_valid = True above to run inference on the
            # validation set instead of the test set.
            cfg.data.test['img_prefix'] = './data/'
            cfg.data.test['ann_file'] = './data/valid_annotations.json'

        args_result = argparse.Namespace(
            eval='bbox',
            out='results/' + experiment_name + '.pkl',
            checkpoint=None,
            work_dir=results_dir,
            fuse_conv_bn=None,
            gpu_ids=None,
            format_only=None,
            show=None,
            show_dir=None,
            show_score_thr=0.3,
            gpu_collect=None,
            tmpdir=None,
            cfg_options=None,
            options=None,
            launcher='none',
            eval_options=None,
            local_rank=0)

        dataset, outputs = mmdet_test.get_outputs(cfg, checkpoint, args_result)

        metrics, results_per_category = evaluate(
            dataset, outputs, metric='bbox', classwise=True)  # , iou_thrs=[0.5])
        metrics_ar, _ = evaluate(dataset, outputs, metric='proposal')

        results_avg.append([experiment_name, metrics])
        results_avg_ar.append([experiment_name, metrics_ar])
        results_classwise.append(
            [experiment_name, OrderedDict(results_per_category)])

        print('--------------------------------')
        valid_dict_results[checkpoint_name] = []

        print('Average Precision')
        print(list(results_avg[0][1].keys())[:-1])
        # Append the metric names, then the per-experiment values.
        valid_dict_results[checkpoint_name].append(
            list(results_avg[0][1].keys())[:-1])
        for res in results_avg:
            print([res[0], list(res[1].values())[:-1]])
            valid_dict_results[checkpoint_name].append(
                [res[0], list(res[1].values())[:-1]])
        dict_results[checkpoint_name] = list(results_avg[0][1].values())[1]

    print("Results on testing set")
    print(valid_dict_results)
    print("**********" * 3)
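
# Example invocation (a sketch; the script filename and experiment name below
# are hypothetical, not taken from the original source; the code above only
# assumes a ../trained_weights/<experiment_name>/ folder containing files with
# 'epoch_' in their names):
#
#   python test_checkpoints.py --experiment_name my_experiment
#
# This evaluates the most recent 'epoch_*' checkpoint under
# ../trained_weights/my_experiment on the test split and prints the
# COCO-style bbox AP metrics for that checkpoint.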