Rafael committed on
Commit a08c529
1 Parent(s): 969b59b

init commit

.gitignore ADDED
@@ -0,0 +1,5 @@
+ setup.cfg
+ bin/
+ __pycache__/
+ build/
+ *.egg-*/
README.md CHANGED
@@ -1,12 +1,75 @@
- ---
- title: Detection Metrics2
- emoji: 🐠
- colorFrom: indigo
- colorTo: indigo
- sdk: gradio
- sdk_version: 3.37.0
- app_file: app.py
- pinned: false
- ---
- 
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # COCO Metrics
+ 
+ COCO Metrics is a Python package that provides evaluation metrics for **object detection** tasks using the COCO (Common Objects in Context) [evaluation protocol](https://cocodataset.org/#detection-eval). Instance segmentation tasks will also be supported in the future.
+ 
+ ## Advantages
+ 
+ * This project does not depend directly on [pycocotools](https://github.com/cocodataset/cocoapi/tree/master/PythonAPI/pycocotools), COCO's official code to compute metrics.
+ * It does not require a `.json` file on disk, as originally required by pycocotools.
+ * Integrated with the HuggingFace 🤗 Evaluate library.
+ 
+ ## Metrics
+ 
+ The following 12 metrics are computed to characterize the performance of an object detector (each is reported under a named key, as shown below):
+ * **Average Precision** (AP) IoU=.50:.05:.95
+ * **Average Precision** (AP) IoU=.50
+ * **Average Precision** (AP) IoU=.75
+ * **Average Precision** (AP) Across Scales for small objects: area < 32²
+ * **Average Precision** (AP) Across Scales for medium objects: 32² < area < 96²
+ * **Average Precision** (AP) Across Scales for large objects: area > 96²
+ * **Average Recall** (AR) given 1 detection per image
+ * **Average Recall** (AR) given 10 detections per image
+ * **Average Recall** (AR) given 100 detections per image
+ * **Average Recall** (AR) for small objects: area < 32²
+ * **Average Recall** (AR) for medium objects: 32² < area < 96²
+ * **Average Recall** (AR) for large objects: area > 96²
+ 
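+ Each metric is reported under a descriptive key built by `_post_process_stats` in `coco_metrics/coco_evaluate.py`. For instance, with the default `maxDets = [1, 10, 100]`, the computed results contain entries such as:
+ 
+ ```
+ results["iou_bbox"]["AP-IoU=0.50:0.95-area=all-maxDets=100"]
+ results["iou_bbox"]["AR-IoU=0.50:0.95-area=all-maxDets=1"]
+ ```
+ 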
+ ## Installation
+ 
+ COCO Metrics can be installed from source:
+ 
+ ```
+ # Clone the project
+ git clone https://github.com/rafaelpadilla/coco_metrics
+ cd coco_metrics
+ 
+ # Create environment
+ conda create -n coco-metrics python=3.10
+ conda activate coco-metrics
+ 
+ # Install packages
+ pip install -r requirements.txt
+ ```
+ 
+ ## Example
+ 
+ The code [example.py](https://github.com/rafaelpadilla/coco_metrics/blob/main/example.py) shows how to use the COCO evaluator with the HuggingFace 🤗 Evaluate library.
+ 
+ The snippet below illustrates how to call the evaluator.
+ 
+ ```
+ import evaluate
+ 
+ # Load the evaluator from the Hub
+ coco_bbx_evaluator = evaluate.load("rafaelpadilla/detection_metrics", coco=coco_gt, iou_type="bbox")
+ 
+ # Within your dataset loop, add predictions and references to the evaluator
+ for batch in dataloader:
+     results = ...  # model-predicted results
+     labels = ...   # ground-truth labels
+ 
+     # Add predictions and expected labels to the evaluator
+     coco_bbx_evaluator.add(prediction=results, reference=labels)
+ 
+ # Compute the metrics and show results
+ results = coco_bbx_evaluator.compute()
+ print(results)
+ ```
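+ 
+ The `coco_gt` argument above is a ground-truth annotation object in COCO format; this project's `coco_metrics/pycocotools/coco.py` accepts the annotations as a JSON-like dict rather than a file path. A minimal sketch, with illustrative values only:
+ 
+ ```
+ coco_gt = {
+     "images": [{"id": 1, "width": 640, "height": 480, "file_name": "image_1.jpg"}],
+     "annotations": [
+         {
+             "id": 1,
+             "image_id": 1,
+             "category_id": 1,
+             "bbox": [100.0, 100.0, 50.0, 80.0],  # [x, y, width, height]
+             "area": 4000.0,
+             "iscrowd": 0,
+         }
+     ],
+     "categories": [{"id": 1, "name": "person", "supercategory": "person"}],
+ }
+ ```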
+ 
+ ## References
+ 
+ [1] [COCO Metrics](https://cocodataset.org/#detection-eval)
+ [2] [A Survey on Performance Metrics for Object-Detection Algorithms](https://www.researchgate.net/profile/Rafael-Padilla/publication/343194514_A_Survey_on_Performance_Metrics_for_Object-Detection_Algorithms/links/5f1b5a5e45851515ef478268/A-Survey-on-Performance-Metrics-for-Object-Detection-Algorithms.pdf)
+ [3] [A comparative analysis of object detection metrics with a companion open-source toolkit](https://www.mdpi.com/2079-9292/10/3/279)
+ 
coco_metrics.py ADDED
@@ -0,0 +1,204 @@
+ from typing import Dict, List, Union
+ from pathlib import Path
+ import datasets
+ import torch
+ import evaluate
+ import json
+ from tqdm import tqdm
+ from coco_metrics.pycocotools.coco import COCO
+ from coco_metrics.coco_evaluate import COCOEvaluator
+ from coco_metrics.utils import _TYPING_PREDICTION, _TYPING_REFERENCE
+ 
+ _DESCRIPTION = (
+     "This class evaluates object detection models using the COCO dataset "
+     "and its evaluation metrics."
+ )
+ _HOMEPAGE = "https://cocodataset.org"
+ _CITATION = """
+ @misc{lin2015microsoft,
+     title={Microsoft COCO: Common Objects in Context},
+     author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and
+             Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and
+             C. Lawrence Zitnick and Piotr Dollár},
+     year={2015},
+     eprint={1405.0312},
+     archivePrefix={arXiv},
+     primaryClass={cs.CV}
+ }
+ """
+ _REFERENCE_URLS = [
+     "https://ieeexplore.ieee.org/abstract/document/9145130",
+     "https://www.mdpi.com/2079-9292/10/3/279",
+     "https://cocodataset.org/#detection-eval",
+ ]
+ _KWARGS_DESCRIPTION = """\
+ Computes COCO metrics for object detection: AP (mAP) and its variants.
+ 
+ Args:
+     coco (COCO): COCO Evaluator object for evaluating predictions.
+     **kwargs: Additional keyword arguments forwarded to evaluate.Metric.
+ """
+ 
+ 
+ class EvaluateObjectDetection(evaluate.Metric):
+     """
+     Class for evaluating object detection models.
+     """
+ 
+     def __init__(self, json_gt: Union[Path, Dict], iou_type: str = "bbox", **kwargs):
+         """
+         Initializes the EvaluateObjectDetection class.
+ 
+         Args:
+             json_gt: JSON with ground-truth annotations in COCO format.
+             iou_type: Type of IoU to evaluate (only "bbox" is supported).
+             **kwargs: Additional keyword arguments forwarded to evaluate.Metric.
+         """
+         super().__init__(**kwargs)
+ 
+         # Create COCO object from ground-truth annotations
+         if isinstance(json_gt, Path):
+             assert json_gt.exists(), f"Path {json_gt} does not exist."
+             with open(json_gt) as f:
+                 json_data = json.load(f)
+         elif isinstance(json_gt, dict):
+             json_data = json_gt
+         else:
+             raise ValueError(f"Unsupported type for json_gt: {type(json_gt)}")
+         coco = COCO(json_data)
+ 
+         self.coco_evaluator = COCOEvaluator(coco, [iou_type])
+ 
+     def remove_classes(self, classes_to_remove: List[str]):
+         # Drop the given category names (case-insensitive) from the ground truth
+         to_remove = [c.upper() for c in classes_to_remove]
+         cats = {}
+         for id, cat in self.coco_evaluator.coco_eval["bbox"].cocoGt.cats.items():
+             if cat["name"].upper() not in to_remove:
+                 cats[id] = cat
+         self.coco_evaluator.coco_eval["bbox"].cocoGt.cats = cats
+         self.coco_evaluator.coco_gt.cats = cats
+         self.coco_evaluator.coco_gt.dataset["categories"] = list(cats.values())
+         self.coco_evaluator.coco_eval["bbox"].params.catIds = [c["id"] for c in cats.values()]
+ 
+     def _info(self):
+         """
+         Returns the MetricInfo object with information about the module.
+ 
+         Returns:
+             evaluate.MetricInfo: Metric information object.
+         """
+         return evaluate.MetricInfo(
+             module_type="metric",
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
+             # This defines the format of each prediction and reference
+             features=datasets.Features(
+                 {
+                     "predictions": [
+                         datasets.Features(
+                             {
+                                 "scores": datasets.Sequence(datasets.Value("float")),
+                                 "labels": datasets.Sequence(datasets.Value("int64")),
+                                 "boxes": datasets.Sequence(
+                                     datasets.Sequence(datasets.Value("float"))
+                                 ),
+                             }
+                         )
+                     ],
+                     "references": [
+                         datasets.Features(
+                             {
+                                 "image_id": datasets.Sequence(datasets.Value("int64")),
+                             }
+                         )
+                     ],
+                 }
+             ),
+             # Homepage of the module for documentation
+             homepage=_HOMEPAGE,
+             # Additional links to the codebase or references
+             reference_urls=_REFERENCE_URLS,
+         )
+ 
+     def _preprocess(
+         self, predictions: List[Dict[str, torch.Tensor]]
+     ) -> List[_TYPING_PREDICTION]:
+         """
+         Preprocesses the predictions before computing the scores.
+ 
+         Args:
+             predictions (List[Dict[str, torch.Tensor]]): A list of prediction dicts.
+ 
+         Returns:
+             List[_TYPING_PREDICTION]: A list of preprocessed prediction dicts.
+         """
+         processed_predictions = []
+         for pred in predictions:
+             processed_pred: _TYPING_PREDICTION = {}
+             for key, val in pred.items():
+                 if isinstance(val, torch.Tensor):
+                     val = val.detach().cpu().tolist()
+                 processed_pred[key] = val
+             processed_predictions.append(processed_pred)
+         return processed_predictions
+ 
+     def _clear_predictions(self, predictions):
+         # Remove unnecessary keys from predictions
+         required = ["scores", "labels", "boxes"]
+         ret = []
+         for prediction in predictions:
+             ret.append({k: v for k, v in prediction.items() if k in required})
+         return ret
+ 
+     def _clear_references(self, references):
+         # Keep only the keys declared in the "references" features of _info
+         required = ["image_id"]
+         ret = []
+         for ref in references:
+             ret.append({k: v for k, v in ref.items() if k in required})
+         return ret
+ 
+     def add(self, *, prediction=None, reference=None, **kwargs):
+         """
+         Preprocesses the predictions and references and calls the parent class function.
+ 
+         Args:
+             prediction: A list of prediction dicts.
+             reference: A list of reference dicts.
+             **kwargs: Additional keyword arguments.
+         """
+         if prediction is not None:
+             prediction = self._clear_predictions(prediction)
+             prediction = self._preprocess(prediction)
+ 
+         # Skip evaluate.Metric.add and call the implementation of its base class directly
+         super(evaluate.Metric, self).add(prediction=prediction, references=reference, **kwargs)
+ 
+     def _compute(
+         self,
+         predictions: List[List[_TYPING_PREDICTION]],
+         references: List[List[_TYPING_REFERENCE]],
+     ) -> Dict[str, Dict[str, float]]:
+         """
+         Returns the evaluation scores.
+ 
+         Args:
+             predictions (List[List[_TYPING_PREDICTION]]): A list of predictions.
+             references (List[List[_TYPING_REFERENCE]]): A list of references.
+ 
+         Returns:
+             Dict: A dictionary containing evaluation scores.
+         """
+         # Loop through each pair of predictions and references obtained per batch
+         pbar = tqdm(zip(predictions, references), desc="Updating detections", total=len(predictions))
+         for pred, ref in pbar:
+             res = {}  # {image_id: prediction}
+             for target, output in zip(ref, pred):
+                 res[target["image_id"][0]] = output
+             self.coco_evaluator.update(res)
+ 
+         print("Synchronizing processes")
+         self.coco_evaluator.synchronize_between_processes()
+ 
+         print("Accumulating values")
+         self.coco_evaluator.accumulate()
+ 
+         print("Summarizing results")
+         self.coco_evaluator.summarize()
+ 
+         stats = self.coco_evaluator.get_results()
+         return stats
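
A minimal sketch of the per-batch structures that `add()` accepts, following the `features` spec declared in `_info` above (all values are illustrative; `coco_bbx_evaluator` is the evaluator loaded in the README example):

```
import torch

# One dict per image in the batch; tensors are detached and converted by _preprocess
predictions = [
    {
        "scores": torch.tensor([0.95, 0.72]),  # one confidence per detection
        "labels": torch.tensor([1, 2]),        # category ids
        "boxes": torch.tensor([[100.0, 100.0, 150.0, 180.0],
                               [20.0, 30.0, 60.0, 90.0]]),
    }
]
# "image_id" is a sequence; _compute reads target["image_id"][0]
references = [{"image_id": [1]}]

coco_bbx_evaluator.add(prediction=predictions, reference=references)
```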
coco_metrics/.gitignore ADDED
@@ -0,0 +1,5 @@
+ setup.cfg
+ bin/
+ __pycache__/
+ build/
+ *.egg-*/
coco_metrics/__init__.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.0.1"
coco_metrics/coco_evaluate.py ADDED
@@ -0,0 +1,224 @@
+ import contextlib
+ import copy
+ import os
+ from typing import Dict, List, Union
+ 
+ import numpy as np
+ import torch
+ 
+ from coco_metrics.pycocotools.coco import COCO
+ from coco_metrics.pycocotools.cocoeval import COCOeval
+ from coco_metrics.utils import (_TYPING_BOX, _TYPING_PREDICTIONS, convert_to_xywh,
+                                 create_common_coco_eval)
+ 
+ _SUPPORTED_TYPES = ["bbox"]
+ 
+ 
+ class COCOEvaluator(object):
+     """
+     Class to perform evaluation for the COCO dataset.
+     """
+ 
+     def __init__(self, coco_gt: COCO, iou_types: List[str] = ["bbox"]):
+         """
+         Initializes COCOEvaluator with the ground truth COCO dataset and IoU types.
+ 
+         Args:
+             coco_gt: The ground truth COCO dataset.
+             iou_types: Intersection over Union (IoU) types for evaluation (supported: "bbox").
+         """
+         self.coco_gt = copy.deepcopy(coco_gt)
+ 
+         self.coco_eval = {}
+         for iou_type in iou_types:
+             assert iou_type in _SUPPORTED_TYPES, f"IoU type not supported: {iou_type}"
+             self.coco_eval[iou_type] = COCOeval(self.coco_gt, iouType=iou_type)
+ 
+         self.iou_types = iou_types
+         self.img_ids = []
+         self.eval_imgs = {k: [] for k in iou_types}
+ 
+     def update(self, predictions: _TYPING_PREDICTIONS) -> None:
+         """
+         Update the evaluator with new predictions.
+ 
+         Args:
+             predictions: The predictions to update.
+         """
+         img_ids = list(np.unique(list(predictions.keys())))
+         self.img_ids.extend(img_ids)
+ 
+         for iou_type in self.iou_types:
+             results = self.prepare(predictions, iou_type)
+ 
+             # suppress pycocotools prints
+             with open(os.devnull, "w") as devnull:
+                 with contextlib.redirect_stdout(devnull):
+                     coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO()
+             coco_eval = self.coco_eval[iou_type]
+ 
+             coco_eval.cocoDt = coco_dt
+             coco_eval.params.imgIds = list(img_ids)
+             eval_imgs = coco_eval.evaluate()
+             self.eval_imgs[iou_type].append(eval_imgs)
+ 
+     def synchronize_between_processes(self) -> None:
+         """
+         Synchronizes evaluation images between processes.
+         """
+         for iou_type in self.iou_types:
+             self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
+             create_common_coco_eval(
+                 self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]
+             )
+ 
+     def accumulate(self) -> None:
+         """
+         Accumulates the evaluation results.
+         """
+         for coco_eval in self.coco_eval.values():
+             coco_eval.accumulate()
+ 
+     def summarize(self) -> None:
+         """
+         Prints the IoU metric and summarizes the evaluation results.
+         """
+         for iou_type, coco_eval in self.coco_eval.items():
+             print("IoU metric: {}".format(iou_type))
+             coco_eval.summarize()
+ 
+     def prepare(
+         self, predictions: _TYPING_PREDICTIONS, iou_type: str
+     ) -> List[Dict[str, Union[int, _TYPING_BOX, float]]]:
+         """
+         Prepares the predictions for COCO detection.
+ 
+         Args:
+             predictions: The predictions to prepare.
+             iou_type: The Intersection over Union (IoU) type for evaluation.
+ 
+         Returns:
+             A list of dictionaries with the prepared predictions.
+         """
+         if iou_type == "bbox":
+             return self.prepare_for_coco_detection(predictions)
+         else:
+             raise ValueError(f"IoU type not supported {iou_type}")
+ 
+     def _post_process_stats(
+         self, stats, coco_eval_object, iou_type="bbox"
+     ) -> Dict[str, float]:
+         """
+         Maps the raw stats array produced by COCOeval to named metrics.
+ 
+         Args:
+             stats: The stats array computed by the COCOeval object.
+             coco_eval_object: The COCOeval object used in the evaluation.
+             iou_type: The Intersection over Union (IoU) type for evaluation.
+ 
+         Returns:
+             A dictionary mapping metric names to their values.
+         """
+         if iou_type not in _SUPPORTED_TYPES:
+             raise ValueError(f"iou_type '{iou_type}' not supported")
+ 
+         current_max_dets = coco_eval_object.params.maxDets
+ 
+         index_to_title = {
+             "bbox": {
+                 0: f"AP-IoU=0.50:0.95-area=all-maxDets={current_max_dets[2]}",
+                 1: f"AP-IoU=0.50-area=all-maxDets={current_max_dets[2]}",
+                 2: f"AP-IoU=0.75-area=all-maxDets={current_max_dets[2]}",
+                 3: f"AP-IoU=0.50:0.95-area=small-maxDets={current_max_dets[2]}",
+                 4: f"AP-IoU=0.50:0.95-area=medium-maxDets={current_max_dets[2]}",
+                 5: f"AP-IoU=0.50:0.95-area=large-maxDets={current_max_dets[2]}",
+                 6: f"AR-IoU=0.50:0.95-area=all-maxDets={current_max_dets[0]}",
+                 7: f"AR-IoU=0.50:0.95-area=all-maxDets={current_max_dets[1]}",
+                 8: f"AR-IoU=0.50:0.95-area=all-maxDets={current_max_dets[2]}",
+                 9: f"AR-IoU=0.50:0.95-area=small-maxDets={current_max_dets[2]}",
+                 10: f"AR-IoU=0.50:0.95-area=medium-maxDets={current_max_dets[2]}",
+                 11: f"AR-IoU=0.50:0.95-area=large-maxDets={current_max_dets[2]}",
+             },
+             "keypoints": {
+                 0: "AP-IoU=0.50:0.95-area=all-maxDets=20",
+                 1: "AP-IoU=0.50-area=all-maxDets=20",
+                 2: "AP-IoU=0.75-area=all-maxDets=20",
+                 3: "AP-IoU=0.50:0.95-area=medium-maxDets=20",
+                 4: "AP-IoU=0.50:0.95-area=large-maxDets=20",
+                 5: "AR-IoU=0.50:0.95-area=all-maxDets=20",
+                 6: "AR-IoU=0.50-area=all-maxDets=20",
+                 7: "AR-IoU=0.75-area=all-maxDets=20",
+                 8: "AR-IoU=0.50:0.95-area=medium-maxDets=20",
+                 9: "AR-IoU=0.50:0.95-area=large-maxDets=20",
+             },
+         }
+ 
+         output_dict: Dict[str, float] = {}
+         for index, stat in enumerate(stats):
+             output_dict[index_to_title[iou_type][index]] = stat
+ 
+         return output_dict
+ 
+     def get_results(self) -> Dict[str, Dict[str, float]]:
+         """
+         Gets the results of the COCO evaluation.
+ 
+         Returns:
+             A dictionary with the results of the COCO evaluation.
+         """
+         output_dict = {}
+ 
+         for iou_type, coco_eval in self.coco_eval.items():
+             if iou_type == "segm":
+                 iou_type = "bbox"
+             output_dict[f"iou_{iou_type}"] = self._post_process_stats(
+                 coco_eval.stats, coco_eval, iou_type
+             )
+         return output_dict
+ 
+     def prepare_for_coco_detection(
+         self, predictions: _TYPING_PREDICTIONS
+     ) -> List[Dict[str, Union[int, _TYPING_BOX, float]]]:
+         """
+         Prepares the predictions for COCO detection.
+ 
+         Args:
+             predictions: The predictions to prepare.
+ 
+         Returns:
+             A list of dictionaries with the prepared predictions.
+         """
+         coco_results = []
+         for original_id, prediction in predictions.items():
+             if len(prediction) == 0:
+                 continue
+ 
+             boxes = prediction["boxes"]
+             if len(boxes) == 0:
+                 continue
+ 
+             if not isinstance(boxes, torch.Tensor):
+                 boxes = torch.as_tensor(boxes)
+             boxes = boxes.tolist()
+ 
+             scores = prediction["scores"]
+             if not isinstance(scores, list):
+                 scores = scores.tolist()
+ 
+             labels = prediction["labels"]
+             if not isinstance(labels, list):
+                 labels = prediction["labels"].tolist()
+ 
+             coco_results.extend(
+                 [
+                     {
+                         "image_id": original_id,
+                         "category_id": labels[k],
+                         "bbox": box,
+                         "score": scores[k],
+                     }
+                     for k, box in enumerate(boxes)
+                 ]
+             )
+         return coco_results
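
For reference, `COCOEvaluator.update()` takes a mapping from image id to that image's prediction dict, as assembled in `EvaluateObjectDetection._compute`. A minimal sketch (names and values are illustrative; `coco_gt` is the ground-truth dict shown in the README):

```
from coco_metrics.pycocotools.coco import COCO
from coco_metrics.coco_evaluate import COCOEvaluator

coco_gt_api = COCO(coco_gt)                         # COCO object built from the dict
evaluator = COCOEvaluator(coco_gt_api, iou_types=["bbox"])

# {image_id: prediction dict} -- same keys that prepare_for_coco_detection expects
evaluator.update({
    1: {"boxes": [[100.0, 100.0, 50.0, 80.0]], "scores": [0.95], "labels": [1]},
})
evaluator.synchronize_between_processes()
evaluator.accumulate()
evaluator.summarize()
stats = evaluator.get_results()
```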
coco_metrics/pycocotools/coco.py ADDED
@@ -0,0 +1,491 @@
+ # This code is basically a copy and paste from the original cocoapi file:
+ # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py
+ # with the following changes:
+ # * Instead of receiving the path to the annotation file, it receives a json object.
+ # * Commented out all parts of the code that depend on maskUtils, which is not needed
+ #   for bounding box evaluation.
+ 
+ __author__ = "tylin"
+ __version__ = "2.0"
+ # Interface for accessing the Microsoft COCO dataset.
+ 
+ # Microsoft COCO is a large image dataset designed for object detection,
+ # segmentation, and caption generation. pycocotools is a Python API that
+ # assists in loading, parsing and visualizing the annotations in COCO.
+ # Please visit http://mscoco.org/ for more information on COCO, including
+ # for the data, paper, and tutorials. The exact format of the annotations
+ # is also described on the COCO website. For example usage of the pycocotools
+ # please see pycocotools_demo.ipynb. In addition to this API, please download both
+ # the COCO images and annotations in order to run the demo.
+ 
+ # An alternative to using the API is to load the annotations directly
+ # into a Python dictionary.
+ # Using the API provides additional utility functions. Note that this API
+ # supports both *instance* and *caption* annotations. In the case of
+ # captions not all functions are defined (e.g. categories are undefined).
+ 
+ # The following API functions are defined:
+ #  COCO       - COCO api class that loads COCO annotation file and prepare data structures.
+ #  decodeMask - Decode binary mask M encoded via run-length encoding.
+ #  encodeMask - Encode binary mask M using run-length encoding.
+ #  getAnnIds  - Get ann ids that satisfy given filter conditions.
+ #  getCatIds  - Get cat ids that satisfy given filter conditions.
+ #  getImgIds  - Get img ids that satisfy given filter conditions.
+ #  loadAnns   - Load anns with the specified ids.
+ #  loadCats   - Load cats with the specified ids.
+ #  loadImgs   - Load imgs with the specified ids.
+ #  annToMask  - Convert segmentation in an annotation to binary mask.
+ #  showAnns   - Display the specified annotations.
+ #  loadRes    - Load algorithm results and create API for accessing them.
+ #  download   - Download COCO images from mscoco.org server.
+ # Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
+ # Help on each function can be accessed by: "help COCO>function".
+ 
+ # See also COCO>decodeMask,
+ # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
+ # COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
+ # COCO>loadImgs, COCO>annToMask, COCO>showAnns
+ 
+ # Microsoft COCO Toolbox.      version 2.0
+ # Data, paper, and tutorials available at: http://mscoco.org/
+ # Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
+ # Licensed under the Simplified BSD License [see bsd.txt]
+ 
+ import copy
+ import itertools
+ import json
+ # from . import mask as maskUtils
+ import os
+ import sys
+ import time
+ from collections import defaultdict
+ 
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from matplotlib.collections import PatchCollection
+ from matplotlib.patches import Polygon
+ 
+ PYTHON_VERSION = sys.version_info[0]
+ if PYTHON_VERSION == 2:
+     from urllib import urlretrieve
+ elif PYTHON_VERSION == 3:
+     from urllib.request import urlretrieve
+ 
+ 
+ def _isArrayLike(obj):
+     return hasattr(obj, "__iter__") and hasattr(obj, "__len__")
+ 
+ 
+ class COCO:
+     def __init__(self, annotations=None):
+         """
+         Constructor of Microsoft COCO helper class for reading and visualizing annotations.
+         :param annotations (dict): annotation data in COCO format (a json object instead of a file path)
+         :return:
+         """
+         # load dataset
+         self.dataset, self.anns, self.cats, self.imgs = dict(), dict(), dict(), dict()
+         self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+         # Modified the original code to receive a json object instead of a path to a file
+         if annotations:
+             assert (
+                 type(annotations) == dict
+             ), f"annotation file format {type(annotations)} not supported."
+             self.dataset = annotations
+             self.createIndex()
+ 
+     def createIndex(self):
+         # create index
+         print("creating index...")
+         anns, cats, imgs = {}, {}, {}
+         imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
+         if "annotations" in self.dataset:
+             for ann in self.dataset["annotations"]:
+                 imgToAnns[ann["image_id"]].append(ann)
+                 anns[ann["id"]] = ann
+ 
+         if "images" in self.dataset:
+             for img in self.dataset["images"]:
+                 imgs[img["id"]] = img
+ 
+         if "categories" in self.dataset:
+             for cat in self.dataset["categories"]:
+                 cats[cat["id"]] = cat
+ 
+         if "annotations" in self.dataset and "categories" in self.dataset:
+             for ann in self.dataset["annotations"]:
+                 catToImgs[ann["category_id"]].append(ann["image_id"])
+ 
+         print("index created!")
+ 
+         # create class members
+         self.anns = anns
+         self.imgToAnns = imgToAnns
+         self.catToImgs = catToImgs
+         self.imgs = imgs
+         self.cats = cats
+ 
+     def info(self):
+         """
+         Print information about the annotation file.
+         :return:
+         """
+         for key, value in self.dataset["info"].items():
+             print("{}: {}".format(key, value))
+ 
+     def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+         """
+         Get ann ids that satisfy given filter conditions. default skips that filter
+         :param imgIds (int array) : get anns for given imgs
+                catIds (int array) : get anns for given cats
+                areaRng (float array) : get anns for given area range (e.g. [0 inf])
+                iscrowd (boolean) : get anns for given crowd label (False or True)
+         :return: ids (int array) : integer array of ann ids
+         """
+         imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+         catIds = catIds if _isArrayLike(catIds) else [catIds]
+ 
+         if len(imgIds) == len(catIds) == len(areaRng) == 0:
+             anns = self.dataset["annotations"]
+         else:
+             if not len(imgIds) == 0:
+                 lists = [
+                     self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns
+                 ]
+                 anns = list(itertools.chain.from_iterable(lists))
+             else:
+                 anns = self.dataset["annotations"]
+             anns = (
+                 anns
+                 if len(catIds) == 0
+                 else [ann for ann in anns if ann["category_id"] in catIds]
+             )
+             anns = (
+                 anns
+                 if len(areaRng) == 0
+                 else [
+                     ann
+                     for ann in anns
+                     if ann["area"] > areaRng[0] and ann["area"] < areaRng[1]
+                 ]
+             )
+         if not iscrowd == None:
+             ids = [ann["id"] for ann in anns if ann["iscrowd"] == iscrowd]
+         else:
+             ids = [ann["id"] for ann in anns]
+         return ids
+ 
+     def getCatIds(self, catNms=[], supNms=[], catIds=[]):
+         """
+         Get cat ids that satisfy the given filtering parameters. default skips that filter.
+         :param catNms (str array) : get cats for given cat names
+         :param supNms (str array) : get cats for given supercategory names
+         :param catIds (int array) : get cats for given cat ids
+         :return: ids (int array) : integer array of cat ids
+         """
+         catNms = catNms if _isArrayLike(catNms) else [catNms]
+         supNms = supNms if _isArrayLike(supNms) else [supNms]
+         catIds = catIds if _isArrayLike(catIds) else [catIds]
+ 
+         if len(catNms) == len(supNms) == len(catIds) == 0:
+             cats = self.dataset["categories"]
+         else:
+             cats = self.dataset["categories"]
+             cats = (
+                 cats
+                 if len(catNms) == 0
+                 else [cat for cat in cats if cat["name"] in catNms]
+             )
+             cats = (
+                 cats
+                 if len(supNms) == 0
+                 else [cat for cat in cats if cat["supercategory"] in supNms]
+             )
+             cats = (
+                 cats
+                 if len(catIds) == 0
+                 else [cat for cat in cats if cat["id"] in catIds]
+             )
+         ids = [cat["id"] for cat in cats]
+         return ids
+ 
+     def getImgIds(self, imgIds=[], catIds=[]):
+         """
+         Get img ids that satisfy given filter conditions.
+         :param imgIds (int array) : get imgs for given ids
+         :param catIds (int array) : get imgs with all given cats
+         :return: ids (int array) : integer array of img ids
+         """
+         imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+         catIds = catIds if _isArrayLike(catIds) else [catIds]
+ 
+         if len(imgIds) == len(catIds) == 0:
+             ids = self.imgs.keys()
+         else:
+             ids = set(imgIds)
+             for i, catId in enumerate(catIds):
+                 if i == 0 and len(ids) == 0:
+                     ids = set(self.catToImgs[catId])
+                 else:
+                     ids &= set(self.catToImgs[catId])
+         return list(ids)
+ 
+     def loadAnns(self, ids=[]):
+         """
+         Load anns with the specified ids.
+         :param ids (int array) : integer ids specifying anns
+         :return: anns (object array) : loaded ann objects
+         """
+         if _isArrayLike(ids):
+             return [self.anns[id] for id in ids]
+         elif type(ids) == int:
+             return [self.anns[ids]]
+ 
+     def loadCats(self, ids=[]):
+         """
+         Load cats with the specified ids.
+         :param ids (int array) : integer ids specifying cats
+         :return: cats (object array) : loaded cat objects
+         """
+         if _isArrayLike(ids):
+             return [self.cats[id] for id in ids]
+         elif type(ids) == int:
+             return [self.cats[ids]]
+ 
+     def loadImgs(self, ids=[]):
+         """
+         Load imgs with the specified ids.
+         :param ids (int array) : integer ids specifying img
+         :return: imgs (object array) : loaded img objects
+         """
+         if _isArrayLike(ids):
+             return [self.imgs[id] for id in ids]
+         elif type(ids) == int:
+             return [self.imgs[ids]]
+ 
+     def showAnns(self, anns, draw_bbox=False):
+         """
+         Display the specified annotations.
+         :param anns (array of object): annotations to display
+         :return: None
+         """
+         if len(anns) == 0:
+             return 0
+         if "segmentation" in anns[0] or "keypoints" in anns[0]:
+             datasetType = "instances"
+         elif "caption" in anns[0]:
+             datasetType = "captions"
+         else:
+             raise Exception("datasetType not supported")
+         if datasetType == "instances":
+             ax = plt.gca()
+             ax.set_autoscale_on(False)
+             polygons = []
+             color = []
+             for ann in anns:
+                 c = (np.random.random((1, 3)) * 0.6 + 0.4).tolist()[0]
+                 if "segmentation" in ann:
+                     if type(ann["segmentation"]) == list:
+                         # polygon
+                         for seg in ann["segmentation"]:
+                             poly = np.array(seg).reshape((int(len(seg) / 2), 2))
+                             polygons.append(Polygon(poly))
+                             color.append(c)
+                     else:
+                         raise NotImplementedError(
+                             "This type is not supported yet."
+                         )
+                         # # mask
+                         # t = self.imgs[ann['image_id']]
+                         # if type(ann['segmentation']['counts']) == list:
+                         #     rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
+                         # else:
+                         #     rle = [ann['segmentation']]
+                         # m = maskUtils.decode(rle)
+                         # img = np.ones( (m.shape[0], m.shape[1], 3) )
+                         # if ann['iscrowd'] == 1:
+                         #     color_mask = np.array([2.0,166.0,101.0])/255
+                         # if ann['iscrowd'] == 0:
+                         #     color_mask = np.random.random((1, 3)).tolist()[0]
+                         # for i in range(3):
+                         #     img[:,:,i] = color_mask[i]
+                         # ax.imshow(np.dstack( (img, m*0.5) ))
+                 if "keypoints" in ann and type(ann["keypoints"]) == list:
+                     # turn skeleton into zero-based index
+                     sks = np.array(self.loadCats(ann["category_id"])[0]["skeleton"]) - 1
+                     kp = np.array(ann["keypoints"])
+                     x = kp[0::3]
+                     y = kp[1::3]
+                     v = kp[2::3]
+                     for sk in sks:
+                         if np.all(v[sk] > 0):
+                             plt.plot(x[sk], y[sk], linewidth=3, color=c)
+                     plt.plot(
+                         x[v > 0],
+                         y[v > 0],
+                         "o",
+                         markersize=8,
+                         markerfacecolor=c,
+                         markeredgecolor="k",
+                         markeredgewidth=2,
+                     )
+                     plt.plot(
+                         x[v > 1],
+                         y[v > 1],
+                         "o",
+                         markersize=8,
+                         markerfacecolor=c,
+                         markeredgecolor=c,
+                         markeredgewidth=2,
+                     )
+ 
+                 if draw_bbox:
+                     [bbox_x, bbox_y, bbox_w, bbox_h] = ann["bbox"]
+                     poly = [
+                         [bbox_x, bbox_y],
+                         [bbox_x, bbox_y + bbox_h],
+                         [bbox_x + bbox_w, bbox_y + bbox_h],
+                         [bbox_x + bbox_w, bbox_y],
+                     ]
+                     np_poly = np.array(poly).reshape((4, 2))
+                     polygons.append(Polygon(np_poly))
+                     color.append(c)
+ 
+             p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
+             ax.add_collection(p)
+             p = PatchCollection(
+                 polygons, facecolor="none", edgecolors=color, linewidths=2
+             )
+             ax.add_collection(p)
+         elif datasetType == "captions":
+             for ann in anns:
+                 print(ann["caption"])
+ 
+     def loadRes(self, resFile):
+         """
+         Load result file and return a result api object.
+         :param resFile (str) : file name of result file
+         :return: res (obj) : result api object
+         """
+         res = COCO()
+         res.dataset["images"] = [img for img in self.dataset["images"]]
+ 
+         print("Loading and preparing results...")
+         tic = time.time()
+         if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
+             anns = json.load(open(resFile))
+         elif type(resFile) == np.ndarray:
+             anns = self.loadNumpyAnnotations(resFile)
+         else:
+             anns = resFile
+         assert type(anns) == list, "results is not an array of objects"
+         annsImgIds = [ann["image_id"] for ann in anns]
+         assert set(annsImgIds) == (
+             set(annsImgIds) & set(self.getImgIds())
+         ), "Results do not correspond to current coco set"
+         if "caption" in anns[0]:
+             raise NotImplementedError("Evaluating caption is not supported yet.")
+         elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+             res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+             for id, ann in enumerate(anns):
+                 bb = ann["bbox"]
+                 x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+                 if not "segmentation" in ann:
+                     ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+                 ann["area"] = bb[2] * bb[3]
+                 ann["id"] = id + 1
+                 ann["iscrowd"] = 0
+         elif "segmentation" in anns[0]:
+             raise NotImplementedError("Evaluating segmentation is not supported yet.")
+         elif "keypoints" in anns[0]:
+             raise NotImplementedError("Evaluating keypoints is not supported yet.")
+         print("DONE (t={:0.2f}s)".format(time.time() - tic))
+ 
+         res.dataset["annotations"] = anns
+         res.createIndex()
+         return res
+ 
+     def download(self, tarDir=None, imgIds=[]):
+         """
+         Download COCO images from mscoco.org server.
+         :param tarDir (str): COCO results directory name
+                imgIds (list): images to be downloaded
+         :return:
+         """
+         if tarDir is None:
+             print("Please specify target directory")
+             return -1
+         if len(imgIds) == 0:
+             imgs = self.imgs.values()
+         else:
+             imgs = self.loadImgs(imgIds)
+         N = len(imgs)
+         if not os.path.exists(tarDir):
+             os.makedirs(tarDir)
+         for i, img in enumerate(imgs):
+             tic = time.time()
+             fname = os.path.join(tarDir, img["file_name"])
+             if not os.path.exists(fname):
+                 urlretrieve(img["coco_url"], fname)
+             print(
+                 "downloaded {}/{} images (t={:0.1f}s)".format(i, N, time.time() - tic)
+             )
+ 
+     def loadNumpyAnnotations(self, data):
+         """
+         Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
+         :param data (numpy.ndarray)
+         :return: annotations (python nested list)
+         """
+         print("Converting ndarray to lists...")
+         assert type(data) == np.ndarray
+         print(data.shape)
+         assert data.shape[1] == 7
+         N = data.shape[0]
+         ann = []
+         for i in range(N):
+             if i % 1000000 == 0:
+                 print("{}/{}".format(i, N))
+             ann += [
+                 {
+                     "image_id": int(data[i, 0]),
+                     "bbox": [data[i, 1], data[i, 2], data[i, 3], data[i, 4]],
+                     "score": data[i, 5],
+                     "category_id": int(data[i, 6]),
+                 }
+             ]
+         return ann
+ 
+     def annToRLE(self, ann):
+         """
+         Convert annotation which can be polygons, uncompressed RLE, to RLE.
+         :return: RLE encoded annotation
+         """
+         t = self.imgs[ann["image_id"]]
+         h, w = t["height"], t["width"]
+         segm = ann["segmentation"]
+         if type(segm) == list:
+             raise NotImplementedError("This type is not supported yet.")
+             # polygon -- a single object might consist of multiple parts
+             # we merge all parts into one mask rle code
+             # rles = maskUtils.frPyObjects(segm, h, w)
+             # rle = maskUtils.merge(rles)
+         elif type(segm["counts"]) == list:
+             raise NotImplementedError("This type is not supported yet.")
+             # uncompressed RLE
+             # rle = maskUtils.frPyObjects(segm, h, w)
+         else:
+             # rle
+             rle = ann["segmentation"]
+         return rle
+ 
+     def annToMask(self, ann):
+         """
+         Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
+         :return: binary mask (numpy 2D array)
+         """
+         rle = self.annToRLE(ann)
+         # m = maskUtils.decode(rle)
+         raise NotImplementedError("This type is not supported yet.")
+         return m
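
Since this `COCO` class takes a dict instead of an annotation file path, a typical round trip looks like the following sketch (reusing the `coco_gt` dict from the README example):

```
from coco_metrics.pycocotools.coco import COCO

coco = COCO(coco_gt)                       # builds the index from the dict
img_ids = coco.getImgIds()                 # all image ids
ann_ids = coco.getAnnIds(imgIds=img_ids)   # annotation ids for those images
anns = coco.loadAnns(ann_ids)              # the annotation dicts themselves
```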
coco_metrics/pycocotools/cocoeval.py ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This code is basically a copy and paste from the original cocoapi repo:
2
+ # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py
3
+ # with the following changes have been made:
4
+ # * Replace the usage of mask (maskUtils) by MaskEvaluator.
5
+ # * Comment out prints in the evaluate() function.
6
+ # * Include a return of the function evaluate. Inspired
7
+ # by @ybelkada (https://huggingface.co/spaces/ybelkada/cocoevaluate/)
8
+
9
+ __author__ = "tsungyi"
10
+
11
+ import copy
12
+ import datetime
13
+ import time
14
+ from collections import defaultdict
15
+ from packaging import version
16
+
17
+ import numpy as np
18
+
19
+ if version.parse(np.__version__) < version.parse("1.24"):
20
+ dtype_float = np.float
21
+ else:
22
+ dtype_float = np.float32
23
+
24
+ from .mask_utils import MaskEvaluator as maskUtils
25
+
26
+
27
+ class COCOeval:
28
+ # Interface for evaluating detection on the Microsoft COCO dataset.
29
+ #
30
+ # The usage for CocoEval is as follows:
31
+ # cocoGt=..., cocoDt=... # load dataset and results
32
+ # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
33
+ # E.params.recThrs = ...; # set parameters as desired
34
+ # E.evaluate(); # run per image evaluation
35
+ # E.accumulate(); # accumulate per image results
36
+ # E.summarize(); # display summary metrics of results
37
+ # For example usage see evalDemo.m and http://mscoco.org/.
38
+ #
39
+ # The evaluation parameters are as follows (defaults in brackets):
40
+ # imgIds - [all] N img ids to use for evaluation
41
+ # catIds - [all] K cat ids to use for evaluation
42
+ # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
43
+ # recThrs - [0:.01:1] R=101 recall thresholds for evaluation
44
+ # areaRng - [...] A=4 object area ranges for evaluation
45
+ # maxDets - [1 10 100] M=3 thresholds on max detections per image
46
+ # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
47
+ # iouType replaced the now DEPRECATED useSegm parameter.
48
+ # useCats - [1] if true use category labels for evaluation
49
+ # Note: if useCats=0 category labels are ignored as in proposal scoring.
50
+ # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
51
+ #
52
+ # evaluate(): evaluates detections on every image and every category and
53
+ # concats the results into the "evalImgs" with fields:
54
+ # dtIds - [1xD] id for each of the D detections (dt)
55
+ # gtIds - [1xG] id for each of the G ground truths (gt)
56
+ # dtMatches - [TxD] matching gt id at each IoU or 0
57
+ # gtMatches - [TxG] matching dt id at each IoU or 0
58
+ # dtScores - [1xD] confidence of each dt
59
+ # gtIgnore - [1xG] ignore flag for each gt
60
+ # dtIgnore - [TxD] ignore flag for each dt at each IoU
61
+ #
62
+ # accumulate(): accumulates the per-image, per-category evaluation
63
+ # results in "evalImgs" into the dictionary "eval" with fields:
64
+ # params - parameters used for evaluation
65
+ # date - date evaluation was performed
66
+ # counts - [T,R,K,A,M] parameter dimensions (see above)
67
+ # precision - [TxRxKxAxM] precision for every evaluation setting
68
+ # recall - [TxKxAxM] max recall for every evaluation setting
69
+ # Note: precision and recall==-1 for settings with no gt objects.
70
+ #
71
+ # See also coco, mask, pycocoDemo, pycocoEvalDemo
72
+ #
73
+ # Microsoft COCO Toolbox. version 2.0
74
+ # Data, paper, and tutorials available at: http://mscoco.org/
75
+ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
76
+ # Licensed under the Simplified BSD License [see coco/license.txt]
77
+ def __init__(self, cocoGt=None, cocoDt=None, iouType="segm"):
78
+ """
79
+ Initialize CocoEval using coco APIs for gt and dt
80
+ :param cocoGt: coco object with ground truth annotations
81
+ :param cocoDt: coco object with detection results
82
+ :return: None
83
+ """
84
+ if not iouType:
85
+ print("iouType not specified. use default iouType segm")
86
+ self.cocoGt = cocoGt # ground truth COCO API
87
+ self.cocoDt = cocoDt # detections COCO API
88
+ self.evalImgs = defaultdict(
89
+ list
90
+ ) # per-image per-category evaluation results [KxAxI] elements
91
+ self.eval = {} # accumulated evaluation results
92
+ self._gts = defaultdict(list) # gt for evaluation
93
+ self._dts = defaultdict(list) # dt for evaluation
94
+ self.params = Params(iouType=iouType) # parameters
95
+ self._paramsEval = {} # parameters for evaluation
96
+ self.stats = [] # result summarization
97
+ self.ious = {} # ious between all gts and dts
98
+ if not cocoGt is None:
99
+ self.params.imgIds = sorted(cocoGt.getImgIds())
100
+ self.params.catIds = sorted(cocoGt.getCatIds())
101
+
102
+ def _prepare(self):
103
+ """
104
+ Prepare ._gts and ._dts for evaluation based on params
105
+ :return: None
106
+ """
107
+
108
+ def _toMask(anns, coco):
109
+ # modify ann['segmentation'] by reference
110
+ for ann in anns:
111
+ rle = coco.annToRLE(ann)
112
+ ann["segmentation"] = rle
113
+
114
+ p = self.params
115
+ if p.useCats:
116
+ gts = self.cocoGt.loadAnns(
117
+ self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
118
+ )
119
+ dts = self.cocoDt.loadAnns(
120
+ self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
121
+ )
122
+ else:
123
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
124
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
125
+
126
+ # convert ground truth to mask if iouType == 'segm'
127
+ if p.iouType == "segm":
128
+ _toMask(gts, self.cocoGt)
129
+ _toMask(dts, self.cocoDt)
130
+ # set ignore flag
131
+ for gt in gts:
132
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
133
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
134
+ if p.iouType == "keypoints":
135
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
136
+ self._gts = defaultdict(list) # gt for evaluation
137
+ self._dts = defaultdict(list) # dt for evaluation
138
+ for gt in gts:
139
+ self._gts[gt["image_id"], gt["category_id"]].append(gt)
140
+ for dt in dts:
141
+ self._dts[dt["image_id"], dt["category_id"]].append(dt)
142
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
143
+ self.eval = {} # accumulated evaluation results
144
+
145
+ def evaluate(self):
146
+ """
147
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
148
+ :return: None
149
+ """
150
+ # tic = time.time()
151
+ # print("Running per image evaluation...")
152
+ p = self.params
153
+ # add backward compatibility if useSegm is specified in params
154
+ if not p.useSegm is None:
155
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
156
+ # print(
157
+ # "useSegm (deprecated) is not None. Running {} evaluation".format(
158
+ # p.iouType
159
+ # )
160
+ # )
161
+ # print("Evaluate annotation type *{}*".format(p.iouType))
162
+ p.imgIds = list(np.unique(p.imgIds))
163
+ if p.useCats:
164
+ p.catIds = list(np.unique(p.catIds))
165
+ p.maxDets = sorted(p.maxDets)
166
+ self.params = p
167
+
168
+ self._prepare()
169
+ # loop through images, area range, max detection number
170
+ catIds = p.catIds if p.useCats else [-1]
171
+
172
+ if p.iouType == "segm" or p.iouType == "bbox":
173
+ computeIoU = self.computeIoU
174
+ elif p.iouType == "keypoints":
175
+ computeIoU = self.computeOks
176
+ self.ious = {
177
+ (imgId, catId): computeIoU(imgId, catId)
178
+ for imgId in p.imgIds
179
+ for catId in catIds
180
+ }
181
+
182
+ evaluateImg = self.evaluateImg
183
+ maxDet = p.maxDets[-1]
184
+ self.evalImgs = [
185
+ evaluateImg(imgId, catId, areaRng, maxDet)
186
+ for catId in catIds
187
+ for areaRng in p.areaRng
188
+ for imgId in p.imgIds
189
+ ]
190
+ self._paramsEval = copy.deepcopy(self.params)
191
+ ret_evalImgs = np.asarray(self.evalImgs).reshape(
192
+ len(catIds), len(p.areaRng), len(p.imgIds)
193
+ )
194
+ # toc = time.time()
195
+ # print("DONE (t={:0.2f}s).".format(toc - tic))
196
+ return ret_evalImgs
197
+
198
+ def computeIoU(self, imgId, catId):
199
+ p = self.params
200
+ if p.useCats:
201
+ gt = self._gts[imgId, catId]
202
+ dt = self._dts[imgId, catId]
203
+ else:
204
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
205
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
206
+ if len(gt) == 0 and len(dt) == 0:
207
+ return []
208
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
209
+ dt = [dt[i] for i in inds]
210
+ if len(dt) > p.maxDets[-1]:
211
+ dt = dt[0 : p.maxDets[-1]]
212
+
213
+ if p.iouType == "segm":
214
+ g = [g["segmentation"] for g in gt]
215
+ d = [d["segmentation"] for d in dt]
216
+ elif p.iouType == "bbox":
217
+ g = [g["bbox"] for g in gt]
218
+ d = [d["bbox"] for d in dt]
219
+ else:
220
+ raise Exception("unknown iouType for iou computation")
221
+
222
+ # compute iou between each dt and gt region
223
+ iscrowd = [int(o["iscrowd"]) for o in gt]
224
+ ious = maskUtils.iou(d, g, iscrowd)
225
+ return ious
226
+
227
+ def computeOks(self, imgId, catId):
228
+ p = self.params
229
+ # dimention here should be Nxm
230
+ gts = self._gts[imgId, catId]
231
+ dts = self._dts[imgId, catId]
232
+ inds = np.argsort([-d["score"] for d in dts], kind="mergesort")
233
+ dts = [dts[i] for i in inds]
234
+ if len(dts) > p.maxDets[-1]:
235
+ dts = dts[0 : p.maxDets[-1]]
236
+ # if len(gts) == 0 and len(dts) == 0:
237
+ if len(gts) == 0 or len(dts) == 0:
238
+ return []
239
+ ious = np.zeros((len(dts), len(gts)))
240
+ sigmas = p.kpt_oks_sigmas
241
+ vars = (sigmas * 2) ** 2
242
+ k = len(sigmas)
243
+ # compute oks between each detection and ground truth object
244
+ for j, gt in enumerate(gts):
245
+ # create bounds for ignore regions(double the gt bbox)
246
+ g = np.array(gt["keypoints"])
247
+ xg = g[0::3]
248
+ yg = g[1::3]
249
+ vg = g[2::3]
250
+ k1 = np.count_nonzero(vg > 0)
251
+ bb = gt["bbox"]
252
+ x0 = bb[0] - bb[2]
253
+ x1 = bb[0] + bb[2] * 2
254
+ y0 = bb[1] - bb[3]
255
+ y1 = bb[1] + bb[3] * 2
256
+ for i, dt in enumerate(dts):
257
+ d = np.array(dt["keypoints"])
258
+ xd = d[0::3]
259
+ yd = d[1::3]
260
+ if k1 > 0:
261
+ # measure the per-keypoint distance if keypoints visible
262
+ dx = xd - xg
263
+ dy = yd - yg
264
+ else:
265
+ # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
266
+ z = np.zeros((k))
267
+ dx = np.max((z, x0 - xd), axis=0) + np.max((z, xd - x1), axis=0)
268
+ dy = np.max((z, y0 - yd), axis=0) + np.max((z, yd - y1), axis=0)
269
+ e = (dx**2 + dy**2) / vars / (gt["area"] + np.spacing(1)) / 2
270
+ if k1 > 0:
271
+ e = e[vg > 0]
272
+ ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
273
+ return ious
274
+
275
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
276
+ """
277
+ perform evaluation for single category and image
278
+ :return: dict (single image results)
279
+ """
280
+ p = self.params
281
+ if p.useCats:
282
+ gt = self._gts[imgId, catId]
283
+ dt = self._dts[imgId, catId]
284
+ else:
285
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
286
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
287
+ if len(gt) == 0 and len(dt) == 0:
288
+ return None
289
+
290
+ for g in gt:
291
+ if g["ignore"] or (g["area"] < aRng[0] or g["area"] > aRng[1]):
292
+ g["_ignore"] = 1
293
+ else:
294
+ g["_ignore"] = 0
295
+
296
+ # sort dt highest score first, sort gt ignore last
297
+ gtind = np.argsort([g["_ignore"] for g in gt], kind="mergesort")
298
+ gt = [gt[i] for i in gtind]
299
+ dtind = np.argsort([-d["score"] for d in dt], kind="mergesort")
300
+ dt = [dt[i] for i in dtind[0:maxDet]]
301
+ iscrowd = [int(o["iscrowd"]) for o in gt]
302
+ # load computed ious
303
+ ious = (
304
+ self.ious[imgId, catId][:, gtind]
305
+ if len(self.ious[imgId, catId]) > 0
306
+ else self.ious[imgId, catId]
307
+ )
308
+
309
+ T = len(p.iouThrs)
310
+ G = len(gt)
311
+ D = len(dt)
312
+ gtm = np.zeros((T, G))
313
+ dtm = np.zeros((T, D))
314
+ gtIg = np.array([g["_ignore"] for g in gt])
315
+ dtIg = np.zeros((T, D))
316
+ if not len(ious) == 0:
317
+ for tind, t in enumerate(p.iouThrs):
318
+ for dind, d in enumerate(dt):
319
+ # information about best match so far (m=-1 -> unmatched)
320
+ iou = min([t, 1 - 1e-10])
321
+ m = -1
322
+ for gind, g in enumerate(gt):
323
+ # if this gt already matched, and not a crowd, continue
324
+ if gtm[tind, gind] > 0 and not iscrowd[gind]:
325
+ continue
326
+ # if dt matched to reg gt, and on ignore gt, stop
327
+ if m > -1 and gtIg[m] == 0 and gtIg[gind] == 1:
328
+ break
329
+ # continue to next gt unless better match made
330
+ if ious[dind, gind] < iou:
331
+ continue
332
+ # if match successful and best so far, store appropriately
333
+ iou = ious[dind, gind]
334
+ m = gind
335
+ # if match made store id of match for both dt and gt
336
+ if m == -1:
337
+ continue
338
+ dtIg[tind, dind] = gtIg[m]
339
+ dtm[tind, dind] = gt[m]["id"]
340
+ gtm[tind, m] = d["id"]
341
+ # set unmatched detections outside of area range to ignore
342
+ a = np.array([d["area"] < aRng[0] or d["area"] > aRng[1] for d in dt]).reshape(
343
+ (1, len(dt))
344
+ )
345
+ dtIg = np.logical_or(dtIg, np.logical_and(dtm == 0, np.repeat(a, T, 0)))
346
+ # store results for given image and category
347
+ return {
348
+ "image_id": imgId,
349
+ "category_id": catId,
350
+ "aRng": aRng,
351
+ "maxDet": maxDet,
352
+ "dtIds": [d["id"] for d in dt],
353
+ "gtIds": [g["id"] for g in gt],
354
+ "dtMatches": dtm,
355
+ "gtMatches": gtm,
356
+ "dtScores": [d["score"] for d in dt],
357
+ "gtIgnore": gtIg,
358
+ "dtIgnore": dtIg,
359
+ }
360
+
361
+ def accumulate(self, p=None):
362
+ """
363
+ Accumulate per image evaluation results and store the result in self.eval
364
+ :param p: input params for evaluation
365
+ :return: None
366
+ """
367
+ print("Accumulating evaluation results...")
368
+ tic = time.time()
369
+ if not self.evalImgs:
370
+ print("Please run evaluate() first")
371
+ # allows input customized parameters
372
+ if p is None:
373
+ p = self.params
374
+ p.catIds = p.catIds if p.useCats == 1 else [-1]
375
+ T = len(p.iouThrs)
376
+ R = len(p.recThrs)
377
+ K = len(p.catIds) if p.useCats else 1
378
+ A = len(p.areaRng)
379
+ M = len(p.maxDets)
380
+ precision = -np.ones(
381
+ (T, R, K, A, M)
382
+ ) # -1 for the precision of absent categories
383
+ recall = -np.ones((T, K, A, M))
384
+ scores = -np.ones((T, R, K, A, M))
385
+
386
+ # create dictionary for future indexing
387
+ _pe = self._paramsEval
388
+ catIds = _pe.catIds if _pe.useCats else [-1]
389
+ setK = set(catIds)
390
+ setA = set(map(tuple, _pe.areaRng))
391
+ setM = set(_pe.maxDets)
392
+ setI = set(_pe.imgIds)
393
+ # get inds to evaluate
394
+ k_list = [n for n, k in enumerate(p.catIds) if k in setK]
395
+ m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
396
+ a_list = [
397
+ n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA
398
+ ]
399
+ i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
400
+ I0 = len(_pe.imgIds)
401
+ A0 = len(_pe.areaRng)
402
+ # retrieve E at each category, area range, and max number of detections
403
+ for k, k0 in enumerate(k_list):
404
+ Nk = k0 * A0 * I0
405
+ for a, a0 in enumerate(a_list):
406
+ Na = a0 * I0
407
+ for m, maxDet in enumerate(m_list):
408
+ E = [self.evalImgs[Nk + Na + i] for i in i_list]
409
+ E = [e for e in E if not e is None]
410
+ if len(E) == 0:
411
+ continue
412
+ dtScores = np.concatenate([e["dtScores"][0:maxDet] for e in E])
413
+
414
+ # different sorting method generates slightly different results.
415
+ # mergesort is used to be consistent as Matlab implementation.
416
+ inds = np.argsort(-dtScores, kind="mergesort")
417
+ dtScoresSorted = dtScores[inds]
418
+
419
+ dtm = np.concatenate(
420
+ [e["dtMatches"][:, 0:maxDet] for e in E], axis=1
421
+ )[:, inds]
422
+ dtIg = np.concatenate(
423
+ [e["dtIgnore"][:, 0:maxDet] for e in E], axis=1
424
+ )[:, inds]
425
+ gtIg = np.concatenate([e["gtIgnore"] for e in E])
426
+ npig = np.count_nonzero(gtIg == 0)
427
+ if npig == 0:
428
+ continue
429
+ tps = np.logical_and(dtm, np.logical_not(dtIg))
430
+ fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg))
431
+
432
+ tp_sum = np.cumsum(tps, axis=1).astype(dtype=dtype_float)
433
+ fp_sum = np.cumsum(fps, axis=1).astype(dtype=dtype_float)
434
+ for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
435
+ tp = np.array(tp)
436
+ fp = np.array(fp)
437
+ nd = len(tp)
438
+ rc = tp / npig
439
+ pr = tp / (fp + tp + np.spacing(1))
440
+ q = np.zeros((R,))
441
+ ss = np.zeros((R,))
442
+
443
+ if nd:
444
+ recall[t, k, a, m] = rc[-1]
445
+ else:
446
+ recall[t, k, a, m] = 0
447
+
448
+ # numpy is slow without cython optimization for accessing elements
449
+ # use python array gets significant speed improvement
450
+ pr = pr.tolist()
451
+ q = q.tolist()
452
+
453
+ for i in range(nd - 1, 0, -1):
454
+ if pr[i] > pr[i - 1]:
455
+ pr[i - 1] = pr[i]
456
+
457
+ inds = np.searchsorted(rc, p.recThrs, side="left")
458
+ try:
459
+ for ri, pi in enumerate(inds):
460
+ q[ri] = pr[pi]
461
+ ss[ri] = dtScoresSorted[pi]
462
+ except:
463
+ pass
464
+ precision[t, :, k, a, m] = np.array(q)
465
+ scores[t, :, k, a, m] = np.array(ss)
466
+ self.eval = {
467
+ "params": p,
468
+ "counts": [T, R, K, A, M],
469
+ "date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
470
+ "precision": precision,
471
+ "recall": recall,
472
+ "scores": scores,
473
+ }
474
+ toc = time.time()
475
+ print("DONE (t={:0.2f}s).".format(toc - tic))
+
+     def summarize(self):
+         """
+         Compute and display summary metrics for evaluation results.
+         Note this function can *only* be applied to the default parameter setting.
+         """
+
+         def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+             p = self.params
+             iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+             titleStr = "Average Precision" if ap == 1 else "Average Recall"
+             typeStr = "(AP)" if ap == 1 else "(AR)"
+             iouStr = (
+                 "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+                 if iouThr is None
+                 else "{:0.2f}".format(iouThr)
+             )
+
+             aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+             mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+             if ap == 1:
+                 # dimension of precision: [TxRxKxAxM]
+                 s = self.eval["precision"]
+                 # IoU
+                 if iouThr is not None:
+                     t = np.where(iouThr == p.iouThrs)[0]
+                     s = s[t]
+                 s = s[:, :, :, aind, mind]
+             else:
+                 # dimension of recall: [TxKxAxM]
+                 s = self.eval["recall"]
+                 if iouThr is not None:
+                     t = np.where(iouThr == p.iouThrs)[0]
+                     s = s[t]
+                 s = s[:, :, aind, mind]
+             if len(s[s > -1]) == 0:
+                 mean_s = -1
+             else:
+                 mean_s = np.mean(s[s > -1])
+             print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+             return mean_s
+
+         def _summarizeDets():
+             stats = np.zeros((12,))
+             stats[0] = _summarize(1)
+             stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+             stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+             stats[3] = _summarize(1, areaRng="small", maxDets=self.params.maxDets[2])
+             stats[4] = _summarize(1, areaRng="medium", maxDets=self.params.maxDets[2])
+             stats[5] = _summarize(1, areaRng="large", maxDets=self.params.maxDets[2])
+             stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+             stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+             stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+             stats[9] = _summarize(0, areaRng="small", maxDets=self.params.maxDets[2])
+             stats[10] = _summarize(0, areaRng="medium", maxDets=self.params.maxDets[2])
+             stats[11] = _summarize(0, areaRng="large", maxDets=self.params.maxDets[2])
+             return stats
+
+         def _summarizeKps():
+             stats = np.zeros((10,))
+             stats[0] = _summarize(1, maxDets=20)
+             stats[1] = _summarize(1, maxDets=20, iouThr=0.5)
+             stats[2] = _summarize(1, maxDets=20, iouThr=0.75)
+             stats[3] = _summarize(1, maxDets=20, areaRng="medium")
+             stats[4] = _summarize(1, maxDets=20, areaRng="large")
+             stats[5] = _summarize(0, maxDets=20)
+             stats[6] = _summarize(0, maxDets=20, iouThr=0.5)
+             stats[7] = _summarize(0, maxDets=20, iouThr=0.75)
+             stats[8] = _summarize(0, maxDets=20, areaRng="medium")
+             stats[9] = _summarize(0, maxDets=20, areaRng="large")
+             return stats
+
+         if not self.eval:
+             raise Exception("Please run accumulate() first")
+         iouType = self.params.iouType
+         if iouType == "segm" or iouType == "bbox":
+             summarize = _summarizeDets
+         elif iouType == "keypoints":
+             summarize = _summarizeKps
+         else:
+             raise Exception("iouType {} not supported".format(iouType))
+         self.stats = summarize()
+
+     def __str__(self):
+         # summarize() prints the metrics table; return the stats so that
+         # str(coco_eval) still yields a valid string
+         self.summarize()
+         return str(self.stats)
+
+
+ class Params:
+     """
+     Params for the COCO evaluation API
+     """
+
+     def setDetParams(self):
+         self.imgIds = []
+         self.catIds = []
+         # np.arange causes trouble: the data points it generates can be slightly
+         # larger than the true values, so np.linspace is used instead
+         self.iouThrs = np.linspace(
+             0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True
+         )
+         self.recThrs = np.linspace(
+             0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True
+         )
+         self.maxDets = [1, 10, 100]
+         self.areaRng = [
+             [0**2, 1e5**2],
+             [0**2, 32**2],
+             [32**2, 96**2],
+             [96**2, 1e5**2],
+         ]
+         self.areaRngLbl = ["all", "small", "medium", "large"]
+         self.useCats = 1
+
+     def setKpParams(self):
+         self.imgIds = []
+         self.catIds = []
+         # np.arange causes trouble: the data points it generates can be slightly
+         # larger than the true values, so np.linspace is used instead
+         self.iouThrs = np.linspace(
+             0.5, 0.95, int(np.round((0.95 - 0.5) / 0.05)) + 1, endpoint=True
+         )
+         self.recThrs = np.linspace(
+             0.0, 1.00, int(np.round((1.00 - 0.0) / 0.01)) + 1, endpoint=True
+         )
+         self.maxDets = [20]
+         self.areaRng = [[0**2, 1e5**2], [32**2, 96**2], [96**2, 1e5**2]]
+         self.areaRngLbl = ["all", "medium", "large"]
+         self.useCats = 1
+         self.kpt_oks_sigmas = (
+             np.array(
+                 [0.26, 0.25, 0.25, 0.35, 0.35, 0.79, 0.79, 0.72, 0.72,
+                  0.62, 0.62, 1.07, 1.07, 0.87, 0.87, 0.89, 0.89]
+             )
+             / 10.0
+         )
+
+     def __init__(self, iouType="bbox"):
+         # only bounding-box evaluation is currently supported by this package
+         if iouType == "bbox":
+             self.setDetParams()
+         else:
+             raise Exception("iouType {} not supported".format(iouType))
+         self.iouType = iouType
+         # useSegm is deprecated
+         self.useSegm = None
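
For orientation, the snippet below sketches how the evaluator above is typically driven end to end. It assumes the `COCOeval` constructor and the `evaluate()` method defined earlier in this file keep pycocotools' interface, and that `coco_gt` / `coco_dt` are COCO-style ground-truth and detection objects loaded elsewhere:

```
from coco_metrics.pycocotools.cocoeval import COCOeval

coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
coco_eval.evaluate()    # per-image, per-category matching
coco_eval.accumulate()  # fills eval["precision"], shape [T, R, K, A, M]
coco_eval.summarize()   # prints the 12 standard detection metrics

# eval["precision"][t, :, k, a, m] is the interpolated PR curve for
# IoU threshold t, category k, area range a and detection limit m
print(coco_eval.stats)  # the same 12 numbers as an array
```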
coco_metrics/pycocotools/mask_utils.py ADDED
@@ -0,0 +1,77 @@
+ # This code is copied, with small modifications, from:
+ # https://github.com/rafaelpadilla/review_object_detection_metrics/blob/main/src/evaluators/coco_evaluator.py
+
+ from typing import List
+
+ import numpy as np
+
+
+ class MaskEvaluator(object):
+     @staticmethod
+     def iou(
+         dt: List[List[float]], gt: List[List[float]], iscrowd: List[bool]
+     ) -> np.ndarray:
+         """
+         Calculate the intersection over union (IoU) between detection bounding \
+         boxes (dt) and ground-truth bounding boxes (gt).
+         Reference: https://github.com/rafaelpadilla/review_object_detection_metrics
+
+         Args:
+             dt (List[List[float]]): List of detection bounding boxes in the \
+                 format [x, y, width, height].
+             gt (List[List[float]]): List of ground-truth bounding boxes in the \
+                 format [x, y, width, height].
+             iscrowd (List[bool]): List indicating whether each ground-truth \
+                 bounding box is a crowd region or not.
+
+         Returns:
+             np.ndarray: Array of IoU values of shape (len(dt), len(gt)). \
+                 An empty list is returned when either dt or gt is empty.
+         """
+         assert len(iscrowd) == len(gt), "iou(iscrowd=) must have the same length as gt"
+         if len(dt) == 0 or len(gt) == 0:
+             return []
+         ious = np.zeros((len(dt), len(gt)))
+         for g_idx, g in enumerate(gt):
+             for d_idx, d in enumerate(dt):
+                 ious[d_idx, g_idx] = _jaccard(d, g, iscrowd[g_idx])
+         return ious
+
+
+ def _jaccard(a: List[float], b: List[float], iscrowd: bool) -> float:
+     """
+     Calculate the Jaccard index (intersection over union) between two bounding boxes.
+     For "crowd" regions a modified criterion is used: if a gt object is marked
+     as "iscrowd", a dt is allowed to match any subregion of the gt. Choosing
+     the gt' inside the crowd gt that best matches the dt amounts to taking
+     gt' = intersect(dt, gt); since by definition union(gt', dt) = dt, this gives
+     iou(dt, gt, iscrowd=True) = iou(dt, gt') = area(intersect(dt, gt)) / area(dt).
+
+     Args:
+         a (List[float]): Bounding box coordinates in the format [x, y, width, height].
+         b (List[float]): Bounding box coordinates in the format [x, y, width, height].
+         iscrowd (bool): Flag indicating whether the second bounding box is a crowd \
+             region or not.
+
+     Returns:
+         float: Jaccard index between the two bounding boxes.
+     """
+     xa, ya, x2a, y2a = a[0], a[1], a[0] + a[2], a[1] + a[3]
+     xb, yb, x2b, y2b = b[0], b[1], b[0] + b[2], b[1] + b[3]
+
+     # innermost left x
+     xi = max(xa, xb)
+     # innermost right x
+     x2i = min(x2a, x2b)
+     # same for y
+     yi = max(ya, yb)
+     y2i = min(y2a, y2b)
+
+     # calculate areas
+     Aa = max(x2a - xa, 0.0) * max(y2a - ya, 0.0)
+     Ab = max(x2b - xb, 0.0) * max(y2b - yb, 0.0)
+     Ai = max(x2i - xi, 0.0) * max(y2i - yi, 0.0)
+
+     if iscrowd:
+         return Ai / Aa
+
+     return Ai / (Aa + Ab - Ai)
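
A quick sanity check of the crowd vs. non-crowd behavior implemented above (box values made up for illustration):

```
from coco_metrics.pycocotools.mask_utils import MaskEvaluator

dt = [[0, 0, 10, 10]]                  # one detection, [x, y, w, h]
gt = [[5, 5, 10, 10], [0, 0, 20, 20]]  # two ground-truth boxes
ious = MaskEvaluator.iou(dt, gt, iscrowd=[False, True])

# ious[0, 0]: plain IoU = 25 / (100 + 100 - 25) ≈ 0.143
# ious[0, 1]: crowd IoU = intersection / detection area = 100 / 100 = 1.0
print(ious)
```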
coco_metrics/utils.py ADDED
@@ -0,0 +1,156 @@
+ import copy
+ import pickle
+ from typing import Any, Dict, List, Tuple, Union
+
+ import numpy as np
+ import torch
+ import torch.distributed as dist
+
+ from coco_metrics.pycocotools.cocoeval import COCOeval
+
+ # Typings
+ _TYPING_BOX = Tuple[float, float, float, float]
+ _TYPING_SCORES = List[float]
+ _TYPING_LABELS = List[int]
+ _TYPING_BOXES = List[_TYPING_BOX]
+ _TYPING_PRED_REF = Union[_TYPING_SCORES, _TYPING_LABELS, _TYPING_BOXES]
+ _TYPING_PREDICTION = Dict[str, _TYPING_PRED_REF]
+ _TYPING_REFERENCE = Dict[str, _TYPING_PRED_REF]
+ _TYPING_PREDICTIONS = Dict[int, _TYPING_PREDICTION]
+
+
+ def convert_to_xywh(boxes: torch.Tensor) -> torch.Tensor:
+     """
+     Convert bounding boxes from (xmin, ymin, xmax, ymax) format to \
+     (x, y, width, height) format.
+
+     Args:
+         boxes (torch.Tensor): Tensor of shape (N, 4) representing bounding boxes in \
+             (xmin, ymin, xmax, ymax) format.
+
+     Returns:
+         torch.Tensor: Tensor of shape (N, 4) representing bounding boxes in \
+             (x, y, width, height) format.
+     """
+     xmin, ymin, xmax, ymax = boxes.unbind(1)
+     return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1)
+
+
+ def create_common_coco_eval(
+     coco_eval: COCOeval, img_ids: List[int], eval_imgs: np.ndarray
+ ) -> None:
+     """
+     Create a common COCO evaluation by merging image IDs and evaluation images into \
+     the coco_eval object.
+
+     Args:
+         coco_eval: COCOeval evaluation object.
+         img_ids (List[int]): List of image IDs.
+         eval_imgs (np.ndarray): Array of per-image evaluation results.
+     """
+     img_ids, eval_imgs = merge(img_ids, eval_imgs)
+     img_ids = list(img_ids)
+     eval_imgs = list(eval_imgs.flatten())
+
+     coco_eval.evalImgs = eval_imgs
+     coco_eval.params.imgIds = img_ids
+     coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
+
+
+ def merge(img_ids: List[int], eval_imgs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+     """
+     Merge image IDs and evaluation images from different processes.
+
+     Args:
+         img_ids (List[int]): List of image ID arrays from different processes.
+         eval_imgs (np.ndarray): Evaluation images from different processes.
+
+     Returns:
+         Tuple[np.ndarray, np.ndarray]: Merged image IDs and evaluation images.
+     """
+     all_img_ids = all_gather(img_ids)
+     all_eval_imgs = all_gather(eval_imgs)
+
+     merged_img_ids = []
+     for p in all_img_ids:
+         merged_img_ids.extend(p)
+
+     merged_eval_imgs = []
+     for p in all_eval_imgs:
+         merged_eval_imgs.append(p)
+
+     merged_img_ids = np.array(merged_img_ids)
+     merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
+
+     # keep only unique (and sorted) images
+     merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
+     merged_eval_imgs = merged_eval_imgs[..., idx]
+
+     return merged_img_ids, merged_eval_imgs
+
+
+ def all_gather(data: Any) -> List[Any]:
+     """
+     Run all_gather on arbitrary picklable data (not necessarily tensors).
+
+     Args:
+         data (Any): any picklable object
+     Returns:
+         List[Any]: list of data gathered from each rank
+     """
+     world_size = get_world_size()
+     if world_size == 1:
+         return [data]
+
+     # serialize to a byte Tensor
+     buffer = pickle.dumps(data)
+     storage = torch.ByteStorage.from_buffer(buffer)
+     tensor = torch.ByteTensor(storage).to("cuda")
+
+     # obtain the Tensor size of each rank
+     local_size = torch.tensor([tensor.numel()], device="cuda")
+     size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)]
+     dist.all_gather(size_list, local_size)
+     size_list = [int(size.item()) for size in size_list]
+     max_size = max(size_list)
+
+     # receive Tensors from all ranks; tensors are padded to a common size
+     # because torch all_gather does not support gathering tensors of
+     # different shapes
+     tensor_list = []
+     for _ in size_list:
+         tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
+     if local_size != max_size:
+         padding = torch.empty(
+             size=(max_size - local_size,), dtype=torch.uint8, device="cuda"
+         )
+         tensor = torch.cat((tensor, padding), dim=0)
+     dist.all_gather(tensor_list, tensor)
+
+     data_list = []
+     for size, tensor in zip(size_list, tensor_list):
+         buffer = tensor.cpu().numpy().tobytes()[:size]
+         data_list.append(pickle.loads(buffer))
+
+     return data_list
+
+
+ def get_world_size() -> int:
+     """
+     Get the number of processes in the distributed environment.
+
+     Returns:
+         int: Number of processes.
+     """
+     if not is_dist_avail_and_initialized():
+         return 1
+     return dist.get_world_size()
+
+
+ def is_dist_avail_and_initialized() -> bool:
+     """
+     Check if the distributed environment is available and initialized.
+
+     Returns:
+         bool: True if the distributed environment is available and initialized, \
+             False otherwise.
+     """
+     return dist.is_available() and dist.is_initialized()
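
Two small sanity checks for the helpers above: the box conversion is a pure tensor operation, and `all_gather` falls back to a single-element list whenever `torch.distributed` is not initialized:

```
import torch
from coco_metrics.utils import all_gather, convert_to_xywh

boxes_xyxy = torch.tensor([[10.0, 20.0, 50.0, 80.0]])
print(convert_to_xywh(boxes_xyxy))  # tensor([[10., 20., 40., 60.]])

# without an initialized process group, the world size is 1
print(all_gather({"img_ids": [1, 2]}))  # [{'img_ids': [1, 2]}]
```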
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ torch
+ torchvision
+ tqdm
+ transformers
+ datasets
+ evaluate
+ matplotlib
setup.py ADDED
@@ -0,0 +1,45 @@
+ import subprocess
+
+ from setuptools import setup
+ from setuptools.command.develop import develop
+ from setuptools.command.egg_info import egg_info
+ from setuptools.command.install import install
+
+ from coco_metrics import __version__
+
+
+ def custom_command():
+     # install build-time and runtime dependencies alongside the package
+     subprocess.call(["pip", "install", "numpy", "cython"])
+     subprocess.call(["pip", "install", "-r", "requirements.txt", "--user"])
+
+
+ class CustomInstallCommand(install):
+     def run(self):
+         install.run(self)
+         custom_command()
+
+
+ class CustomDevelopCommand(develop):
+     def run(self):
+         develop.run(self)
+         custom_command()
+
+
+ class CustomEggInfoCommand(egg_info):
+     def run(self):
+         egg_info.run(self)
+         custom_command()
+
+
+ setup(
+     name="coco_metrics",
+     description="COCO Metrics for Object Detection and Instance Segmentation",
+     version=__version__,
+     zip_safe=True,
+     url="https://github.com/rafaelpadilla/coco_metrics/",
+     packages=["coco_metrics", "coco_metrics.pycocotools"],
+     include_package_data=True,
+     cmdclass={
+         "install": CustomInstallCommand,
+         "develop": CustomDevelopCommand,
+         "egg_info": CustomEggInfoCommand,
+     },
+ )