# (Page-status text captured together with the source: "Spaces: Running".)
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import os
from typing import Optional

import datasets
import evaluate
from seametrics.user_friendly.utils import calculate_from_payload
import wandb
# BibTeX entries cited by this module, one entry per block of lines.
# The accent in "Leal-Taix{\'e}" is spelled with an escaped backslash: in a
# normal (non-raw) string literal a bare \' collapses to a plain apostrophe,
# which would silently drop the BibTeX accent command.
_CITATION = """\
@InProceedings{huggingface:module,
title = {A great new module},
authors={huggingface, Inc.},
year={2020}
}
@article{milan2016mot16,
title={MOT16: A benchmark for multi-object tracking},
author={Milan, Anton and Leal-Taix{\\'e}, Laura and Reid, Ian and Roth, Stefan and Schindler, Konrad},
journal={arXiv preprint arXiv:1603.00831},
year={2016}
}
"""

# Human-readable summary of the module; passed as ``description`` to
# ``evaluate.MetricInfo`` by ``UserFriendlyMetrics._info``.
_DESCRIPTION = """\
The MOT Metrics module is designed to evaluate multi-object tracking (MOT)
algorithms by computing various metrics based on predicted and ground truth bounding
boxes. It serves as a crucial tool in assessing the performance of MOT systems,
aiding in the iterative improvement of tracking algorithms."""

# Argument documentation; passed as ``inputs_description`` to
# ``evaluate.MetricInfo`` by ``UserFriendlyMetrics._info``.
# NOTE(review): the predictions/references wording looks like unedited template
# text -- it talks about space-separated token strings while ``_info`` declares
# sequences of float sequences -- and the ``payload``, ``filters``,
# ``recognition_thresholds`` and ``debug`` arguments of ``_compute`` are not
# described. The ``max_iou`` entry calls the value a *minimum* threshold despite
# the parameter name. Confirm the intended contract and update this text.
_KWARGS_DESCRIPTION = """
Calculates how good are predictions given some references, using certain scores
Args:
    predictions: list of predictions to score. Each predictions
        should be a string with tokens separated by spaces.
    references: list of reference for each prediction. Each
        reference should be a string with tokens separated by spaces.
    max_iou (`float`, *optional*):
        If specified, this is the minimum Intersection over Union (IoU) threshold to consider a detection as a true positive.
        Default is 0.5.
"""
class UserFriendlyMetrics(evaluate.Metric):
    """``evaluate`` metric module described by ``_DESCRIPTION``.

    ``_info`` registers the module metadata (description, citation, input
    documentation and input features) and ``_compute`` is the scoring entry
    point.

    NOTE: ``_compute`` is still a stub. It ignores all of its arguments and
    returns the fixed placeholder produced by ``dummy_values()``.
    """

    def _info(self):
        """Return the ``evaluate.MetricInfo`` object describing this module.

        Both ``predictions`` and ``references`` are declared as sequences of
        sequences of floats.
        """
        # TODO: the codebase/reference URLs below are still the template
        # placeholders and should be replaced with the real locations.
        return evaluate.MetricInfo(
            module_type="metric",
            # This is the description that will appear on the module's page.
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference.
            features=datasets.Features(
                {
                    "predictions": datasets.Sequence(
                        datasets.Sequence(datasets.Value("float"))
                    ),
                    "references": datasets.Sequence(
                        datasets.Sequence(datasets.Value("float"))
                    ),
                }
            ),
            # Additional links to the codebase or references.
            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
            reference_urls=["http://path.to.reference.url/new_module"],
        )

    def _download_and_prepare(self, dl_manager):
        """Optional hook to download external resources; nothing is downloaded."""

    def _compute(
        self,
        payload,
        max_iou: float = 0.5,
        filters: Optional[dict] = None,
        recognition_thresholds: Optional[list] = None,
        debug: bool = False,
    ) -> dict:
        """Return the scores (currently a fixed placeholder).

        Args:
            payload: Input data to score. Not inspected by the current stub.
            max_iou: IoU threshold, 0.5 by default. Not used by the current
                stub.
            filters: Filter specification. ``None`` (the default) stands for
                an empty dict. Not used by the current stub.
            recognition_thresholds: Threshold values. ``None`` (the default)
                stands for ``[0.3, 0.5, 0.8]``. Not used by the current stub.
            debug: Debug flag. Not used by the current stub.

        Returns:
            The nested placeholder dictionary built by ``dummy_values()``.
        """
        # ``None`` sentinels replace the former mutable ``{}`` / ``[...]``
        # defaults. Resolving them here gives every call its own fresh
        # objects, so state can never leak between calls through the
        # defaults once the real computation starts using these values.
        if filters is None:
            filters = {}
        if recognition_thresholds is None:
            recognition_thresholds = [0.3, 0.5, 0.8]

        # TODO: compute the real scores from the arguments.
        # NOTE(review): ``calculate_from_payload`` is imported at module level
        # but never called; presumably it is the intended implementation --
        # confirm its signature before wiring it in.
        return dummy_values()
def dummy_values():
    """Build and return a fresh placeholder results dictionary.

    Layout of the returned value::

        {model: {"overall": {size: stats},
                 "per_sequence": {sequence: {size: stats}}}}

    where ``size`` is one of ``all``/``small``/``medium``/``large`` and
    ``stats`` maps ``tp``, ``fp``, ``fn``, ``precision``, ``recall`` and
    ``f1`` to hard-coded numbers. A new dictionary is created on every call.
    """
    stat_keys = ("tp", "fp", "fn", "precision", "recall", "f1")
    size_keys = ("all", "small", "medium", "large")

    def by_size(*rows):
        # One row of six numbers per size bucket, given in ``size_keys`` order.
        return {
            size: dict(zip(stat_keys, row))
            for size, row in zip(size_keys, rows)
        }

    model_1 = {
        "overall": by_size(
            (50, 20, 10, 0.71, 0.83, 0.76),
            (15, 5, 2, 0.75, 0.88, 0.81),
            (25, 10, 5, 0.71, 0.83, 0.76),
            (10, 5, 3, 0.67, 0.77, 0.71),
        ),
        "per_sequence": {
            "sequence_1": by_size(
                (30, 15, 7, 0.67, 0.81, 0.73),
                (10, 3, 1, 0.77, 0.91, 0.83),
                (15, 7, 2, 0.68, 0.88, 0.77),
                (5, 2, 1, 0.71, 0.83, 0.76),
            ),
        },
    }
    model_2 = {
        "overall": by_size(
            (60, 25, 15, 0.71, 0.80, 0.75),
            (20, 6, 3, 0.77, 0.87, 0.82),
            (30, 12, 5, 0.71, 0.86, 0.78),
            (10, 7, 5, 0.59, 0.67, 0.63),
        ),
        "per_sequence": {
            "sequence_1": by_size(
                (40, 18, 8, 0.69, 0.83, 0.75),
                (12, 4, 2, 0.75, 0.86, 0.80),
                (20, 8, 3, 0.71, 0.87, 0.78),
                (8, 6, 3, 0.57, 0.73, 0.64),
            ),
        },
    }
    return {"model_1": model_1, "model_2": model_2}