Spaces:
Running
Running
| from typing import Dict, List | |
| import numpy as np | |
| from evaluation.evaluate_utils.utils import _align_bags | |
def calculate_f1_score(precision, recall):
    """Return the F1 score (harmonic mean) of *precision* and *recall*.

    Returns 0 when both inputs are zero, avoiding a division by zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    return 2 * precision * recall / denominator
def calc_recall(pred: Dict, gold: Dict, use_gold_for_eval: bool):
    """Score ``pred`` against ``gold`` key-by-key and return the mean score.

    For each key in ``gold``: the per-key score is 0 when the key is missing
    from ``pred`` or when the (number-normalized) values differ in type;
    otherwise it is the result of the evaluator selected by value type —
    the gold value's type when ``use_gold_for_eval`` is True, else the pred
    value's type.

    Returns 0 for an empty ``gold`` dict (there is nothing to recall).
    """
    # Local import: the factory module imports from this package, so a
    # top-level import would be circular.
    from evaluation.evaluate_utils.evaluate_factory import get_evaluator_from_gold_answer

    recall = []
    for gold_key, gold_value in gold.items():
        pred_value = pred.get(gold_key)
        # Normalize both sides so "5,5" / "$5" and 5 compare as floats.
        gold_value = fix_number(gold_value)
        pred_value = fix_number(pred_value)
        if gold_key not in pred:
            recall.append(0)
            continue
        evaluator = (
            get_evaluator_from_gold_answer(type(gold_value))
            if use_gold_for_eval
            else get_evaluator_from_gold_answer(type(pred_value))
        )
        # Exact-type match is intentional: the evaluator is dispatched on
        # type, so mixed types cannot be scored meaningfully.
        if type(pred_value) != type(gold_value):
            recall.append(0)
            continue
        recall.append(evaluator(pred_value, gold_value))
    if not recall:
        # np.average([]) would return nan and emit a RuntimeWarning,
        # which then propagates into the F1 computation.
        return 0
    return np.average(recall)
def fix_number(number):
    """Normalize *number* to a float when possible.

    Strings: strip '$', '%', and 'sqft' markers, treat ',' as a decimal
    point (e.g. "3,5" -> 3.5), and parse as float; the original string is
    returned unchanged when parsing fails. Ints are widened to float.
    Anything else (including bools, floats, None) is returned as-is.
    """
    if isinstance(number, str):
        # Each marker becomes a space so adjacent digits do not fuse,
        # then surrounding whitespace is dropped.
        cleaned = (
            number.replace('$', ' ')
            .replace('%', ' ')
            .replace('sqft', ' ')
            .strip()
        )
        cleaned = cleaned.replace(',', '.')
        try:
            return float(cleaned)
        # float() on a str can only raise ValueError; the original bare
        # except also swallowed KeyboardInterrupt/SystemExit.
        except ValueError:
            return number
    # type(...) == int deliberately excludes bool (a subclass of int),
    # matching the original behavior: True/False pass through unchanged.
    elif type(number) == int:
        return float(number)
    else:
        return number
def evaluate_pair_of_dicts(pred: Dict, gold: Dict):
    """Return the F1 score of a single predicted dict against a gold dict.

    Recall scores pred against gold using gold-typed evaluators; precision
    scores gold against pred using pred-typed evaluators.
    """
    recall_score = calc_recall(pred, gold, True)
    precision_score = calc_recall(gold, pred, False)
    return calculate_f1_score(precision_score, recall_score)
def evaluate_dicts(pred: List[Dict], gold: List[Dict]):
    """Mean best-alignment F1 between predicted and gold collections of dicts.

    ``pred`` is considered valid when it is a dict, an empty sequence, or a
    list whose first element is a dict; any other shape scores 0 outright.
    Valid inputs are aligned against ``gold`` via ``_align_bags`` and the
    per-pair F1 scores are averaged.
    """
    # isinstance instead of type(...) == T: same acceptance for the plain
    # dict/list inputs this sees, and idiomatic type checking.
    is_valid = (
        isinstance(pred, dict)
        or len(pred) == 0
        or (isinstance(pred, list) and isinstance(pred[0], dict))
    )
    if not is_valid:
        return 0
    max_alignment_scores = _align_bags(pred, gold, evaluate_pair_of_dicts)
    return np.average(max_alignment_scores)