|
import logging
from typing import Any, Dict, List

import pandas as pd
|
|
|
|
|
def evaluate_prediction(prediction: str, clean_answers: list[str] | str) -> int: |
|
"""Evaluate the buzz of a prediction against the clean answers.""" |
|
if isinstance(clean_answers, str): |
|
print("clean_answers is a string") |
|
clean_answers = [clean_answers] |
|
pred = prediction.lower().strip() |
|
if not pred: |
|
return 0 |
|
for answer in clean_answers: |
|
answer = answer.strip().lower() |
|
if answer and answer in pred: |
|
print(f"Found {answer} in {pred}") |
|
return 1 |
|
return 0 |
|
|
|
|
|
def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:
    """Build the DataFrame that backs the confidence plot.

    Args:
        results: Result dicts. Outside top-k mode each one must provide the
            "position", "confidence" and "answer" keys.
        top_k_mode: When truthy, the work is delegated to
            ``_create_top_k_plot_data``.

    Returns:
        Outside top-k mode, a frame with "position", "confidence" and
        "answer" columns holding one row per result. In top-k mode, whatever
        ``_create_top_k_plot_data`` returns.
    """
    if top_k_mode:
        return _create_top_k_plot_data(results)

    # One column per field, filled in result order.
    fields = ("position", "confidence", "answer")
    return pd.DataFrame({field: [entry[field] for entry in results] for field in fields})
|
|
|
|
|
def _create_top_k_plot_data(
    results: List[Dict],
    *,
    guesses_per_position: int = 3,
    max_answers: int = 5,
) -> pd.DataFrame:
    """Build long-format plot data that follows the leading answers across positions.

    Answers to track are collected from the first ``guesses_per_position``
    guesses of every result, in first-seen order, and capped at
    ``max_answers``. Each result then contributes one row per tracked answer.

    Args:
        results: Result dicts. Each must have a "position" key and may have a
            "guesses" list of dicts with "answer" and "confidence" keys.
            A missing or ``None`` "guesses" value is treated as no guesses.
        guesses_per_position: How many leading guesses of each result are
            considered when choosing the answers to track.
        max_answers: Maximum number of distinct answers to track.

    Returns:
        A frame with "position", "confidence" and "answer" columns. The
        confidence is taken from the first guess at that position whose
        answer matches, or 0 when the answer was not guessed there. The
        columns are present even when there are no rows.
    """
    # A dict serves as an insertion-ordered set. Slicing list(set(...)) would
    # keep an arbitrary subset that can change between interpreter runs,
    # because string hashing is randomized per process.
    tracked = {}
    for result in results:
        for guess in (result.get("guesses") or [])[:guesses_per_position]:
            answer = guess.get("answer")
            if answer:
                tracked.setdefault(answer, None)
    top_answers = list(tracked)[:max_answers]

    rows = []
    for result in results:
        position = result["position"]
        guesses = result.get("guesses") or []
        for answer in top_answers:
            # First matching guess wins; an answer absent here gets 0.
            confidence = next(
                (g.get("confidence", 0) for g in guesses if g.get("answer") == answer),
                0,
            )
            rows.append({"position": position, "confidence": confidence, "answer": answer})

    # Explicit columns keep the schema stable when there are no rows.
    return pd.DataFrame(rows, columns=["position", "confidence", "answer"])
|
|
|
|
|
def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame:
    """Flatten per-position guesses into one ranked row per guess.

    Args:
        results: Result dicts. Each must have a "position" key and may have a
            "guesses" list of dicts. A missing or ``None`` "guesses" value is
            treated as no guesses.

    Returns:
        A frame with "position", "answer", "confidence" and "rank" columns.
        "rank" is the 1-based index of the guess within its result. A guess
        without an "answer" gets "" and one without a "confidence" gets 0.
        The columns are present even when there are no rows.
    """
    columns = ["position", "answer", "confidence", "rank"]

    rows = []
    for result in results:
        # Read the position up front so a result without one fails loudly
        # even when it has no guesses.
        position = result["position"]
        rows.extend(
            {
                "position": position,
                "answer": guess.get("answer", ""),
                "confidence": guess.get("confidence", 0),
                "rank": rank,
            }
            for rank, guess in enumerate(result.get("guesses") or [], start=1)
        )

    # Explicit columns keep the schema stable when there are no rows, so
    # downstream column access does not raise KeyError.
    return pd.DataFrame(rows, columns=columns)
|
|