File size: 2,660 Bytes
193db9d
 
 
 
 
3b39b49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193db9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
from typing import Any, Dict, List

import pandas as pd


def evaluate_prediction(prediction: str, clean_answers: list[str] | str) -> int:
    """Evaluate the buzz of a prediction against the clean answers."""
    if isinstance(clean_answers, str):
        print("clean_answers is a string")
        clean_answers = [clean_answers]
    pred = prediction.lower().strip()
    if not pred:
        return 0
    for answer in clean_answers:
        answer = answer.strip().lower()
        if answer and answer in pred:
            print(f"Found {answer} in {pred}")
            return 1
    return 0


def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:
    """Create a DataFrame for the confidence plot."""
    if not top_k_mode:
        return pd.DataFrame(
            {
                "position": [r["position"] for r in results],
                "confidence": [r["confidence"] for r in results],
                "answer": [r["answer"] for r in results],
            }
        )

    # For top-k mode, extract and plot top answers
    return _create_top_k_plot_data(results)


def _create_top_k_plot_data(results: List[Dict]) -> pd.DataFrame:
    """Create plot data for top-k mode."""
    # Find top answers across all positions (limited to top 5)
    top_answers = set()
    for r in results:
        for g in r.get("guesses", [])[:3]:  # Get top 3 from each position
            if g.get("answer"):
                top_answers.add(g.get("answer"))

    top_answers = list(top_answers)[:5]  # Limit to 5 total answers

    # Create plot data for each answer
    all_data = []
    for position_idx, result in enumerate(results):
        position = result["position"]
        for answer in top_answers:
            confidence = 0
            for guess in result.get("guesses", []):
                if guess.get("answer") == answer:
                    confidence = guess.get("confidence", 0)
                    break
            all_data.append({"position": position, "confidence": confidence, "answer": answer})

    return pd.DataFrame(all_data)


def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame:
    """Create a DataFrame for top-k results."""
    df_rows = []
    for result in results:
        position = result["position"]
        for i, guess in enumerate(result.get("guesses", [])):
            df_rows.append(
                {
                    "position": position,
                    "answer": guess.get("answer", ""),
                    "confidence": guess.get("confidence", 0),
                    "rank": i + 1,
                }
            )
    return pd.DataFrame(df_rows)