# Source: Hugging Face Space "qanta-challenge/quizbowl-submission" (status: Running).
# File size: 10,708 bytes.

import json
import logging
import re
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd


def _make_answer_html(answer: str, clean_answers: list[str] = []) -> str:
    clean_answers = [a for a in clean_answers if len(a.split()) <= 6 and a != answer]
    additional_answers_html = ""
    if clean_answers:
        additional_answers_html = f"<span class='bonus-answer-text'> [or {', '.join(clean_answers)}]</span>"
    return f"""
        <div class='bonus-answer'>
            <span class='bonus-answer-label'>Answer: </span>
            <span class='bonus-answer-text'>{answer}</span>
            {additional_answers_html}
        </div>
    """


def _get_token_classes(confidence, buzz, score) -> str:
    if confidence is None:
        return "token"
    elif not buzz:
        return "token guess-point no-buzz"
    else:
        return f"token guess-point buzz-{score}"


def _create_token_tooltip_html(values) -> str:
    if not values:
        return ""
    confidence = values.get("confidence", 0)
    buzz = values.get("buzz", 0)
    score = values.get("score", 0)
    answer = values.get("answer", "")
    answer_tokens = answer.split()
    if len(answer_tokens) > 10:
        k = len(answer_tokens) - 10
        answer = " ".join(answer_tokens[:10]) + f"...[{k} more words]"

    color = "#a3c9a3" if score else "#ebbec4"  # Light green for correct, light pink for incorrect

    return f"""
        <div class="tooltip card" style="background-color: {color}; border-radius: 8px; padding: 12px; box-shadow: 2px 4px 8px rgba(0, 0, 0, 0.15);">
            <div class="tooltip-content" style="font-family: 'Arial', sans-serif; color: #000;">
                <h4 style="margin: 0 0 8px; color: #000;">💡 Answer</h4>
                <p style="font-weight: bold; margin: 0 0 8px; color: #000;">{answer}</p>
                <p style="margin: 0 0 4px; color: #000;">📊 <b style="color: #000;">Confidence:</b> {confidence:.2f}</p>
                <p style="margin: 0; color: #000;">🔍 <b style="color: #000;">Status:</b> {"✅ Correct" if score else "❌ Incorrect" if buzz else "🚫 No Buzz"}</p>
            </div>
        </div>
    """


def create_token_html(token: str, values: dict, i: int) -> str:
    """Wrap one question token in a ``<span>`` with CSS classes and a tooltip.

    Args:
        token: Token text to display.
        values: Evaluation values for this token (may be empty); read for the
            keys ``confidence``, ``buzz`` and ``score``.
        i: Number written into the span's ``id`` and ``data-index``.

    Returns:
        The ``<span>`` markup, including the tooltip when ``values`` is
        non-empty.
    """
    confidence = values.get("confidence", None)
    buzz = values.get("buzz", 0)
    score = values.get("score", 0)

    if not re.match(r"\w+", token):
        # Tokens that do not start with a word character are shown without a
        # marker emoji; their spaces become non-breaking so they still render.
        label = token.replace(" ", "&nbsp;")
    elif buzz:
        label = f"{token} 🚨"
    elif values:
        label = f"{token} 💭"
    else:
        label = token

    classes = _get_token_classes(confidence, buzz, score)
    tooltip = _create_token_tooltip_html(values)
    return f'<span id="token-{i}" class="{classes}" data-index="{i}">{label}{tooltip}</span>'


def create_tossup_html(
    tokens: list[str],
    answer_primary: str,
    clean_answers: list[str],
    marker_indices: "list[int] | None" = None,
    eval_points: "list[tuple[int, dict]] | None" = None,
) -> str:
    """Render a tossup question card: hoverable tokens followed by the answer.

    Args:
        tokens: Question tokens, in display order.
        answer_primary: Primary answer shown below the question.
        clean_answers: Alternative answers passed on to the answer renderer.
        marker_indices: Accepted for backward compatibility; not used by the
            current rendering.
        eval_points: ``(token_index, values)`` pairs. The values for a
            0-based token index are attached to that token's span.

    Returns:
        The card markup, or a small error ``<div>`` if rendering raised.
    """
    try:
        # ``None`` defaults replace the former mutable ``[]`` defaults.
        values_by_index = dict(eval_points or [])

        # Spans are numbered from 1 while eval points are keyed from 0.
        html_tokens = [
            create_token_html(token, values_by_index.get(i, {}), i + 1) for i, token in enumerate(tokens)
        ]
        tokens_html = "".join(html_tokens)

        answer_html = _make_answer_html(answer_primary, clean_answers)
        return f"""
        <div class='bonus-container'>
            <div class='bonus-card'>
                <div class='tossup-question'>
                    {tokens_html}
            </div>
                {answer_html}
            </div>
        </div>
        """
    except Exception as e:
        # Best-effort UI helper: log with traceback and show the error in place.
        logging.error(f"Error creating token HTML: {e}", exc_info=True)
        return f"<div class='token-container'>Error creating tokens: {str(e)}</div>"


def create_bonus_html(leadin: str, parts: list[dict]) -> str:
    """Render a bonus question card: the lead-in followed by numbered parts.

    Args:
        leadin: Lead-in text shown at the top of the card.
        parts: One dict per part, read for the keys ``part`` (question text),
            ``answer_primary`` and ``clean_answers``.

    Returns:
        The card markup with each part numbered from 1 and followed by its
        answer line.

    Raises:
        KeyError: If a part is missing one of the keys listed above.
    """
    leadin_html = f"<div class='bonus-leadin'>{leadin}</div>"

    parts_html = []
    for number, part in enumerate(parts, start=1):
        question_text = part["part"]
        answer_html = _make_answer_html(part["answer_primary"], part["clean_answers"])
        parts_html.append(
            f"""
                <div class='bonus-part'>
                    <div class='bonus-part-text'><b>#{number}.</b> {question_text}</div>
                    {answer_html}
                </div>
            """
        )

    return f"""
            <div class='bonus-container'>
                <div class='bonus-card'>
                    {leadin_html}
                    {"".join(parts_html)}
                </div>
            </div>
        """


def create_line_plot(eval_points: list[tuple[int, dict]], highlighted_index: int = -1) -> pd.DataFrame:
    """Build the DataFrame of plot rows for per-token values.

    Args:
        eval_points: ``(position, payload)`` pairs. ``payload`` is either a
            dict carrying ``confidence`` (and optionally ``buzz``), or a
            legacy ``(value, buzz)`` pair.
        highlighted_index: Position of the hovered token. When ``>= 0`` two
            ``hover-line`` rows (value 0 and value 1) are appended so a
            vertical line can be drawn at that position.

    Returns:
        A DataFrame with the columns ``position``, ``value``, ``type``,
        ``highlight`` and ``color``. It has no rows when there is nothing to
        plot or when building the rows raised.
    """
    columns = ["position", "value", "type", "highlight", "color"]
    try:
        data = []

        # One "buzz" row per evaluation point.
        for position, payload in eval_points:
            if isinstance(payload, dict):
                value = payload["confidence"]
                # NOTE(review): the colour flag is assumed to be "buzz", as in
                # the legacy pair form — confirm against the caller.
                buzz = payload.get("buzz", 0)
            else:
                value, buzz = payload
            data.append(
                {
                    "position": position,
                    "value": value,
                    "type": "buzz",
                    "highlight": True,
                    "color": "#ff4444" if buzz == 0 else "#228b22",
                }
            )

        if highlighted_index >= 0:
            # Two points at the same position span a vertical line from 0 to 1.
            data.extend(
                {
                    "position": highlighted_index,
                    "value": endpoint,
                    "type": "hover-line",
                    "color": "#000000",
                    "highlight": True,
                }
                for endpoint in (0, 1)
            )

        # Fixed columns keep the empty frame consistent with the error path.
        return pd.DataFrame(data, columns=columns)
    except Exception as e:
        logging.error(f"Error creating line plot: {e}", exc_info=True)
        # Return an empty DataFrame with the expected columns
        return pd.DataFrame(columns=columns)


def create_tossup_confidence_pyplot(
    tokens: list[str], eval_points: list[tuple[int, dict]], highlighted_index: int = -1
) -> plt.Figure:
    """Plot confidence against token position and mark the buzz points.

    Args:
        tokens: Question tokens; used to label buzz points.
        eval_points: ``(token_index, values)`` pairs; ``values`` is read for
            the keys ``confidence``, ``buzz`` and ``score``.
        highlighted_index: 0-based index of the hovered token. When ``>= 0``
            a dashed vertical line is drawn at ``highlighted_index + 1``.

    Returns:
        The matplotlib figure. It is created through pyplot, which keeps it
        registered until it is closed.
    """
    plt.style.use("ggplot")  # Applies the ggplot style globally.
    fig = plt.figure(figsize=(11, 5))
    ax = fig.add_subplot(111)

    # The curve starts at the origin; token i is drawn at x = i + 1.
    x = [0]
    y = [0]
    for i, v in eval_points:
        x.append(i + 1)
        y.append(v["confidence"])

    ax.plot(x, y, "o--", color="#4698cf")
    for i, v in eval_points:
        if not v["buzz"]:
            continue
        confidence = v["confidence"]
        color = "green" if v["score"] else "red"
        ax.plot(i + 1, confidence, "o", color=color)
        if not 0 <= i < len(tokens):
            # Keep the marker but skip the label: indexing tokens here would
            # raise (or, for a negative index, label the wrong token).
            logging.warning(f"Token index {i} is out of bounds for n_tokens: {len(tokens)}")
            continue
        ax.annotate(f"{tokens[i]}", (i + 1, confidence), textcoords="offset points", xytext=(0, 10), ha="center")

    if highlighted_index >= 0:
        # Add light vertical line for the highlighted token from 0 to 1
        ax.axvline(x=highlighted_index + 1, color="#ff9900", linestyle="--", ymin=0, ymax=1)

    ax.set_title("Buzz Confidence")
    ax.set_xlabel("Token Index")
    ax.set_ylabel("Confidence")
    ax.set_xticks(x)
    ax.set_xticklabels(x)
    return fig


def create_scatter_pyplot(token_positions: list[int], scores: list[int]) -> plt.Figure:
    """Scatter-plot scores against token positions, sized by frequency.

    Each distinct ``(position, score)`` pair becomes one marker whose area is
    20 times the number of occurrences of that pair.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(11, 5))
    ax = fig.add_subplot(111)

    pair_counts = Counter(zip(token_positions, scores))
    xs = [position for position, _ in pair_counts]
    ys = [score for _, score in pair_counts]
    sizes = [occurrences * 20 for occurrences in pair_counts.values()]

    ax.scatter(xs, ys, color="#4698cf", s=sizes)

    return fig


def create_bonus_confidence_plot(parts: list[dict], model_outputs: list[dict]) -> plt.Figure:
    """Draw one confidence bar per bonus part, coloured by correctness.

    Args:
        parts: Bonus parts; only their count is used, for the x positions.
        model_outputs: One dict per part, read for ``confidence`` and
            ``score``.

    Returns:
        A bar chart where a bar is green when its score equals 1 and red
        otherwise.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)

    # Parts are numbered from 1 on the x axis.
    part_numbers = range(1, len(parts) + 1)
    bar_heights = [result["confidence"] for result in model_outputs]
    part_scores = [result["score"] for result in model_outputs]

    bars = ax.bar(part_numbers, bar_heights, color="#4698cf")

    # Recolour each bar according to whether that part was answered correctly.
    for bar, part_score in zip(bars, part_scores):
        if part_score == 1:
            bar.set_color("green")
        else:
            bar.set_color("red")

    ax.set_title("Part Confidence")
    ax.set_xlabel("Part Number")
    ax.set_ylabel("Confidence")
    ax.set_xticks(part_numbers)
    ax.set_xticklabels([f"Part {number}" for number in part_numbers])

    return fig


def update_tossup_plot(highlighted_index: int, state: str) -> "plt.Figure | pd.DataFrame":
    """Redraw the tossup confidence plot when a token is hovered.

    Args:
        highlighted_index: Index of the hovered token. ``None`` or ``""``
            means no token is highlighted; other values are converted with
            ``int``.
        state: JSON string holding ``tokens`` and ``values`` (the evaluation
            points).

    Returns:
        The confidence figure with a vertical line at the hovered token, or
        an empty DataFrame when the state is empty, holds no tokens or
        values, or an error occurred.
    """
    try:
        if not state or state == "{}":
            logging.warning("Empty state provided to update_plot")
            return pd.DataFrame()

        # Index 0 is a valid token, so only a missing value maps to the
        # "no highlight" sentinel (-1) that the plotting function expects.
        if highlighted_index is None or highlighted_index == "":
            highlighted_index = -1
        else:
            highlighted_index = int(highlighted_index)
        logging.info(f"Update plot triggered with token index: {highlighted_index}")

        data = json.loads(state)
        tokens = data.get("tokens", [])
        values = data.get("values", [])

        if not tokens or not values:
            logging.warning("No tokens or values found in state")
            return pd.DataFrame()

        # Create updated plot with highlighting of the token point
        return create_tossup_confidence_pyplot(tokens, values, highlighted_index)
    except Exception as e:
        logging.error(f"Error updating plot: {e}", exc_info=True)
        return pd.DataFrame()