import os
import pandas as pd
import torch
import gradio as gr
from scipy.stats import percentileofscore
from huggingface_hub import HfApi
from transformers import AutoModelForSequenceClassification, AutoTokenizer
API        = HfApi()
REPO_ID    = "Tman212/question_complexity_scoring"  # change to your space
HISTORY_FN = "files/score_history.csv"              # track it in 'files/'
# ─── 1) Load model & tokenizer ───────────────────────────────────────────────
MODEL_DIR = "./model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_DIR, local_files_only=True
)
model.eval()
def reveal_history(key: str):
    """Return the history CSV path only if the entered key matches."""
    if key and key == os.environ.get("ADMIN_PASSWORD"):
        return "files/question_history.csv"
    else:
        # returning None makes Gradio show nothing / keep disabled
        return None
# Path for persistent history
HISTORY_FILE = "score_history.csv"
def persist_history():
    """Stage & commit the local CSV back into the Space repo."""
    API.upload_file(
        path_or_fileobj=HISTORY_FN,
        path_in_repo=HISTORY_FN,
        repo_id=REPO_ID,
        repo_type="space",
        token=os.environ["HF_TOKEN"],
        commit_message="🔄 update scoreboard history"
    )
# ─── Coaching helper ───────────────────────────────────────────────────────────
TIP_MAP = {
    1: "❓ To move to Understand, add “Why do you think…?”",
    2: "🔧 For Apply, ask “How would you use X in Y?”",
    3: "🔍 To reach Analyze, prompt “What are the components of…?”",
    4: "⚖️ For Evaluate, ask “What criteria would you use to judge…?”",
    5: "💡 For Create, try “Can you design or propose…?”",
    6: "🎉 You’re at Create: combine ideas for novelty!"
}
def get_coaching(rounded_scores):
    """
    Returns a single coaching tip for the *lowest* complexity question
    in the current session, so you get targeted advice on what to bump up.
    """
    if not rounded_scores:
        return ""
    # 1) Find the minimum level in this session
    min_level = min(rounded_scores)
    # 2) Look up the tip
    tip       = TIP_MAP.get(min_level, "")
    # 3) Build a little message
    html = (
        "<div class='coaching-tips'>"
        "<strong>💡 Coaching Tip:</strong> "
        f"Your lowest‐scoring question is at level {min_level}. "
        f"{tip}"
        "</div>"
    )
    return html

# ─── 2) Scoring logic with continuous Bloom & Scoreboard ────────────────────
def score_questions(uploaded_file, text, ignore_validation=False):
    # 1) gather
    if uploaded_file:
        df_in    = pd.read_excel(uploaded_file)
        questions = df_in["response"].astype(str).tolist()
    else:
        questions = [q.strip() for q in text.splitlines() if q.strip()]

    if not ignore_validation:
        valid_qs, invalid_qs = [], []
        for q in questions:
            if len(q.split()) >= 2 and q.endswith(("?", "？")):
                valid_qs.append(q)
            else:
                invalid_qs.append(q)
        if invalid_qs:
            warning_html = (
                "<div style='color:red; padding:1rem; border:1px solid #F00;'>"
                "<strong>The entries below weren’t scored because they didn’t look like full questions "
                "(≥2 words + trailing '?'):</strong><br>"
                + "".join(f"– {iq}<br>" for iq in invalid_qs)
                + "</div>"
            )
            # return early (table, slider, bloom, stats, coaching)
            return None, 1, "", warning_html, ""

        questions = valid_qs
    else:
        # skip validation, proceed with all questions
        questions = [q for q in questions if q.strip()]

    # 3) predict & score
    inputs    = tokenizer(questions, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        raw_logits = model(**inputs).logits.mean(dim=1).tolist()
    cont_scores    = [((r + 3) / 6) * 5 + 1 for r in raw_logits]
    rounded_scores = [int(round(c)) for c in cont_scores]

    # 4) build display table
    df_out = pd.DataFrame({
        "Question": questions,
        "Complexity Score": rounded_scores
    })

    # 5) append to full‐history CSV
    os.makedirs("files", exist_ok=True)
    hist_path = "files/question_history.csv"
    new_hist = pd.DataFrame({
      "Question": questions,
      "Rounded Score": rounded_scores,
      "Continuous Score": cont_scores
    })
    if os.path.exists(hist_path):
        old = pd.read_csv(hist_path)
        combined = pd.concat([old, new_hist], ignore_index=True)
    else:
        combined = new_hist
    combined.to_csv(hist_path, index=False)

    # 6) scoreboard stats (same as you have) …
    all_hist    = combined["Continuous Score"].tolist()
    overall_avg = sum(all_hist) / len(all_hist)
    session_avg = sum(cont_scores) / len(cont_scores)
    pct         = percentileofscore(all_hist, session_avg, kind="mean")
    record_max  = max(all_hist)
    stats_html  = f"""<div style='…'>… your stats here …</div>"""

    # 9) Build Bloom HTML using the rounded session average
    avg_level = max(1, min(6, int(round(session_avg))))
    bloom_map = {
        1: ("Remember",  "Recall facts and basic concepts."),
        2: ("Understand","Explain ideas or concepts."),
        3: ("Apply",     "Use information in new situations."),
        4: ("Analyze",   "Draw connections among ideas."),
        5: ("Evaluate",  "Justify a stance or decision."),
        6: ("Create",    "Produce new or original work.")
    }
    name, desc = bloom_map[avg_level]
    bloom_html = (
        f"<div class='avg-container'>Way to go! Your average rounded complexity score is "
        f"<span class='avg-score'>{avg_level}</span></div>"
        "<div class='avg-container'>This score correlates with the Bloom Category:</div>"
        f"<div class='avg-container'><strong>{name}</strong>: {desc}</div><hr/>"
        "<ul class='bloom-list'>"
        + "".join(f"<li><strong>{i} – {lvl}</strong>: {d}</li>"
                  for i,(lvl,d) in bloom_map.items())
        + "</ul>"
    )

    # 10) Coaching HTML
    coaching_html = get_coaching(rounded_scores)

    # Return 6 outputs
    return df_out, avg_level, bloom_html, stats_html, coaching_html, hist_path

# ─── 3) Custom CSS ─────────────────────────────────────────────────────────────
custom_css = """
body, .gradio-container {
    background-color: #FFFFFF !important;
    font-family: Arial, sans-serif;
    color: #333333 !important;
}
/* Banner */
#banner_img { width:100% !important; max-height:130px!important; object-fit:contain!important; margin-bottom:1rem!important; }
#banner_img .gr-image-tools { display:none!important; }
/* Panels */
.gradio-container .input-area,
.gradio-container .output-area { background-color:#F9F9F9!important; border:1px solid #E0E0E0!important; border-radius:6px!important; padding:1rem!important; }
/* Labels */
.gradio-container label { font-weight:bold!important; }
/* Inputs */
.gradio-container textarea,
.gradio-container input[type="file"] { background-color:#FFFFFF!important; border:1px solid #CCCCCC!important; border-radius:4px!important; padding:0.5rem!important; width:100%!important; box-sizing:border-box; }
/* Button */
.gradio-container .gr-button { background-color:#A0A0A0!important; color:#FFFFFF!important; border:none!important; border-radius:4px!important; padding:0.75rem 1.5rem!important; margin-top:1rem!important; }
.gradio-container .gr-button:hover { background-color:#808080!important; }
/* Slider */
.avg-slider .gr-slider { margin-top:1rem!important; }
/* Avg text */
.avg-container { font-family:"Segoe UI",sans-serif!important; font-weight:bold!important; font-size:1.25rem!important; text-align:center!important; margin:0.5rem 0!important; }
/* Bloom list */
.bloom-list { list-style:none!important; padding-left:0!important; margin-top:0.5rem!important; }
.bloom-list li { margin:0.25rem 0!important; }
/* Citation */
.cite-title { font-size:2rem!important; font-weight:bold!important; text-align:center!important; margin-top:2rem!important; }
.cite-text  { font-size:1rem!important; color:#555555!important; text-align:center!important; max-width:800px!important; margin:0.5rem auto 2rem auto!important; }
"""

# ─── 4) Blocks layout ───────────────────────────────────────────────────────────
with gr.Blocks(css=custom_css) as demo:
    # ── Theme toggles (optional) ────────────────────────────────────────
    gr.HTML(
        """
        <div id="theme_toggle" style="text-align:right; margin-bottom:1rem;">
          <a href="?__theme=dark" style="margin-right:1rem; text-decoration:none;">🌙 Dark Mode</a>
          <a href="?__theme=light"            style="text-decoration:none;">☀️ Light Mode</a>
        </div>
        """
    )

    # ── HEADER: logo + title/blurb ─────────────────────────────────
    with gr.Row():
        with gr.Column(scale=1, min_width=150):
            gr.Image("banner.png", show_label=False, interactive=False, elem_id="banner_img")
        with gr.Column(scale=3):
            gr.Markdown("## PrediQT – Predicting question complexity")
            gr.Markdown(
                "The complexity scores generated using this model are based on a Large "
                "Language Model trained on thousands of human responses. The model is "
                "strongly correlated with human ratings of question complexity.\n\n"
                "PrediQT’s scoring is grounded in the hierarchical Bloom taxonomy framework, "
                "which classifies cognitive tasks from basic recall through creative synthesis. "
                "By aligning the LLM’s continuous outputs to Bloom’s six levels—Remember, Understand, "
                "Apply, Analyze, Evaluate, and Create—the app ensures that each complexity score "
                "reflects well-established educational standards."
            )

    # ── INSTRUCTIONS ─────────────────────────────────────────────────────
    gr.Markdown(
        "Either upload an Excel file **or** paste questions below.\n\n"
        "- **Upload** → returns a table with a new **Complexity Score** column.\n"
        "- **Paste**  → returns one score per line."
    )
    # ── DISCLAIMER ──────────────────────────────────────────────────────
    gr.Markdown(
        "**Disclaimer:** All questions asked by users are collected **anonymously** and used only "
        "for scientific purposes. By uploading and scoring questions, you consent to your data being "
        "used for research purposes."
    )
    # ── MAIN UI ─────────────────────────────────────────────────────────
    with gr.Row():
        with gr.Column():
            excel     = gr.File(
                label="Upload an Excel (.xlsx) with a “response” column",
                file_types=[".xlsx"],
                type="filepath"
            )
            text_in   = gr.Textbox(label="Or paste one question per line…", lines=6)
            ignore_validation = gr.Checkbox(label="Ignore validation (allow any text)", value=False)
            score_btn = gr.Button("Score my data")

        with gr.Column():
            df_out        = gr.Dataframe(label="Questions & Complexity Scores", interactive=False)
            avg_slider    = gr.Slider(label="Average Complexity Score",
                                      minimum=1, maximum=6, step=1,
                                      interactive=False, elem_classes="avg-slider")
            bloom_html    = gr.HTML()
            stats_box     = gr.HTML()
            coaching_box  = gr.HTML()

    # Hook up the scoring button (must return exactly 5 outputs)
    score_btn.click(
        fn=score_questions,
        inputs=[excel, text_in, ignore_validation],
        outputs=[df_out, avg_slider, bloom_html, stats_box, coaching_box]
    )

    # ── ADMIN DOWNLOAD SECTION ────────────────────────────────────────
    gr.Markdown("**Admin only**: download full question history", 
                elem_id="admin_header", 
                visible=True)
    admin_key      = gr.Textbox(
        label="Admin key (password)", 
        type="password", 
        placeholder="Enter admin password…"
    )
    download_hist  = gr.File(
        label="Download full question history",
        interactive=False,
        type="filepath"
    )
    reveal_btn     = gr.Button("Reveal CSV")

    # Hook up the button to reveal_history()
    reveal_btn.click(
        fn=reveal_history,
        inputs=[admin_key],
        outputs=[download_hist]
    )

    # CITE
    gr.Markdown("Cite", elem_classes="cite-title")
    gr.Markdown(
        "Raz, T., Luchini, S., Beaty, R., & Kenett, Y. N. (2024). "
        "Automated Scoring of Open-Ended Question Complexity: A Large Language Model Approach. "
        "Research Square https://doi.org/10.21203/rs.3.rs-3890828/v1",
        elem_classes="cite-text"
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")