import os

import pandas as pd
import torch
import gradio as gr
from scipy.stats import percentileofscore
from huggingface_hub import HfApi
from transformers import AutoModelForSequenceClassification, AutoTokenizer

API = HfApi()
REPO_ID = "Tman212/question_complexity_scoring"   # change to your space
HISTORY_FN = "files/question_history.csv"         # history CSV, tracked in 'files/'

# ─── 1) Load model & tokenizer ───────────────────────────────────────────────
MODEL_DIR = "./model"
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_DIR, local_files_only=True
)
model.eval()


def reveal_history(key: str):
    """Return the history CSV path only if the entered key matches."""
    if key and key == os.environ.get("ADMIN_PASSWORD") and os.path.exists(HISTORY_FN):
        return HISTORY_FN
    # returning None makes Gradio show nothing / keep the download disabled
    return None


def persist_history():
    """Stage & commit the local history CSV back into the Space repo."""
    API.upload_file(
        path_or_fileobj=HISTORY_FN,
        path_in_repo=HISTORY_FN,
        repo_id=REPO_ID,
        repo_type="space",
        token=os.environ["HF_TOKEN"],
        commit_message="🔄 update scoreboard history",
    )


# ─── Coaching helper ─────────────────────────────────────────────────────────
TIP_MAP = {
    1: "❓ To move to Understand, add “Why do you think…?”",
    2: "🔧 For Apply, ask “How would you use X in Y?”",
    3: "🔍 To reach Analyze, prompt “What are the components of…?”",
    4: "⚖️ For Evaluate, ask “What criteria would you use to judge…?”",
    5: "💡 For Create, try “Can you design or propose…?”",
    6: "🎉 You’re at Create: combine ideas for novelty!",
}


def get_coaching(rounded_scores):
    """
    Return a single coaching tip for the *lowest*-complexity question in the
    current session, so you get targeted advice on what to bump up.
    """
    if not rounded_scores:
        return ""
    # 1) Find the minimum level in this session
    min_level = min(rounded_scores)
    # 2) Look up the tip
    tip = TIP_MAP.get(min_level, "")
    # 3) Build a little message
" "💡 Coaching Tip: " f"Your lowest‐scoring question is at level {min_level}. " f"{tip}" "
" ) return html # ─── 2) Scoring logic with continuous Bloom & Scoreboard ──────────────────── def score_questions(uploaded_file, text, ignore_validation=False): # 1) gather if uploaded_file: df_in = pd.read_excel(uploaded_file) questions = df_in["response"].astype(str).tolist() else: questions = [q.strip() for q in text.splitlines() if q.strip()] if not ignore_validation: valid_qs, invalid_qs = [], [] for q in questions: if len(q.split()) >= 2 and q.endswith(("?", "?")): valid_qs.append(q) else: invalid_qs.append(q) if invalid_qs: warning_html = ( "
" "The entries below weren’t scored because they didn’t look like full questions " "(≥2 words + trailing '?'):
" + "".join(f"– {iq}
" for iq in invalid_qs) + "
" ) # return early (table, slider, bloom, stats, coaching) return None, 1, "", warning_html, "" questions = valid_qs else: # skip validation, proceed with all questions questions = [q for q in questions if q.strip()] # 3) predict & score inputs = tokenizer(questions, return_tensors="pt", truncation=True, padding=True) with torch.no_grad(): raw_logits = model(**inputs).logits.mean(dim=1).tolist() cont_scores = [((r + 3) / 6) * 5 + 1 for r in raw_logits] rounded_scores = [int(round(c)) for c in cont_scores] # 4) build display table df_out = pd.DataFrame({ "Question": questions, "Complexity Score": rounded_scores }) # 5) append to full‐history CSV os.makedirs("files", exist_ok=True) hist_path = "files/question_history.csv" new_hist = pd.DataFrame({ "Question": questions, "Rounded Score": rounded_scores, "Continuous Score": cont_scores }) if os.path.exists(hist_path): old = pd.read_csv(hist_path) combined = pd.concat([old, new_hist], ignore_index=True) else: combined = new_hist combined.to_csv(hist_path, index=False) # 6) scoreboard stats (same as you have) … all_hist = combined["Continuous Score"].tolist() overall_avg = sum(all_hist) / len(all_hist) session_avg = sum(cont_scores) / len(cont_scores) pct = percentileofscore(all_hist, session_avg, kind="mean") record_max = max(all_hist) stats_html = f"""
… your stats here …
""" # 9) Build Bloom HTML using the rounded session average avg_level = max(1, min(6, int(round(session_avg)))) bloom_map = { 1: ("Remember", "Recall facts and basic concepts."), 2: ("Understand","Explain ideas or concepts."), 3: ("Apply", "Use information in new situations."), 4: ("Analyze", "Draw connections among ideas."), 5: ("Evaluate", "Justify a stance or decision."), 6: ("Create", "Produce new or original work.") } name, desc = bloom_map[avg_level] bloom_html = ( f"
Way to go! Your average rounded complexity score is " f"{avg_level}
" "
This score correlates with the Bloom Category:
" f"
{name}: {desc}

" "" ) # 10) Coaching HTML coaching_html = get_coaching(rounded_scores) # Return 6 outputs return df_out, avg_level, bloom_html, stats_html, coaching_html, hist_path # ─── 3) Custom CSS ───────────────────────────────────────────────────────────── custom_css = """ body, .gradio-container { background-color: #FFFFFF !important; font-family: Arial, sans-serif; color: #333333 !important; } /* Banner */ #banner_img { width:100% !important; max-height:130px!important; object-fit:contain!important; margin-bottom:1rem!important; } #banner_img .gr-image-tools { display:none!important; } /* Panels */ .gradio-container .input-area, .gradio-container .output-area { background-color:#F9F9F9!important; border:1px solid #E0E0E0!important; border-radius:6px!important; padding:1rem!important; } /* Labels */ .gradio-container label { font-weight:bold!important; } /* Inputs */ .gradio-container textarea, .gradio-container input[type="file"] { background-color:#FFFFFF!important; border:1px solid #CCCCCC!important; border-radius:4px!important; padding:0.5rem!important; width:100%!important; box-sizing:border-box; } /* Button */ .gradio-container .gr-button { background-color:#A0A0A0!important; color:#FFFFFF!important; border:none!important; border-radius:4px!important; padding:0.75rem 1.5rem!important; margin-top:1rem!important; } .gradio-container .gr-button:hover { background-color:#808080!important; } /* Slider */ .avg-slider .gr-slider { margin-top:1rem!important; } /* Avg text */ .avg-container { font-family:"Segoe UI",sans-serif!important; font-weight:bold!important; font-size:1.25rem!important; text-align:center!important; margin:0.5rem 0!important; } /* Bloom list */ .bloom-list { list-style:none!important; padding-left:0!important; margin-top:0.5rem!important; } .bloom-list li { margin:0.25rem 0!important; } /* Citation */ .cite-title { font-size:2rem!important; font-weight:bold!important; text-align:center!important; margin-top:2rem!important; } .cite-text { font-size:1rem!important; color:#555555!important; text-align:center!important; max-width:800px!important; margin:0.5rem auto 2rem auto!important; } """ # ─── 4) Blocks layout ─────────────────────────────────────────────────────────── with gr.Blocks(css=custom_css) as demo: # ── Theme toggles (optional) ──────────────────────────────────────── gr.HTML( """
        <div style="text-align: right;">
          <button onclick="document.body.classList.add('dark')">🌙 Dark Mode</button>
          <button onclick="document.body.classList.remove('dark')">☀️ Light Mode</button>
        </div>
""" ) # ── HEADER: logo + title/blurb ───────────────────────────────── with gr.Row(): with gr.Column(scale=1, min_width=150): gr.Image("banner.png", show_label=False, interactive=False, elem_id="banner_img") with gr.Column(scale=3): gr.Markdown("## PrediQT – Predicting question complexity") gr.Markdown( "The complexity scores generated using this model are based on a Large " "Language Model trained on thousands of human responses. The model is " "strongly correlated with human ratings of question complexity.\n\n" "PrediQT’s scoring is grounded in the hierarchical Bloom taxonomy framework, " "which classifies cognitive tasks from basic recall through creative synthesis. " "By aligning the LLM’s continuous outputs to Bloom’s six levels—Remember, Understand, " "Apply, Analyze, Evaluate, and Create—the app ensures that each complexity score " "reflects well-established educational standards." ) # ── INSTRUCTIONS ───────────────────────────────────────────────────── gr.Markdown( "Either upload an Excel file **or** paste questions below.\n\n" "- **Upload** → returns a table with a new **Complexity Score** column.\n" "- **Paste** → returns one score per line." ) # ── DISCLAIMER ────────────────────────────────────────────────────── gr.Markdown( "**Disclaimer:** All questions asked by users are collected **anonymously** and used only " "for scientific purposes. By uploading and scoring questions, you consent to your data being " "used for research purposes." ) # ── MAIN UI ───────────────────────────────────────────────────────── with gr.Row(): with gr.Column(): excel = gr.File( label="Upload an Excel (.xlsx) with a “response” column", file_types=[".xlsx"], type="filepath" ) text_in = gr.Textbox(label="Or paste one question per line…", lines=6) ignore_validation = gr.Checkbox(label="Ignore validation (allow any text)", value=False) score_btn = gr.Button("Score my data") with gr.Column(): df_out = gr.Dataframe(label="Questions & Complexity Scores", interactive=False) avg_slider = gr.Slider(label="Average Complexity Score", minimum=1, maximum=6, step=1, interactive=False, elem_classes="avg-slider") bloom_html = gr.HTML() stats_box = gr.HTML() coaching_box = gr.HTML() # Hook up the scoring button (must return exactly 5 outputs) score_btn.click( fn=score_questions, inputs=[excel, text_in, ignore_validation], outputs=[df_out, avg_slider, bloom_html, stats_box, coaching_box] ) # ── ADMIN DOWNLOAD SECTION ──────────────────────────────────────── gr.Markdown("**Admin only**: download full question history", elem_id="admin_header", visible=True) admin_key = gr.Textbox( label="Admin key (password)", type="password", placeholder="Enter admin password…" ) download_hist = gr.File( label="Download full question history", interactive=False, type="filepath" ) reveal_btn = gr.Button("Reveal CSV") # Hook up the button to reveal_history() reveal_btn.click( fn=reveal_history, inputs=[admin_key], outputs=[download_hist] ) # CITE gr.Markdown("Cite", elem_classes="cite-title") gr.Markdown( "Raz, T., Luchini, S., Beaty, R., & Kenett, Y. N. (2024). " "Automated Scoring of Open-Ended Question Complexity: A Large Language Model Approach. " "Research Square https://doi.org/10.21203/rs.3.rs-3890828/v1", elem_classes="cite-text" ) if __name__ == "__main__": demo.launch(server_name="0.0.0.0")