import gradio as gr, numpy as np, pandas as pd, json, joblib, librosa, torch, os from pathlib import Path from transformers import AutoProcessor, AutoModelForCTC import numpy as np import plotly.express as px # --------- CONSTANTS / CANON / INVENTORIES ---------- CANON = { "t͡ʃ":"tʃ","d͡ʒ":"dʒ","ʧ":"tʃ","ʤ":"dʒ", "r":"ɹ","g":"ɡ","ɫ":"l" } CONSONANTS = { "p","b","t","d","k","ɡ","ʔ","ɾ", "f","v","θ","ð","s","z","ʃ","ʒ","h", "m","n","ŋ","l","ɫ","ɹ","r","j","w","tʃ","dʒ","ʧ","ʤ" } VOWELS = {"i","ɪ","e","eɪ","ɛ","æ","a","ɑ","ɒ","ʌ","ə","ɚ","ɝ","o","oʊ","ɔ","u","ʊ","aɪ","aʊ","ɔɪ","ɜː","əʊ"} PAL_FRIC = {"ʃ","ʒ"}; PAL_AFFR = {"tʃ","dʒ"} ALV_FRIC = {"s","z"}; ALV_STOP = {"t","d"} DROP = {"|","","","","","sil","spn","nsn"," "} def is_c(p): return p in CONSONANTS # --- Session helpers (NEW) --- PROCS = ["fronting","gliding","depalatalisation","finalc_del"] # Use a separate dictionary for plot labels to handle wrapping DISP_PLOT = { "fronting": "Fronting", "gliding": "Gliding", "depalatalisation": "Depalatalisation", "finalc_del": "Final Consonant
Deletion", } DISP = { "fronting": "Fronting", "gliding": "Gliding", "depalatalisation": "Depalatalisation", "finalc_del": "Final Consonant Deletion", "none": "None" } def approx_error_timestamp(pairs, sr, n_samples): """Very rough: map first mismatch index to time by proportional position.""" mism_idx = next((i for i,(e,p) in enumerate(pairs) if not(e and p and e==p)), None) if mism_idx is None: return 0.0 return float(mism_idx / max(len(pairs),1)) * (n_samples / float(sr)) def run_one(word, audio): """ Wrap infer_one(audio, word) but also return pairs and timestamp. Accepts gradio audio (sr, np.array) or filepath. """ out = infer_one(audio, word) exp_seq = out["expected"].split() prd_seq = out["produced"].split() pairs = align(exp_seq, prd_seq) # get raw audio length for timestamp estimation if isinstance(audio, tuple): sr, y = audio n = len(y) else: import soundfile as sf y, sr = sf.read(str(audio)) n = len(y) t = approx_error_timestamp(pairs, sr, n) process = out.get("final_pred","none") return { **out, "pairs": pairs, "timestamp": round(t, 2), "process": process, } # --------- LOAD ARTIFACTS ---------- ROOT = Path(__file__).parent BEST = joblib.load(ROOT/"best_heads.joblib") ENC = joblib.load(ROOT/"onehot_enc.joblib") KEPT = json.load(open(ROOT/"kept_features.json")) TAU = json.load(open(ROOT/"thresholds.json")) EXP = json.load(open(ROOT/"expected_phones_IPA_norm.json")) procs = ["fronting","gliding","depalatalisation","finalc_del"] # --------- ASR (phoneme CTC) ---------- MODEL_ID = "vitouphy/wav2vec2-xls-r-300m-timit-phoneme" device = "cuda" if torch.cuda.is_available() else "cpu" processor = AutoProcessor.from_pretrained(MODEL_ID) model = AutoModelForCTC.from_pretrained(MODEL_ID) # IMPORTANT: only quantize on CPU; do NOT move a quantized model to CUDA. if device == "cuda": model = model.to(device).eval() else: model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8).eval() # Optional warmup (kept lightweight and device-safe) with torch.inference_mode(): x = processor(np.zeros(16000, dtype=np.float32), sampling_rate=16000, return_tensors="pt") if device == "cuda": x = x.to(device) _ = model(**x).logits def ctc_collapse(ids, blank_id): out, prev = [], None for i in ids: if i == blank_id or i == prev: prev = i continue out.append(i) prev = i return out PH_MAP = { "b":"b","d":"d","f":"f","g":"ɡ","hh":"h","hv":"h","jh":"dʒ","ch":"tʃ","k":"k","l":"l", "m":"m","n":"n","ng":"ŋ","p":"p","r":"ɹ","s":"s","sh":"ʃ","t":"t","th":"θ","dh":"ð", "v":"v","w":"w","y":"j","z":"z","zh":"ʒ","dx":"ɾ","q":"ʔ" } def recognize_phones(audio_path: Path): wav, sr = librosa.load(str(audio_path), sr=16000, mono=True) feats = processor(wav, sampling_rate=16000, return_tensors="pt") if device == "cuda": feats = feats.to(device) with torch.no_grad(): logits = model(**feats).logits pred_ids = logits.argmax(-1)[0].detach().cpu().tolist() blank_id = getattr(processor.tokenizer, "pad_token_id", None) if blank_id is None: blank_id = getattr(model.config, "pad_token_id", 0) ids = ctc_collapse(pred_ids, blank_id) toks = processor.tokenizer.convert_ids_to_tokens(ids) toks = [t for t in toks if t not in DROP] ipa = [PH_MAP.get(t, t) for t in toks] ipa = [p.replace("͡","") for p in ipa] ipa = [CANON.get(p, p) for p in ipa] seq=[] for t in ipa: if not t.strip(): continue if not seq or t != seq[-1]: seq.append(t) return seq # --------- CLEANERS ---------- def squash_epenthesis(exp_seq, prod_seq): if len(prod_seq)>=2 and prod_seq[-2]=="n" and prod_seq[-1] in ("t","d"): if exp_seq and exp_seq[-1] in ("k","ɡ"): return prod_seq[:-2] + [prod_seq[-1]] return prod_seq def collapse_vocalized_l(exp_seq, prod_seq): if exp_seq and exp_seq[-1] in {"l","ɫ"} and len(prod_seq)>=2 and prod_seq[-1] in {"l","ɫ"} and prod_seq[-2] in VOWELS: i = len(prod_seq)-2 while i>=0 and prod_seq[i] in VOWELS: i -= 1 return prod_seq[:i+1] + [prod_seq[-1]] return prod_seq # --------- ALIGNMENT ---------- def align(a,b): m,n=len(a),len(b) dp=[[(0,None)]*(n+1) for _ in range(m+1)] for i in range(1,m+1): dp[i][0]=(i,('del',i-1,None)) for j in range(1,n+1): dp[0][j]=(j,('ins',None,j-1)) for i in range(1,m+1): for j in range(1,n+1): cost=0 if a[i-1]==b[j-1] else 1 cands=[ (dp[i-1][j][0]+1,('del',i-1,None)), (dp[i][j-1][0]+1,('ins',None,j-1)), (dp[i-1][j-1][0]+cost,('sub' if cost else 'match',i-1,j-1)) ] dp[i][j]=min(cands,key=lambda x:x[0]) pairs=[]; i=m; j=n while i>0 or j>0: _,(op,ei,pj)=dp[i][j] if op in ('match','sub'): pairs.append((a[ei],b[pj])); i-=1; j-=1 elif op=='del': pairs.append((a[ei],None)); i-=1 else: pairs.append((None,b[pj])); j-=1 return list(reversed(pairs)) # --------- RULES ---------- def detect_fronting(pairs): return any(e in {"k","ɡ"} and p in {"t","d"} for e,p in pairs if e and p) def detect_gliding(pairs): return any(e in {"ɹ","l"} and p=="w" for e,p in pairs if e and p) def detect_depalatalisation(exp_seq, prod_seq, pairs): if any(e in PAL_FRIC and p in ALV_FRIC for e,p in pairs if e and p): return True if any(e in PAL_AFFR and p in (ALV_STOP|ALV_FRIC) for e,p in pairs if e and p): return True if ("tʃ" in exp_seq and {"t","s"}.issubset(set(prod_seq))) or \ ("dʒ" in exp_seq and {"d","z"}.issubset(set(prod_seq))): return True return False def detect_finalc_del(exp_seq, prod_seq): return bool(exp_seq) and is_c(exp_seq[-1]) and (len(prod_seq)==0 or not is_c(prod_seq[-1])) # --------- FEATURES ---------- def features_from_seqs(exp_seq, prd_seq): pairs = align(exp_seq, prd_seq) # onset length k=0 for p in exp_seq: if is_c(p): k+=1 else: break onset_len = k coda_len_exp = sum(1 for p in reversed(exp_seq) if is_c(p)) coda_len_prd = sum(1 for p in reversed(prd_seq) if is_c(p)) # edits subs = sum(1 for e,p in pairs if e and p and e!=p) dels = sum(1 for e,p in pairs if e and (p is None)) ins = sum(1 for e,p in pairs if (e is None) and p) csubs = sum(1 for e,p in pairs if e and p and is_c(e) and is_c(p) and e!=p) cdels = sum(1 for e,p in pairs if e and is_c(e) and p is None) cins = sum(1 for e,p in pairs if (e is None) and p and is_c(p)) # patterns velar_to_alv = int(any(e in {"k","ɡ"} and p in {"t","d"} for e,p in pairs if e and p)) liquid_to_w = int(any(e in {"ɹ","l"} and p=="w" for e,p in pairs if e and p)) pal_to_alv = int(any(((e in (PAL_FRIC|PAL_AFFR)) and (p in (ALV_FRIC|ALV_STOP))) for e,p in pairs if e and p)) # last consonants exp_last_c = exp_seq[-1] if (exp_seq and is_c(exp_seq[-1])) else "NONE" prd_last_c = next((p for p in reversed(prd_seq) if is_c(p)), "NONE") final_match = float(exp_last_c == prd_last_c) base = pd.Series({ "onset_len": onset_len, "coda_len_exp": coda_len_exp, "coda_len_prd": coda_len_prd, "subs": subs, "dels": dels, "ins": ins, "csubs": csubs, "cdels": cdels, "cins": cins, "velar_to_alv": velar_to_alv, "liquid_to_w": liquid_to_w, "pal_to_alv": pal_to_alv, "final_match": final_match, }, dtype=float) enc_cols = list(getattr(ENC, "feature_names_in_", ["exp_last_c","prd_last_c"])) cat = ENC.transform(pd.DataFrame([[exp_last_c, prd_last_c]], columns=enc_cols)).ravel() base_names = list(base.index) enc_cat_names = list(ENC.get_feature_names_out(enc_cols)) X_cols_full = base_names + enc_cat_names def map_kept(name: str) -> str | None: if name in X_cols_full: return name if name.startswith("exp_last_"): cand = name.replace("exp_last_", f"{enc_cols[0]}_") if cand in X_cols_full: return cand if name.startswith("prd_last_"): cand = name.replace("prd_last_", f"{enc_cols[1]}_") if cand in X_cols_full: return cand return None mapped = [m for k in KEPT if (m := map_kept(k)) is not None] keep_idx = [X_cols_full.index(nm) for nm in mapped] f_full = np.hstack([base.values, cat]).astype(float) return f_full[keep_idx].reshape(1, -1), pairs # --------- INFERENCE --------- def infer_one(audio, word): # Save Gradio audio to a temp wav if isinstance(audio, tuple): # gr.Audio returns (sr, numpy) sr, y = audio path = ROOT/"_tmp.wav" import soundfile as sf sf.write(str(path), y, sr) audio_path = path else: audio_path = Path(audio) # expected phones word_l = word.strip().lower() exp_seq = [CANON.get(p,p) for p in EXP[word_l]] # produce phones prd_seq = recognize_phones(audio_path) prd_seq = squash_epenthesis(exp_seq, prd_seq) prd_seq = collapse_vocalized_l(exp_seq, prd_seq) # rules pairs = align(exp_seq, prd_seq) rule_flags = { "fronting": detect_fronting(pairs), "gliding": detect_gliding(pairs), "depalatalisation": detect_depalatalisation(exp_seq, prd_seq, pairs), "finalc_del": detect_finalc_del(exp_seq, prd_seq), } rule_pred = next((p for p in procs if rule_flags[p]), "none") # learned heads f_kept, pairs = features_from_seqs(exp_seq, prd_seq) probs = {p: float(BEST[p].predict_proba(f_kept)[0,1]) for p in procs} # hybrid if rule_pred != "none": final = rule_pred else: best, score = max(probs.items(), key=lambda kv: kv[1]) final = best if score >= TAU.get(best, 0.95) else "none" # PCC cc = sum(1 for e,p in pairs if e and p and is_c(e) and e==p) nn = sum(is_c(p) for p in exp_seq) pcc = (100.0*cc/nn) if nn else 0.0 return { "word": word_l, "expected": " ".join(exp_seq), "produced": " ".join(prd_seq), "rule_pred": rule_pred, "final_pred": final, "probs": probs, "PCC": f"{pcc:.1f}%" } # ---------- Pretty labels ---------- PROCS = ["fronting","gliding","depalatalisation","finalc_del"] DISP_PLOT = { "fronting": "Fronting", "gliding": "Gliding", "depalatalisation": "Depalatalisation", "finalc_del": "Final Consonant Deletion", } DISP = { "fronting":"Fronting", "gliding":"Gliding", "depalatalisation":"Depalatalisation", "finalc_del":"Final Consonant Deletion", "none":"None", } # ================= Definitions ================= PROCESS_DEFINITIONS = { "fronting": "Replacing sounds made in the back of the mouth (like 'k' and 'g') with sounds made in the front (like 't' and 'd'). Example: 'car' becomes 'tar'.", "gliding": "Replacing 'r' or 'l' sounds with 'w' or 'y'. Example: 'rabbit' becomes 'wabbit'.", "depalatalisation": "Replacing a 'sh', 'ch', or 'j' sound with a simpler sound made at the front of the mouth. Example: 'shoe' becomes 'sue'.", "finalc_del": "Leaving off the last consonant sound in a word. Example: 'cat' becomes 'ca'.", } # Create a modified version of PROCESS_DEFINITIONS for better tooltip wrapping PROCESS_DEFINITIONS_TOOLTIP = { "fronting": "Replacing sounds made in the back of the
mouth (like 'k' and 'g') with sounds made
in the front (like 't' and 'd').
Eg. 'car' becomes 'tar'.", "gliding": "Replacing 'r' or 'l' sounds with 'w' or 'y'.
Eg. 'rabbit' becomes 'wabbit'.", "depalatalisation": "Replacing a 'sh', 'ch', or 'j' sound with a
simpler sound made at the front of the mouth.
Eg. 'shoe' becomes 'sue'.", "finalc_del": "Leaving off the last consonant sound in a word.
Eg. 'cat' becomes 'ca'.", } # ================= THEME + CSS ================= THEME = gr.themes.Soft(primary_hue="emerald").set( # Set the primary color for buttons, focus rings, etc. # We use a neutral hue like 'gray' to avoid conflicts, but then # override the specific color variables. body_background_fill="#FFFFFF", button_primary_background_fill="#038c15", # Primary color for solid buttons button_primary_background_fill_hover="#055910", # A slightly darker hover color button_primary_text_color="white", button_secondary_background_fill="#DCD9D0", button_secondary_background_fill_hover="#C5C2B8", button_secondary_text_color="black", border_color_accent="#495A58", # Used for active/selected borders (e.g., checkbox) # The default primary_hue still sets many variables, so we override the key ones ) CSS = """ @import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@400;600;800&display=swap'); /* ---------- Base / container ---------- */ * { box-sizing: border-box; font-family: 'Montserrat', system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif; } html, body { width: 100%; overflow-x: hidden; } .gradio-container { max-width: 1100px !important; width: 100% !important; margin: 0 auto; padding: 0px 0px; } img, canvas, svg, video { max-width: 100%; height: auto; } /* ---------- Cards / layout ---------- */ .section { background:#fff; border:1px solid #E7E5DF; border-radius:18px; padding:28px; } .start-btn-wrapper { display: flex; justify-content: flex-end; width: 100%; padding:10px;} .start-btn button { max-width: 250px; min-width:150px; width: 200px; align-self: flex-end;} .center-col { max-width:560px; margin:0 auto; width:100%; } /*--------- New style for the header container --------- */ .header-container { display: flex; align-items: center; /* Vertically center the items */ justify-content: space-between; /* Push items to the ends */ margin-bottom: 10px; /* Add some space below the header line */ } /* Modify existing or add new style to control the title text */ .header-container h1 { margin: 0px 20px 0px 0px !important; /* Remove the default top/bottom margin from the title */ } /* Add this to the end of your existing CSS block */ .waveform-container { display: flex; justify-content: center; align-items: center; /* Align bars to the center */ height: 40px; /* Adjust height as needed */ margin: 0; gap: 8px; } /* ---------- Waveform bar ---------- */ .wave-bar { width: 8px; /* Width of each bar */ background: #055910; /* Primary color */ height: 5px; animation: pulse-wave 1.5s ease-in-out infinite both; border-radius: 4px; /* Rounded tops */ } /* Delay the start of each bar's animation for a staggered effect */ .wave-bar:nth-child(1) { animation-delay: -1.0s; } .wave-bar:nth-child(2) { animation-delay: -0.5s; } .wave-bar:nth-child(3) { animation-delay: 0s; } /* The keyframes for the pulsing animation */ @keyframes pulse-wave { 0% { height: 5px; } 50% { height: 50px; } /* Max height */ 100% { height: 5px; } } /* ---------- New Intro Screen Styles ---------- */ .steps-container { display: grid; grid-template-columns: 1fr; gap: 0.5rem; width: 100%; margin-bottom: 28px; } .step-card { padding: 1.5rem; border: 1px solid #E7E5DF; border-radius: 18px; background: #f9fafb; } .step-card h4 { margin-top: 0; margin-bottom: 0.5rem; color: #055910; font-size: 1rem; } .step-card p { margin: 0; color: #454545; font-size: 0.9rem; line-height: 1.5; } @media (min-width: 768px) { .steps-container { grid-template-columns: repeat(4, 1fr); } } /* ---------- Intro sub-step #5, For note about End Now and View Results and Process detection ---------- */ .intro-footer-text { text-align: center; color: #64748b; font-size: 12px; line-height: 1.6; padding: 0; } .intro-footer-text b { color: #3F4A49; } hr.soft-divider { border: none; border-top: 0px solid #E7E5DF; margin-top: 0px; margin-bottom: 0px; } /* ---------- Big word ---------- */ #bigword, #bigword * { font-size: clamp(50px, 10vw, 132px) !important; line-height: 1.04 !important; font-weight: 800 !important; text-align: center !important; color: #3F4A49 !important; margin: 8px 0 0 !important; overflow-wrap: anywhere; word-break: break-word; } /* ---------- Progress + fixed heights ---------- */ .progress-wrap { width:100%; height:12px; background:#DCD9D0; border-radius:9999px; overflow:hidden; margin-bottom:12px; margin-left:6px; } .progress-bar { height:100%; background:#055910; width:0%; transition: width .25s ease; } .progress-text-wrap { padding: 0 14px; width: 100%; margin: 0; } #assess_shell { min-height: 680px; display:flex; flex-direction:column; gap:16px; } #bigword_wrap { min-height: 100px; display:flex; align-items:flex-end; justify-content:center; } #audio_wrap { min-height: 100px; position: relative; display:flex; flex-direction:column; align-items:center; justify-content:center; text-align:center; color: #000000; } #actions_wrap { min-height: 100px; display:flex; flex-direction:column; align-items:center; gap:10px; } /* ---------- Primary action buttons ---------- */ .next-btn button { width: clamp(220px, 60vw, 320px); } .endlink button{ background: #3F4A49 !important; color: #fff !important; box-shadow: none !important; padding: 8px 16px !important; height: auto !important; font-size: 15px !important; border-radius: 6px !important; font-weight: 600 !important; border: 1px solid #fff !important; } .endlink { margin-top: 14px; } /* ---------- Metrics ---------- */ .metric { border:1px solid #E7E5DF; border-radius:14px; padding:16px 18px; background:#fff; } .metric.outcome-box { padding: 16px 18px;} .metric.error-box { padding: 16px 18px; margin-bottom:20px;} .metric .label { font-size:14px; color:#64748b; margin-bottom:6px; } .metric .value { font-size:40px; font-weight:800; color:#3F4A49; line-height:1.1; } .metric .sub { font-size:13px; color:#64748b; margin-top:4px; } .plot-wrap .container { padding: 8px !important; } /* FIX: CSS to hide the Plotly toolbar */ .js-plotly-plot .plotly .modebar { display: none !important; } #error-examples-title { margin: 24px 0 16px 0 !important; } .error-process-label p { font-size: 1.1rem !important; font-weight: 600 !important; color: #3F4A49 !important; margin-bottom: 12px !important; padding-bottom: 10px !important; border-bottom: 1px solid #E7E5DF; } .results-actions { display: flex; justify-content: flex-end; gap: 12px; margin-top: 24px; } .definitions-table { border-collapse: collapse; width: 100%; border: 1px solid #E7E5DF; font-size: 12px; } .definitions-table td { padding: 8px; vertical-align: top; border: 1px solid #E7E5DF; } .definitions-table td:first-child { padding-right: 24px; width: 200px; } /* =================================================================== */ /* RECORDER CONTROLS */ /* =================================================================== */ .wrapper.svelte-1oiuk2f { display: contents; } div.svelte-1nguped { background: #ffffff; } label.float.svelte-j0zqjt.svelte-j0zqjt { visibility: hidden; } #audio_wrap .record-button, #audio_wrap .stop-button { display: inline-flex; align-items: center; justify-content: center; height: 56px; width: clamp(240px, 90vw, 420px); margin: 8px auto 0; font-size: 18px; font-weight: 600; border-radius: 0 !important; align-self: center; color: #000000; } #audio_wrap .pause-button, #audio_wrap .resume-button { display: none !important; } #audio_wrap .stop-button { display: none; } #audio_wrap .record-button[disabled], #audio_wrap .record-button[aria-disabled="true"], #audio_wrap .record-button[aria-pressed="true"] { display: none !important; } #audio_wrap .record-button[disabled] ~ .stop-button, #audio_wrap .record-button[aria-disabled="true"] ~ .stop-button, #audio_wrap .record-button[aria-pressed="true"] ~ .stop-button { display: inline-flex !important; } #audio_wrap .record-button::before, #audio_wrap .stop-button::before { content:""; width:12px; height:12px; border-radius:9999px; margin-right:14px; } #audio_wrap .record-button::before { background:#d53f3f; } #audio_wrap .stop-button::before { background:#000; } #audio_wrap select { display:block; margin: 10px auto 0; min-height: 32px; font-size: 13px; padding: 0 10px; max-width: 100%; background: transparent; border: 1px solid var(--block-border-color, #E7E5DF); border-radius: 10px; color: #505050; } /*-------- Record & Stop Button Animation --------*/ @keyframes pulse-record { 0% { box-shadow: 0 0 0 0 rgba(239, 57, 57, 0.7); } 70% { box-shadow: 0 0 0 10px rgba(239, 57, 57, 0); } 100% { box-shadow: 0 0 0 0 rgba(239, 57, 57, 0); } } @keyframes pulse-stop { 0% { box-shadow: 0 0 0 0 rgba(5, 150, 105, 0.7); } 70% { box-shadow: 0 0 0 10px rgba(5, 150, 105, 0); } 100% { box-shadow: 0 0 0 0 rgba(5, 150, 105, 0); } } .record-button.svelte-1oiuk2f { border: 1px solid #ef3939; color: #000000; border-radius: 9999px; animation: pulse-record 1.2s infinite; } .stop-button.svelte-1oiuk2f { border: 1px solid #059669; color: #000000; border-radius: 9999px; animation: pulse-stop 1.2s infinite; } /* Add this to your CSS, e.g., before the "Mobile tightening" section */ .disclaimer-box { font-size: 10px; /* Small font */ color: #64748b; /* Muted, modern gray */ line-height: 1.6; border-top: 1px solid #E7E5DF; /* Neat separator line */ padding-top: 24px; margin-top: 15px; } .disclaimer-box p { margin: 0 0 15px 0; /* Space between paragraphs */ } .disclaimer-box strong { color: #3F4A49; /* Make the titles (Disclaimer/Acknowledgements) stand out */ } .disclaimer-box code { background-color: #f3f4f6; padding: 2px 5px; border-radius: 4px; font-size: 10px; } /* ---------- Accessibility & motion ---------- */ .gr-status, .gr-loading-text, [aria-busy="true"], .svelte-16h4h2l { visibility: visible !important; opacity: 1 !important; height: auto !important; } #audio_wrap .record-button:focus-visible, #audio_wrap .stop-button:focus-visible, #audio_wrap select:focus-visible, .next-btn button:focus-visible, .endlink button:focus-visible { outline: 3px solid #10B981; outline-offset: 2px; } @media (prefers-reduced-motion: reduce) { .progress-bar { transition: none; } } /* ---------- Mobile tightening ---------- */ @media (max-width: 767px) { .section { padding: 20px; } .metric .value { font-size: 25px; } .metric .sub { font-size: 9px; } .gradio-container { padding: 20px 0px; } .definitions-table td:first-child { width: 120px; } .metric .label .rwd-word { display: block !important; margin: 0 !important; padding: 0 !important; } .metric .label { display: block !important; white-space: normal !important; /* Allow content to break lines */ height: auto !important; } } /* ---------- Print ---------- */ @media print { #intro-view, #assess-view, .results-actions, #definitions-accordion { display: none !important; } .section { border: none !important; padding: 0 !important; box-shadow: none !important; } .gradio-container { max-width: 100% !important; padding: 0 !important; } } """ # ================= UI ================= WORDS = sorted(EXP.keys()) # --- Definitions Table Generation --- TABLE_ROWS = "" # Use the desired display order (PROCS) for key in PROCS: # Use DISP for the bold display name in the first column name = DISP.get(key, key) # Use PROCESS_DEFINITIONS for the description in the second column definition = PROCESS_DEFINITIONS_TOOLTIP.get(key, "Definition not found.") # Generate one table row row = f""" {name} {definition} """ TABLE_ROWS += row DEFINITIONS_TABLE_HTML = f""" {TABLE_ROWS}
""" # --- Disclaimer --- DISCLAIMER_HTML = f"""

Disclaimer
This tool is an AI-powered screener and is not a substitute for a formal diagnosis. The model may make errors. Please consult a certified speech pathologist or care provider to discuss these results. For your privacy, no voice recordings or personal data are stored after your session ends. This screener uses words adapted from The Quick Screener (Bowen, 1996). Phoneme detection is powered by the {MODEL_ID} model.

""" with gr.Blocks(theme=THEME, css=CSS) as demo: # --- STATE --- st_words = gr.State(WORDS) st_index = gr.State(0) st_records = gr.State([]) # --- INTRO (Redesigned) --- with gr.Group(visible=True, elem_id="intro-view") as intro_view: with gr.Column(elem_classes=["section"]): # --- COMBINED HEADER WITH WAVEFORM AND TITLE --- gr.HTML(f"""

Speechling Soundcheck

""") gr.HTML("""

Step 1

Find a quiet space and get ready to speak.

Step 2

Press Start, you’ll see a word in large text.

Step 3

Press Record, say the word, then press Stop.

Step 4

Click Next to continue. End at any point to see results.

""") with gr.Row(elem_classes=["start-btn-wrapper"]): start_btn = gr.Button("Start", variant="primary", elem_classes=["start-btn"]) gr.HTML("""
""") gr.HTML(DISCLAIMER_HTML) # --- ASSESSMENT --- with gr.Group(visible=False, elem_id="assess-view") as assess_view: with gr.Column(elem_classes=["section"], elem_id="assess_shell"): prog_html = gr.HTML('
') prog_txt = gr.Markdown("", elem_id="subtle") with gr.Row(elem_id="bigword_wrap"): big_word = gr.HTML("", elem_id="bigword") with gr.Column(elem_classes=["center-col"]): with gr.Column(elem_id="audio_wrap"): mic = gr.Audio(sources=["microphone","upload"], type="numpy", label="Audio") with gr.Column(elem_id="actions_wrap"): next_btn = gr.Button("Next", variant="primary", interactive=False, elem_classes=["next-btn"]) end_btn = gr.Button("End now & view results", elem_classes=["endlink"]) warn = gr.Markdown("", visible=False) gr.HTML(DISCLAIMER_HTML) # --- RESULTS (Updated structure) --- with gr.Group(visible=False) as results_view: with gr.Column(elem_classes=["section"]): gr.Markdown("# Results \n") with gr.Row(): metric_words = gr.HTML('
Total Words Attempted
' '
0
0 / 0 words
') metric_pcc = gr.HTML('
Correct Consonants Detected
' '
0%
0 / 0 consonants
') outcome_summary = gr.Markdown(visible=False, elem_classes=["metric", "outcome-box"]) with gr.Group(visible=False) as error_details_group: bar = gr.Plot(elem_classes=["plot-wrap"]) gr.Markdown("### Error Samples ", elem_id="error-examples-title") EXAMPLE_OUTPUTS = [] ex_components = {} for p in PROCS: with gr.Group(elem_classes=["metric", "error-box"], visible=False) as ex_group: ex_hdr = gr.Markdown(visible=False, elem_classes=["error-process-label"]) ex_cap = gr.Markdown(visible=False) ex_audio = gr.Audio(interactive=False, visible=False) ex_components[p] = (ex_group, ex_hdr, ex_cap, ex_audio) EXAMPLE_OUTPUTS.extend([ex_group, ex_hdr, ex_cap, ex_audio]) with gr.Accordion("See Definitions", open=False, elem_id="definitions-accordion"): gr.HTML(DEFINITIONS_TABLE_HTML) csv_download = gr.File(visible=False) with gr.Row(elem_classes=["results-actions"]): export_btn = gr.Button("Export Results", elem_classes=["endlink"]) print_btn = gr.Button("Print", elem_classes=["endlink"]) restart = gr.Button("Restart", elem_classes=["endlink"]) gr.HTML(DISCLAIMER_HTML) # ================= HELPERS ================= def _progress_ui(words, i): N = len(words); i = max(0, min(i, max(N-1,0))) pct = int(round(100*(i / max(N,1)))) ph = f'
' pt = f'
Step {i+1} of {N}
' if N else f'
Step 0 of 0
' return ph, pt, f"
{words[i] if N else '—'}
" def _blank_metrics_payload(): """Zeroed metrics + empty plot + hidden example blocks.""" m_words = ('
Words Attempted
' '
0
0 / 0 words
') m_pcc = ('
Consonants Correct
' '
0%
0 / 0 consonants
') updates = [] for p in PROCS: updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)]) return m_words, m_pcc, None, gr.update(visible=False), gr.update(visible=False), *updates def _export_csv(records): if not records: return gr.update(visible=False) processed = [] for r in records: out = run_one(r["word"], r["audio"]) processed.append({ "word": out["word"], "expected_phonemes": out["expected"], "produced_phonemes": out["produced"], "pcc": out["PCC"], "final_prediction": DISP.get(out.get("final_pred", "none"), out.get("final_pred", "none")), "fronting_prob": out["probs"].get("fronting"), "gliding_prob": out["probs"].get("gliding"), "depalatalisation_prob": out["probs"].get("depalatalisation"), "finalc_del_prob": out["probs"].get("finalc_del"), }) df = pd.DataFrame(processed) filepath = ROOT / "speech_screener_results.csv" df.to_csv(filepath, index=False) return gr.update(value=str(filepath), visible=True) def _compute_results_buffered(records): processed, total_cc, total_nn = [], 0, 0 for r in records: out = run_one(r["word"], r["audio"]) processed.append({**r, **out}) exp_seq = out["expected"].split(); prd_seq = out["produced"].split() pairs = align(exp_seq, prd_seq) cc = sum(1 for e,p in pairs if e and p and is_c(e) and e==p) nn = sum(1 for e in exp_seq if is_c(e)) total_cc += cc; total_nn += nn attempted = len(processed) pcc_pct = round((100.0 * total_cc / total_nn), 1) if total_nn else 0.0 counts = {k:0 for k in PROCS} example = {k: None for k in PROCS} for r in processed: p = r.get("process","none") if p in counts: counts[p] += 1 if example[p] is None: example[p] = r has_errors = any(c > 0 for c in counts.values()) # 💡--- START OF MODIFICATIONS ---💡 # 1. Calculate total errors total_errors = sum(counts.values()) # 2. Define the link to keep the f-string clean SPA_LINK = "https://therapyworks.com/wp-content/uploads/2022/11/Phonological_Processes_chart.jpg" if has_errors: # Use a larger header, emoji, and the error count outcome_md = f""" ### Outcome A total of **{total_errors} potential speech sound error patterns** were detected. For guidance on typical development, see the [Phonological Process Chart from TherapyWorks]({SPA_LINK}). If you have any concerns, please consult a certified speech pathologist. """ else: # Use a larger header and emoji for a clear "all clear" outcome_md = """ ### Outcome No potential speech sound error patterns were detected from the screened words. If you still have concerns about speech development, it is always best to consult a certified speech pathologist. """ # 💡--- END OF MODIFICATIONS ---💡 # Force the order of the y-axis category_order = [DISP_PLOT[p] for p in PROCS] plot_data = pd.DataFrame({ "Process": [DISP_PLOT[p] for p in PROCS], "Count": [counts[p] for p in PROCS] }) # 💡 ADD THIS: Map the process short codes to their full definitions plot_data["Definition"] = [PROCESS_DEFINITIONS_TOOLTIP[p] for p in PROCS] plot_data['Process'] = pd.Categorical(plot_data['Process'], categories=category_order, ordered=True) plot_data = plot_data.sort_values('Process') fig = px.bar( plot_data, x="Count", y="Process", orientation='h', text="Count", custom_data=['Definition'] ) fig.update_layout( title={ 'text': "Count of Errors", 'y':0.9, 'x':0, 'xanchor': 'left', 'yanchor': 'top', 'font': {'size': 16.5, 'color': '#1f2937', 'weight': 'bold'} }, yaxis=dict( title=None, tickfont=dict(size=14), automargin=True, ticklabelstandoff=10, tickformat='
', tickangle=0, ), margin=dict(l=150, r=20, t=50, b=20), xaxis=dict( title=None, tickmode='linear', dtick=1 ), hoverlabel=dict( bgcolor="white", font_size=12, # Setting a fixed width often forces the text to wrap within this constraint # Use a value that looks good on a small screen. namelength=-1 # Ensures the name label (y) is not truncated ), paper_bgcolor='rgba(0,0,0,0)', plot_bgcolor='rgba(0,0,0,0)', font_family="Montserrat", font_color="#3F4A49" ) fig.update_traces( textposition='inside', insidetextanchor='end', textfont_color='white', texttemplate='%{text} ', marker_color='#3F4A49', hovertemplate='%{y}
' + # The Process name 'Count: %{x}

' + # The Count '%{customdata[0]}' + # The Definition with
inside '' ) m_words = (f'
' f'
' f'Total ' f'Words ' f'Attempted' f'
' f'
{attempted}
' f'
{attempted} / {len(WORDS)} words
' f'
') m_pcc = (f'
' f'
' f'Correct ' f'Consonants ' f'Detected' f'
' f'
{pcc_pct:.1f}%
' f'
{total_cc} / {total_nn} consonants
' f'
') updates = [] for p in PROCS: ex = example[p] if ex is None: updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)]) else: ts = ex.get("timestamp", 0.0) updates.extend([ gr.update(visible=True), gr.update(visible=True, value=f"**{DISP[p]}**"), gr.update( visible=True, value=(f"**Word:** {ex['word']} \n" f"**Expected phonemes:** `{ex['expected']}` \n" f"**Produced phonemes:** `{ex['produced']}`") ), gr.update(visible=True, value=ex["audio"], label=f"{ex['word']} (~{ts:.2f}s)") ]) return m_words, m_pcc, fig, outcome_md, has_errors, *updates # ================= CALLBACKS ================= def _init_session(): seq = WORDS i = 0 ph, pt, word_html = _progress_ui(seq, i) return ( gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), seq, i, [], ph, pt, word_html, gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value="") ) def _save_and_next(words, i, records, audio): if audio is None: ph, pt, word_html = _progress_ui(words, i) m_words, m_pcc, fig, outcome, show_errors, *example_updates = _blank_metrics_payload() return ( gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), records, i, ph, pt, word_html, gr.update(), gr.update(interactive=False), gr.update(visible=True, value="⚠️ Please record the word before clicking **Next**."), m_words, m_pcc, fig, outcome, show_errors, *example_updates ) new_records = records + [{"word": words[i], "audio": audio}] j = i + 1 if j < len(words): ph, pt, word_html = _progress_ui(words, j) m_words, m_pcc, fig, outcome, show_errors, *example_updates = _blank_metrics_payload() return ( gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), new_records, j, ph, pt, word_html, gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value=""), m_words, m_pcc, fig, outcome, show_errors, *example_updates ) m_words, m_pcc, fig, outcome, show_errors, *example_updates = _compute_results_buffered(new_records) return ( gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), new_records, j, "", "", "", gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value=""), m_words, m_pcc, fig, gr.update(value=outcome, visible=True), gr.update(visible=show_errors), *example_updates ) def _end_now(words, i, records, audio): if audio is not None and i < len(words): records = records + [{"word": words[i], "audio": audio}] m_words, m_pcc, fig, outcome, show_errors, *example_updates = _compute_results_buffered(records) return ( gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), st_words, i, records, "", "", "", gr.update(value=None), gr.update(), gr.update(visible=False, value=""), m_words, m_pcc, fig, gr.update(value=outcome, visible=True), gr.update(visible=show_errors), *example_updates) def _restart(): ph = '
' m_words, m_pcc, bar, outcome, show_errors, *example_updates = _blank_metrics_payload() return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), WORDS, 0, ph, f"Step 1 of {len(WORDS)}", f"
{WORDS[0] if WORDS else '—'}
", gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value=""), m_words, m_pcc, bar, outcome, show_errors, *example_updates) # --- wiring --- start_btn.click( _init_session, inputs=[], outputs=[intro_view, assess_view, results_view, st_words, st_index, st_records, prog_html, prog_txt, big_word, mic, next_btn, warn], show_progress="minimal" ) mic.start_recording(lambda: gr.update(interactive=False), outputs=[next_btn], queue=False) mic.stop_recording(lambda: gr.update(interactive=True), outputs=[next_btn], queue=False) next_btn.click( _save_and_next, inputs=[st_words, st_index, st_records, mic], outputs=[intro_view, assess_view, results_view, st_records, st_index, prog_html, prog_txt, big_word, mic, next_btn, warn, metric_words, metric_pcc, bar, outcome_summary, error_details_group, *EXAMPLE_OUTPUTS], show_progress="minimal" ) end_btn.click( _end_now, inputs=[st_words, st_index, st_records, mic], outputs=[intro_view, assess_view, results_view, st_words, st_index, st_records, prog_html, prog_txt, big_word, mic, next_btn, warn, metric_words, metric_pcc, bar, outcome_summary, error_details_group, *EXAMPLE_OUTPUTS], show_progress="minimal" ) restart.click( _restart, inputs=[], outputs=[intro_view, assess_view, results_view, st_words, st_index, prog_html, prog_txt, big_word, mic, next_btn, warn, metric_words, metric_pcc, bar, outcome_summary, error_details_group, *EXAMPLE_OUTPUTS], show_progress="minimal" ) export_btn.click( _export_csv, inputs=[st_records], outputs=[csv_download] ) print_btn.click(None, None, None, js="() => { window.print(); }") # Expose + LAUNCH (crucial for HF Spaces) app = demo if __name__ == "__main__": # Spaces honors PORT; queue is recommended for audio workloads demo.queue().launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))