Speechling Soundcheck

import gradio as gr, numpy as np, pandas as pd, json, joblib, librosa, torch, os
from pathlib import Path
from transformers import AutoProcessor, AutoModelForCTC
import numpy as np
import plotly.express as px

# --------- CONSTANTS / CANON / INVENTORIES ----------
CANON = {
    "t͡ʃ":"tʃ","d͡ʒ":"dʒ","ʧ":"tʃ","ʤ":"dʒ",
    "r":"ɹ","g":"ɡ","ɫ":"l"
}
CONSONANTS = {
    "p","b","t","d","k","ɡ","ʔ","ɾ",
    "f","v","θ","ð","s","z","ʃ","ʒ","h",
    "m","n","ŋ","l","ɫ","ɹ","r","j","w","tʃ","dʒ","ʧ","ʤ"
}
VOWELS = {"i","ɪ","e","eɪ","ɛ","æ","a","ɑ","ɒ","ʌ","ə","ɚ","ɝ","o","oʊ","ɔ","u","ʊ","aɪ","aʊ","ɔɪ","ɜː","əʊ"}
PAL_FRIC = {"ʃ","ʒ"}; PAL_AFFR = {"tʃ","dʒ"}
ALV_FRIC = {"s","z"}; ALV_STOP = {"t","d"}
DROP = {"|","<s>","</s>","<pad>","<unk>","sil","spn","nsn"," "}

def is_c(p): return p in CONSONANTS

# --- Session helpers (NEW) ---
PROCS = ["fronting","gliding","depalatalisation","finalc_del"]

# Use a separate dictionary for plot labels to handle wrapping
DISP_PLOT = {
    "fronting": "Fronting",
    "gliding": "Gliding",
    "depalatalisation": "Depalatalisation",
    "finalc_del": "Final Consonant<br>Deletion",
}
DISP = {
    "fronting": "Fronting",
    "gliding": "Gliding",
    "depalatalisation": "Depalatalisation",
    "finalc_del": "Final Consonant Deletion",
    "none": "None"
}

def approx_error_timestamp(pairs, sr, n_samples):
    """Very rough: map first mismatch index to time by proportional position."""
    mism_idx = next((i for i,(e,p) in enumerate(pairs) if not(e and p and e==p)), None)
    if mism_idx is None:
        return 0.0
    return float(mism_idx / max(len(pairs),1)) * (n_samples / float(sr))

def run_one(word, audio):
    """
    Wrap infer_one(audio, word) but also return pairs and timestamp.
    Accepts gradio audio (sr, np.array) or filepath.
    """
    out = infer_one(audio, word)
    exp_seq = out["expected"].split()
    prd_seq = out["produced"].split()
    pairs   = align(exp_seq, prd_seq)

    # get raw audio length for timestamp estimation
    if isinstance(audio, tuple):
        sr, y = audio
        n = len(y)
    else:
        import soundfile as sf
        y, sr = sf.read(str(audio))
        n = len(y)

    t = approx_error_timestamp(pairs, sr, n)
    process = out.get("final_pred","none")
    return {
        **out,
        "pairs": pairs,
        "timestamp": round(t, 2),
        "process": process,
    }

# --------- LOAD ARTIFACTS ----------
ROOT = Path(__file__).parent
BEST = joblib.load(ROOT/"best_heads.joblib")
ENC  = joblib.load(ROOT/"onehot_enc.joblib")
KEPT = json.load(open(ROOT/"kept_features.json"))
TAU  = json.load(open(ROOT/"thresholds.json"))
EXP  = json.load(open(ROOT/"expected_phones_IPA_norm.json"))

procs = ["fronting","gliding","depalatalisation","finalc_del"]

# --------- ASR (phoneme CTC) ----------
MODEL_ID = "vitouphy/wav2vec2-xls-r-300m-timit-phoneme"
device = "cuda" if torch.cuda.is_available() else "cpu"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCTC.from_pretrained(MODEL_ID)

# IMPORTANT: only quantize on CPU; do NOT move a quantized model to CUDA.
if device == "cuda":
    model = model.to(device).eval()
else:
    model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8).eval()

# Optional warmup (kept lightweight and device-safe)
with torch.inference_mode():
    x = processor(np.zeros(16000, dtype=np.float32), sampling_rate=16000, return_tensors="pt")
    if device == "cuda":
        x = x.to(device)
    _ = model(**x).logits

def ctc_collapse(ids, blank_id):
    out, prev = [], None
    for i in ids:
        if i == blank_id or i == prev:
            prev = i
            continue
        out.append(i)
        prev = i
    return out

PH_MAP = {
  "b":"b","d":"d","f":"f","g":"ɡ","hh":"h","hv":"h","jh":"dʒ","ch":"tʃ","k":"k","l":"l",
  "m":"m","n":"n","ng":"ŋ","p":"p","r":"ɹ","s":"s","sh":"ʃ","t":"t","th":"θ","dh":"ð",
  "v":"v","w":"w","y":"j","z":"z","zh":"ʒ","dx":"ɾ","q":"ʔ"
}

def recognize_phones(audio_path: Path):
    wav, sr = librosa.load(str(audio_path), sr=16000, mono=True)
    feats = processor(wav, sampling_rate=16000, return_tensors="pt")
    if device == "cuda":
        feats = feats.to(device)
    with torch.no_grad():
        logits = model(**feats).logits
    pred_ids = logits.argmax(-1)[0].detach().cpu().tolist()

    blank_id = getattr(processor.tokenizer, "pad_token_id", None)
    if blank_id is None:
        blank_id = getattr(model.config, "pad_token_id", 0)
    ids  = ctc_collapse(pred_ids, blank_id)
    toks = processor.tokenizer.convert_ids_to_tokens(ids)
    toks = [t for t in toks if t not in DROP]
    ipa  = [PH_MAP.get(t, t) for t in toks]
    ipa  = [p.replace("͡","") for p in ipa]
    ipa  = [CANON.get(p, p) for p in ipa]

    seq=[]
    for t in ipa:
        if not t.strip(): continue
        if not seq or t != seq[-1]: seq.append(t)
    return seq

# --------- CLEANERS ----------
def squash_epenthesis(exp_seq, prod_seq):
    if len(prod_seq)>=2 and prod_seq[-2]=="n" and prod_seq[-1] in ("t","d"):
        if exp_seq and exp_seq[-1] in ("k","ɡ"):
            return prod_seq[:-2] + [prod_seq[-1]]
    return prod_seq

def collapse_vocalized_l(exp_seq, prod_seq):
    if exp_seq and exp_seq[-1] in {"l","ɫ"} and len(prod_seq)>=2 and prod_seq[-1] in {"l","ɫ"} and prod_seq[-2] in VOWELS:
        i = len(prod_seq)-2
        while i>=0 and prod_seq[i] in VOWELS: i -= 1
        return prod_seq[:i+1] + [prod_seq[-1]]
    return prod_seq

# --------- ALIGNMENT ----------
def align(a,b):
    m,n=len(a),len(b)
    dp=[[(0,None)]*(n+1) for _ in range(m+1)]
    for i in range(1,m+1): dp[i][0]=(i,('del',i-1,None))
    for j in range(1,n+1): dp[0][j]=(j,('ins',None,j-1))
    for i in range(1,m+1):
        for j in range(1,n+1):
            cost=0 if a[i-1]==b[j-1] else 1
            cands=[
                (dp[i-1][j][0]+1,('del',i-1,None)),
                (dp[i][j-1][0]+1,('ins',None,j-1)),
                (dp[i-1][j-1][0]+cost,('sub' if cost else 'match',i-1,j-1))
            ]
            dp[i][j]=min(cands,key=lambda x:x[0])
    pairs=[]; i=m; j=n
    while i>0 or j>0:
        _,(op,ei,pj)=dp[i][j]
        if op in ('match','sub'): pairs.append((a[ei],b[pj])); i-=1; j-=1
        elif op=='del': pairs.append((a[ei],None)); i-=1
        else: pairs.append((None,b[pj])); j-=1
    return list(reversed(pairs))

# --------- RULES ----------
def detect_fronting(pairs):
    return any(e in {"k","ɡ"} and p in {"t","d"} for e,p in pairs if e and p)
def detect_gliding(pairs):
    return any(e in {"ɹ","l"} and p=="w" for e,p in pairs if e and p)
def detect_depalatalisation(exp_seq, prod_seq, pairs):
    if any(e in PAL_FRIC and p in ALV_FRIC for e,p in pairs if e and p): return True
    if any(e in PAL_AFFR and p in (ALV_STOP|ALV_FRIC) for e,p in pairs if e and p): return True
    if ("tʃ" in exp_seq and {"t","s"}.issubset(set(prod_seq))) or \
       ("dʒ" in exp_seq and {"d","z"}.issubset(set(prod_seq))): return True
    return False
def detect_finalc_del(exp_seq, prod_seq):
    return bool(exp_seq) and is_c(exp_seq[-1]) and (len(prod_seq)==0 or not is_c(prod_seq[-1]))

# --------- FEATURES ----------
def features_from_seqs(exp_seq, prd_seq):
    pairs = align(exp_seq, prd_seq)

    # onset length
    k=0
    for p in exp_seq:
        if is_c(p): k+=1
        else: break
    onset_len = k

    coda_len_exp = sum(1 for p in reversed(exp_seq) if is_c(p))
    coda_len_prd = sum(1 for p in reversed(prd_seq) if is_c(p))

    # edits
    subs = sum(1 for e,p in pairs if e and p and e!=p)
    dels = sum(1 for e,p in pairs if e and (p is None))
    ins  = sum(1 for e,p in pairs if (e is None) and p)
    csubs = sum(1 for e,p in pairs if e and p and is_c(e) and is_c(p) and e!=p)
    cdels = sum(1 for e,p in pairs if e and is_c(e) and p is None)
    cins  = sum(1 for e,p in pairs if (e is None) and p and is_c(p))

    # patterns
    velar_to_alv = int(any(e in {"k","ɡ"} and p in {"t","d"} for e,p in pairs if e and p))
    liquid_to_w  = int(any(e in {"ɹ","l"} and p=="w" for e,p in pairs if e and p))
    pal_to_alv   = int(any(((e in (PAL_FRIC|PAL_AFFR)) and (p in (ALV_FRIC|ALV_STOP))) for e,p in pairs if e and p))

    # last consonants
    exp_last_c = exp_seq[-1] if (exp_seq and is_c(exp_seq[-1])) else "NONE"
    prd_last_c = next((p for p in reversed(prd_seq) if is_c(p)), "NONE")
    final_match = float(exp_last_c == prd_last_c)

    base = pd.Series({
        "onset_len": onset_len,
        "coda_len_exp": coda_len_exp, "coda_len_prd": coda_len_prd,
        "subs": subs, "dels": dels, "ins": ins,
        "csubs": csubs, "cdels": cdels, "cins": cins,
        "velar_to_alv": velar_to_alv, "liquid_to_w": liquid_to_w, "pal_to_alv": pal_to_alv,
        "final_match": final_match,
    }, dtype=float)

    enc_cols = list(getattr(ENC, "feature_names_in_", ["exp_last_c","prd_last_c"]))
    cat = ENC.transform(pd.DataFrame([[exp_last_c, prd_last_c]], columns=enc_cols)).ravel()

    base_names = list(base.index)
    enc_cat_names = list(ENC.get_feature_names_out(enc_cols))
    X_cols_full = base_names + enc_cat_names

    def map_kept(name: str) -> str | None:
        if name in X_cols_full:
            return name
        if name.startswith("exp_last_"):
            cand = name.replace("exp_last_", f"{enc_cols[0]}_")
            if cand in X_cols_full: return cand
        if name.startswith("prd_last_"):
            cand = name.replace("prd_last_", f"{enc_cols[1]}_")
            if cand in X_cols_full: return cand
        return None

    mapped = [m for k in KEPT if (m := map_kept(k)) is not None]
    keep_idx = [X_cols_full.index(nm) for nm in mapped]
    f_full = np.hstack([base.values, cat]).astype(float)
    return f_full[keep_idx].reshape(1, -1), pairs

# --------- INFERENCE ---------
def infer_one(audio, word):
    # Save Gradio audio to a temp wav
    if isinstance(audio, tuple):  # gr.Audio returns (sr, numpy)
        sr, y = audio
        path = ROOT/"_tmp.wav"
        import soundfile as sf
        sf.write(str(path), y, sr)
        audio_path = path
    else:
        audio_path = Path(audio)

    # expected phones
    word_l = word.strip().lower()
    exp_seq = [CANON.get(p,p) for p in EXP[word_l]]

    # produce phones
    prd_seq = recognize_phones(audio_path)
    prd_seq = squash_epenthesis(exp_seq, prd_seq)
    prd_seq = collapse_vocalized_l(exp_seq, prd_seq)

    # rules
    pairs = align(exp_seq, prd_seq)
    rule_flags = {
        "fronting": detect_fronting(pairs),
        "gliding": detect_gliding(pairs),
        "depalatalisation": detect_depalatalisation(exp_seq, prd_seq, pairs),
        "finalc_del": detect_finalc_del(exp_seq, prd_seq),
    }
    rule_pred = next((p for p in procs if rule_flags[p]), "none")

    # learned heads
    f_kept, pairs = features_from_seqs(exp_seq, prd_seq)
    probs = {p: float(BEST[p].predict_proba(f_kept)[0,1]) for p in procs}

    # hybrid
    if rule_pred != "none":
        final = rule_pred
    else:
        best, score = max(probs.items(), key=lambda kv: kv[1])
        final = best if score >= TAU.get(best, 0.95) else "none"

    # PCC
    cc = sum(1 for e,p in pairs if e and p and is_c(e) and e==p)
    nn = sum(is_c(p) for p in exp_seq)
    pcc = (100.0*cc/nn) if nn else 0.0

    return {
        "word": word_l,
        "expected": " ".join(exp_seq),
        "produced": " ".join(prd_seq),
        "rule_pred": rule_pred,
        "final_pred": final,
        "probs": probs,
        "PCC": f"{pcc:.1f}%"
    }

# ---------- Pretty labels ----------
PROCS = ["fronting","gliding","depalatalisation","finalc_del"]
DISP_PLOT = {
    "fronting": "Fronting",
    "gliding": "Gliding",
    "depalatalisation": "Depalatalisation",
    "finalc_del": "Final Consonant Deletion",
}
DISP = {
    "fronting":"Fronting",
    "gliding":"Gliding",
    "depalatalisation":"Depalatalisation",
    "finalc_del":"Final Consonant Deletion",
    "none":"None",
}

# ================= Definitions =================
PROCESS_DEFINITIONS = {
    "fronting": "Replacing sounds made in the back of the mouth (like 'k' and 'g') with sounds made in the front (like 't' and 'd'). Example: 'car' becomes 'tar'.",
    "gliding": "Replacing 'r' or 'l' sounds with 'w' or 'y'. Example: 'rabbit' becomes 'wabbit'.",
    "depalatalisation": "Replacing a 'sh', 'ch', or 'j' sound with a simpler sound made at the front of the mouth. Example: 'shoe' becomes 'sue'.",
    "finalc_del": "Leaving off the last consonant sound in a word. Example: 'cat' becomes 'ca'.",
}

# Create a modified version of PROCESS_DEFINITIONS for better tooltip wrapping
PROCESS_DEFINITIONS_TOOLTIP = {
    "fronting": "Replacing sounds made in the back of the <br> mouth (like 'k' and 'g') with sounds made <br> in the front (like 't' and 'd'). <br>Eg. 'car' becomes 'tar'.",
    "gliding": "Replacing 'r' or 'l' sounds with 'w' or 'y'. <br>Eg. 'rabbit' becomes 'wabbit'.",
    "depalatalisation": "Replacing a 'sh', 'ch', or 'j' sound with a <br> simpler sound made at the front of the mouth. <br>Eg. 'shoe' becomes 'sue'.",
    "finalc_del": "Leaving off the last consonant sound in a word. <br>Eg. 'cat' becomes 'ca'.",
}

# ================= THEME + CSS =================
THEME = gr.themes.Soft(primary_hue="emerald").set(
    # Set the primary color for buttons, focus rings, etc.
    # We use a neutral hue like 'gray' to avoid conflicts, but then
    # override the specific color variables.
    body_background_fill="#FFFFFF",
    button_primary_background_fill="#038c15",   # Primary color for solid buttons
    button_primary_background_fill_hover="#055910", # A slightly darker hover color
    button_primary_text_color="white",
    button_secondary_background_fill="#DCD9D0",
    button_secondary_background_fill_hover="#C5C2B8",
    button_secondary_text_color="black",
    border_color_accent="#495A58",              # Used for active/selected borders (e.g., checkbox)
    # The default primary_hue still sets many variables, so we override the key ones
    
)

CSS = """
@import url('https://fonts.googleapis.com/css2?family=Montserrat:wght@400;600;800&display=swap');

/* ---------- Base / container ---------- */
* { box-sizing: border-box; font-family: 'Montserrat', system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif; }
html, body { width: 100%; overflow-x: hidden; }
.gradio-container { max-width: 1100px !important; width: 100% !important; margin: 0 auto; padding: 0px 0px; }
img, canvas, svg, video { max-width: 100%; height: auto; }

/* ---------- Cards / layout ---------- */
.section { background:#fff; border:1px solid #E7E5DF; border-radius:18px; padding:28px; }
.start-btn-wrapper { display: flex; justify-content: flex-end; width: 100%; padding:10px;}
.start-btn button { max-width: 250px; min-width:150px; width: 200px; align-self: flex-end;}
.center-col { max-width:560px; margin:0 auto; width:100%; }

/*--------- New style for the header container --------- */
.header-container {
    display: flex;
    align-items: center; /* Vertically center the items */
    justify-content: space-between; /* Push items to the ends */
    margin-bottom: 10px; /* Add some space below the header line */
}

/* Modify existing or add new style to control the title text */
.header-container h1 { 
    margin: 0px 20px 0px 0px !important; /* Remove the default top/bottom margin from the title */
}

/* Add this to the end of your existing CSS block */
.waveform-container {
    display: flex;
    justify-content: center;
    align-items: center; /* Align bars to the center */
    height: 40px; /* Adjust height as needed */
    margin: 0;
    gap: 8px;
}

/* ---------- Waveform bar ---------- */
.wave-bar {
    width: 8px; /* Width of each bar */
    background: #055910; /* Primary color */
    height: 5px;
    animation: pulse-wave 1.5s ease-in-out infinite both;
    border-radius: 4px; /* Rounded tops */
}

/* Delay the start of each bar's animation for a staggered effect */
.wave-bar:nth-child(1) { animation-delay: -1.0s; }
.wave-bar:nth-child(2) { animation-delay: -0.5s; }
.wave-bar:nth-child(3) { animation-delay: 0s;     }

/* The keyframes for the pulsing animation */
@keyframes pulse-wave {
    0% { height: 5px; }
    50% { height: 50px; } /* Max height */
    100% { height: 5px; }
}

/* ---------- New Intro Screen Styles ---------- */
.steps-container {
    display: grid;
    grid-template-columns: 1fr;
    gap: 0.5rem;
    width: 100%;
    margin-bottom: 28px;
}
.step-card {
    padding: 1.5rem;
    border: 1px solid #E7E5DF;
    border-radius: 18px;
    background: #f9fafb;
}
.step-card h4 {
    margin-top: 0;
    margin-bottom: 0.5rem;
    color: #055910;
    font-size: 1rem;
}
.step-card p {
    margin: 0;
    color: #454545;
    font-size: 0.9rem;
    line-height: 1.5;
}

@media (min-width: 768px) {
    .steps-container {
        grid-template-columns: repeat(4, 1fr);
    }
}

/* ---------- Intro sub-step #5, For note about End Now and View Results and Process detection ---------- */
.intro-footer-text {
    text-align: center;
    color: #64748b;
    font-size: 12px;
    line-height: 1.6;
    padding: 0;
}
.intro-footer-text b {
    color: #3F4A49;
}
hr.soft-divider {
    border: none;
    border-top: 0px solid #E7E5DF;
    margin-top: 0px;
    margin-bottom: 0px;
}


/* ---------- Big word ---------- */
#bigword, #bigword * {
  font-size: clamp(50px, 10vw, 132px) !important;
  line-height: 1.04 !important;
  font-weight: 800 !important;
  text-align: center !important;
  color: #3F4A49 !important;
  margin: 8px 0 0 !important;
  overflow-wrap: anywhere; word-break: break-word;
}

/* ---------- Progress + fixed heights ---------- */
.progress-wrap { width:100%; height:12px; background:#DCD9D0; border-radius:9999px; overflow:hidden; margin-bottom:12px; margin-left:6px; }
.progress-bar  { height:100%; background:#055910; width:0%; transition: width .25s ease; }

.progress-text-wrap {
    padding: 0 14px;
    width: 100%;
    margin: 0;
}

#assess_shell { min-height: 680px; display:flex; flex-direction:column; gap:16px; }
#bigword_wrap { min-height: 100px; display:flex; align-items:flex-end; justify-content:center; }
#audio_wrap   { min-height: 100px; position: relative; display:flex; flex-direction:column; align-items:center; justify-content:center; text-align:center; color: #000000; }
#actions_wrap { min-height: 100px; display:flex; flex-direction:column; align-items:center; gap:10px; }

/* ---------- Primary action buttons ---------- */
.next-btn button { width: clamp(220px, 60vw, 320px); }

.endlink button{
  background: #3F4A49 !important;
  color: #fff !important;
  box-shadow: none !important;
  padding: 8px 16px !important; 
  height: auto !important;
  font-size: 15px !important;
  border-radius: 6px !important;
  font-weight: 600 !important;
  border: 1px solid #fff !important;
}
.endlink { margin-top: 14px; }

/* ---------- Metrics ---------- */
.metric { border:1px solid #E7E5DF; border-radius:14px; padding:16px 18px; background:#fff; }
.metric.outcome-box { padding: 16px 18px;}
.metric.error-box { padding: 16px 18px; margin-bottom:20px;}
.metric .label { font-size:14px; color:#64748b; margin-bottom:6px; }
.metric .value { font-size:40px; font-weight:800; color:#3F4A49; line-height:1.1; }
.metric .sub   { font-size:13px; color:#64748b; margin-top:4px; }
.plot-wrap .container { padding: 8px !important; }

/* FIX: CSS to hide the Plotly toolbar */
.js-plotly-plot .plotly .modebar {
    display: none !important;
}

#error-examples-title {
    margin: 24px 0 16px 0 !important;
}

.error-process-label p {
    font-size: 1.1rem !important;
    font-weight: 600 !important;
    color: #3F4A49 !important;
    margin-bottom: 12px !important;
    padding-bottom: 10px !important;
    border-bottom: 1px solid #E7E5DF;
}

.results-actions {
    display: flex;
    justify-content: flex-end;
    gap: 12px;
    margin-top: 24px;
}

.definitions-table {
    border-collapse: collapse;
    width: 100%;
    border: 1px solid #E7E5DF;
    font-size: 12px;
}
.definitions-table td {
    padding: 8px;
    vertical-align: top;
    border: 1px solid #E7E5DF;
}
.definitions-table td:first-child {
    padding-right: 24px;
    width: 200px;
}


/* =================================================================== */
/* RECORDER CONTROLS                            */
/* =================================================================== */

.wrapper.svelte-1oiuk2f {
    display: contents;
}

div.svelte-1nguped {
    background: #ffffff;
}

label.float.svelte-j0zqjt.svelte-j0zqjt {
    visibility: hidden;
}

#audio_wrap .record-button,
#audio_wrap .stop-button {
  display: inline-flex;
  align-items: center;
  justify-content: center;
  height: 56px;
  width: clamp(240px, 90vw, 420px);
  margin: 8px auto 0;
  font-size: 18px;
  font-weight: 600;
  border-radius: 0 !important;
  align-self: center;
  color: #000000;
}

#audio_wrap .pause-button,
#audio_wrap .resume-button { display: none !important; }

#audio_wrap .stop-button { display: none; }

#audio_wrap .record-button[disabled],
#audio_wrap .record-button[aria-disabled="true"],
#audio_wrap .record-button[aria-pressed="true"] { display: none !important; }

#audio_wrap .record-button[disabled]      ~ .stop-button,
#audio_wrap .record-button[aria-disabled="true"]  ~ .stop-button,
#audio_wrap .record-button[aria-pressed="true"]   ~ .stop-button { display: inline-flex !important; }

#audio_wrap .record-button::before,
#audio_wrap .stop-button::before {
  content:"";
  width:12px; height:12px; border-radius:9999px; margin-right:14px;
}
#audio_wrap .record-button::before { background:#d53f3f; }
#audio_wrap .stop-button::before   { background:#000;    }

#audio_wrap select {
  display:block;
  margin: 10px auto 0;
  min-height: 32px;
  font-size: 13px;
  padding: 0 10px;
  max-width: 100%;
  background: transparent;
  border: 1px solid var(--block-border-color, #E7E5DF);
  border-radius: 10px;
  color: #505050;
}

/*-------- Record & Stop Button Animation --------*/

@keyframes pulse-record {
  0% {
    box-shadow: 0 0 0 0 rgba(239, 57, 57, 0.7);
  }
  70% {
    box-shadow: 0 0 0 10px rgba(239, 57, 57, 0);
  }
  100% {
    box-shadow: 0 0 0 0 rgba(239, 57, 57, 0);
  }
}

@keyframes pulse-stop {
  0% {
    box-shadow: 0 0 0 0 rgba(5, 150, 105, 0.7);
  }
  70% {
    box-shadow: 0 0 0 10px rgba(5, 150, 105, 0);
  }
  100% {
    box-shadow: 0 0 0 0 rgba(5, 150, 105, 0);
  }
}

.record-button.svelte-1oiuk2f {
    border: 1px solid #ef3939;
    color: #000000;
    border-radius: 9999px; 
    animation: pulse-record 1.2s infinite;
}

.stop-button.svelte-1oiuk2f {
    border: 1px solid #059669;
    color: #000000;
    border-radius: 9999px;
    animation: pulse-stop 1.2s infinite;
}

/* Add this to your CSS, e.g., before the "Mobile tightening" section */

.disclaimer-box {
    font-size: 10px;        /* Small font */
    color: #64748b;        /* Muted, modern gray */
    line-height: 1.6;
    border-top: 1px solid #E7E5DF; /* Neat separator line */
    padding-top: 24px;
    margin-top: 15px;
}

.disclaimer-box p {
    margin: 0 0 15px 0;     /* Space between paragraphs */
}

.disclaimer-box strong {
    color: #3F4A49;        /* Make the titles (Disclaimer/Acknowledgements) stand out */
}

.disclaimer-box code {
    background-color: #f3f4f6;
    padding: 2px 5px;
    border-radius: 4px;
    font-size: 10px;
}

/* ---------- Accessibility & motion ---------- */
.gr-status, .gr-loading-text, [aria-busy="true"], .svelte-16h4h2l {
    visibility: visible !important;
    opacity: 1 !important;
    height: auto !important;
}

#audio_wrap .record-button:focus-visible,
#audio_wrap .stop-button:focus-visible,
#audio_wrap select:focus-visible,
.next-btn button:focus-visible,
.endlink button:focus-visible { outline: 3px solid #10B981; outline-offset: 2px; }


@media (prefers-reduced-motion: reduce) { .progress-bar { transition: none; } }

/* ---------- Mobile tightening ---------- */
@media (max-width: 767px) {
  .section { padding: 20px; }
  .metric .value { font-size: 25px; }
  .metric .sub { font-size: 9px; }
  .gradio-container { padding: 20px 0px; }
  .definitions-table td:first-child { width: 120px; }
  .metric .label .rwd-word {
        display: block !important; 
        margin: 0 !important;
        padding: 0 !important;
    }
  .metric .label {
        display: block !important;
        white-space: normal !important; /* Allow content to break lines */
        height: auto !important;
    }    
}

/* ---------- Print ---------- */
@media print {
  #intro-view, #assess-view, .results-actions, #definitions-accordion { 
      display: none !important; 
  }
  .section { 
      border: none !important; 
      padding: 0 !important; 
      box-shadow: none !important;
  }
  .gradio-container { max-width: 100% !important; padding: 0 !important; }
}
"""


# ================= UI =================
WORDS = sorted(EXP.keys())

# --- Definitions Table Generation ---
TABLE_ROWS = ""
# Use the desired display order (PROCS)
for key in PROCS:
    # Use DISP for the bold display name in the first column
    name = DISP.get(key, key)
    # Use PROCESS_DEFINITIONS for the description in the second column
    definition = PROCESS_DEFINITIONS_TOOLTIP.get(key, "Definition not found.")
    
    # Generate one table row
    row = f"""
        <tr>
            <td><b>{name}</b></td>
            <td>{definition}</td>
        </tr>
    """
    TABLE_ROWS += row

DEFINITIONS_TABLE_HTML = f"""
    <table class="definitions-table">
        {TABLE_ROWS}
    </table>
"""

# --- Disclaimer ---

DISCLAIMER_HTML = f"""
<div class="disclaimer-box">
    <p>
        <strong>Disclaimer</strong><br> This tool is an AI-powered screener and is not a substitute for a formal diagnosis. 
        The model may make errors. Please consult a certified speech pathologist or care provider to discuss these results. 
        For your privacy, no voice recordings or personal data are stored after your session ends.
        This screener uses words adapted from The Quick Screener (Bowen, 1996). 
        Phoneme detection is powered by the <code>{MODEL_ID}</code> model.
    </p>
</div>
"""

with gr.Blocks(theme=THEME, css=CSS) as demo:
    # --- STATE ---
    st_words   = gr.State(WORDS)
    st_index   = gr.State(0)
    st_records = gr.State([])

    # --- INTRO (Redesigned) ---
    with gr.Group(visible=True, elem_id="intro-view") as intro_view:
        with gr.Column(elem_classes=["section"]):
            # --- COMBINED HEADER WITH WAVEFORM AND TITLE ---
            gr.HTML(f"""
                <div class="header-container">
                    <h1>Speechling Soundcheck</h1>
                    <div class="waveform-container">
                        <div class="wave-bar"></div>
                        <div class="wave-bar"></div>
                        <div class="wave-bar"></div>
                    </div>
                </div>
            """)

            gr.HTML("""
                <div class="steps-container">
                    <div class="step-card"><h4>Step 1</h4><p>Find a quiet space and get ready to speak.</p></div>
                    <div class="step-card"><h4>Step 2</h4><p>Press <b>Start</b>, you’ll see a word in large text.</p></div>
                    <div class="step-card"><h4>Step 3</h4><p>Press <b>Record</b>, say the word, then press <b>Stop</b>.</p></div>
                    <div class="step-card"><h4>Step 4</h4><p>Click <b>Next</b> to continue. End at any point to see results.</p></div>
                </div>
            """)

            with gr.Row(elem_classes=["start-btn-wrapper"]):
                start_btn = gr.Button("Start", variant="primary", elem_classes=["start-btn"])

            gr.HTML("""
                <hr class="soft-divider">
                <div class="intro-footer-text">
                    <p>
                    Detects <b>Fronting</b>, <b>Gliding</b>, <b>Depalatalisation</b>, and <b>Final Consonant Deletion<b>
                    </p>
                </div>
            """)

            gr.HTML(DISCLAIMER_HTML)

    
    # --- ASSESSMENT ---
    with gr.Group(visible=False, elem_id="assess-view") as assess_view:
        with gr.Column(elem_classes=["section"], elem_id="assess_shell"):
            prog_html = gr.HTML('<div class="progress-wrap"><div id="pbar" class="progress-bar" style="width:0%"></div></div>')
            prog_txt  = gr.Markdown("", elem_id="subtle")
            with gr.Row(elem_id="bigword_wrap"):
                big_word  = gr.HTML("", elem_id="bigword")

            with gr.Column(elem_classes=["center-col"]):
                with gr.Column(elem_id="audio_wrap"):
                    mic = gr.Audio(sources=["microphone","upload"], type="numpy", label="Audio")
                with gr.Column(elem_id="actions_wrap"):
                    next_btn  = gr.Button("Next", variant="primary", interactive=False, elem_classes=["next-btn"])
                    end_btn   = gr.Button("End now & view results", elem_classes=["endlink"])
                    warn      = gr.Markdown("", visible=False)
            
            gr.HTML(DISCLAIMER_HTML)

    # --- RESULTS (Updated structure) ---
    with gr.Group(visible=False) as results_view:
        with gr.Column(elem_classes=["section"]):
            gr.Markdown("# Results \n")
            with gr.Row():
                metric_words = gr.HTML('<div class="metric"><div class="label">Total Words Attempted</div>'
                                       '<div class="value">0</div><div class="sub">0 / 0 words</div></div>')
                metric_pcc   = gr.HTML('<div class="metric"><div class="label">Correct Consonants Detected</div>'
                                       '<div class="value">0%</div><div class="sub">0 / 0 consonants</div></div>')
            
            outcome_summary = gr.Markdown(visible=False, elem_classes=["metric", "outcome-box"])

            with gr.Group(visible=False) as error_details_group:
                bar = gr.Plot(elem_classes=["plot-wrap"])
                gr.Markdown("### Error Samples ", elem_id="error-examples-title")
            
            EXAMPLE_OUTPUTS = []
            ex_components = {}
            for p in PROCS:
                with gr.Group(elem_classes=["metric", "error-box"], visible=False) as ex_group:
                    ex_hdr = gr.Markdown(visible=False, elem_classes=["error-process-label"])
                    ex_cap = gr.Markdown(visible=False)
                    ex_audio = gr.Audio(interactive=False, visible=False)
                    ex_components[p] = (ex_group, ex_hdr, ex_cap, ex_audio)
                    EXAMPLE_OUTPUTS.extend([ex_group, ex_hdr, ex_cap, ex_audio])

            with gr.Accordion("See Definitions", open=False, elem_id="definitions-accordion"):
                gr.HTML(DEFINITIONS_TABLE_HTML)

            csv_download = gr.File(visible=False)
            with gr.Row(elem_classes=["results-actions"]):
                export_btn = gr.Button("Export Results", elem_classes=["endlink"])
                print_btn = gr.Button("Print", elem_classes=["endlink"])
                restart  = gr.Button("Restart", elem_classes=["endlink"])
    
            gr.HTML(DISCLAIMER_HTML)
    

    # ================= HELPERS =================
    def _progress_ui(words, i):
        N = len(words); i = max(0, min(i, max(N-1,0)))
        pct = int(round(100*(i / max(N,1))))
        ph = f'<div class="progress-wrap"><div id="pbar" class="progress-bar" style="width:{pct}%"></div></div>'
        pt = f'<div class="progress-text-wrap">Step {i+1} of {N}</div>' if N else f'<div class="progress-text-wrap">Step 0 of 0</div>'
        return ph, pt, f"<div>{words[i] if N else '—'}</div>"
    
    def _blank_metrics_payload():
        """Zeroed metrics + empty plot + hidden example blocks."""
        m_words = ('<div class="metric"><div class="label">Words Attempted</div>'
                '<div class="value">0</div><div class="sub">0 / 0 words</div></div>')
        m_pcc = ('<div class="metric"><div class="label">Consonants Correct</div>'
                '<div class="value">0%</div><div class="sub">0 / 0 consonants</div></div>')
        
        updates = []
        for p in PROCS:
            updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)])
        
        return m_words, m_pcc, None, gr.update(visible=False), gr.update(visible=False), *updates

    def _export_csv(records):
        if not records:
            return gr.update(visible=False)

        processed = []
        for r in records:
            out = run_one(r["word"], r["audio"])
            processed.append({
                "word": out["word"],
                "expected_phonemes": out["expected"],
                "produced_phonemes": out["produced"],
                "pcc": out["PCC"],
                "final_prediction": DISP.get(out.get("final_pred", "none"), out.get("final_pred", "none")),
                "fronting_prob": out["probs"].get("fronting"),
                "gliding_prob": out["probs"].get("gliding"),
                "depalatalisation_prob": out["probs"].get("depalatalisation"),
                "finalc_del_prob": out["probs"].get("finalc_del"),
            })

        df = pd.DataFrame(processed)
        filepath = ROOT / "speech_screener_results.csv"
        df.to_csv(filepath, index=False)
        
        return gr.update(value=str(filepath), visible=True)

    def _compute_results_buffered(records):
        processed, total_cc, total_nn = [], 0, 0
        for r in records:
            out = run_one(r["word"], r["audio"])
            processed.append({**r, **out})
            exp_seq = out["expected"].split(); prd_seq = out["produced"].split()
            pairs = align(exp_seq, prd_seq)
            cc = sum(1 for e,p in pairs if e and p and is_c(e) and e==p)
            nn = sum(1 for e in exp_seq if is_c(e))
            total_cc += cc; total_nn += nn
        attempted = len(processed)
        pcc_pct = round((100.0 * total_cc / total_nn), 1) if total_nn else 0.0

        counts = {k:0 for k in PROCS}
        example = {k: None for k in PROCS}
        for r in processed:
            p = r.get("process","none")
            if p in counts:
                counts[p] += 1
                if example[p] is None:
                    example[p] = r

        has_errors = any(c > 0 for c in counts.values())

        # 💡--- START OF MODIFICATIONS ---💡

        # 1. Calculate total errors
        total_errors = sum(counts.values())
        
        # 2. Define the link to keep the f-string clean
        SPA_LINK = "https://therapyworks.com/wp-content/uploads/2022/11/Phonological_Processes_chart.jpg"

        if has_errors:
            # Use a larger header, emoji, and the error count
            outcome_md = f"""
            ### Outcome
            A total of **{total_errors} potential speech sound error patterns** were detected.
            
            For guidance on typical development, see the [Phonological Process Chart from TherapyWorks]({SPA_LINK}).
            
            If you have any concerns, please consult a certified speech pathologist.
            """
        else:
            # Use a larger header and emoji for a clear "all clear"
            outcome_md = """
            ### Outcome
            No potential speech sound error patterns were detected from the screened words.
            
            If you still have concerns about speech development, it is always best to consult a certified speech pathologist.
            """
        
        # 💡--- END OF MODIFICATIONS ---💡

        # Force the order of the y-axis
        category_order = [DISP_PLOT[p] for p in PROCS]
        plot_data = pd.DataFrame({
            "Process": [DISP_PLOT[p] for p in PROCS],
            "Count": [counts[p] for p in PROCS]
        })

        # 💡 ADD THIS: Map the process short codes to their full definitions
        plot_data["Definition"] = [PROCESS_DEFINITIONS_TOOLTIP[p] for p in PROCS]
        plot_data['Process'] = pd.Categorical(plot_data['Process'], categories=category_order, ordered=True)
        plot_data = plot_data.sort_values('Process')


        fig = px.bar(
            plot_data,
            x="Count",
            y="Process",
            orientation='h',
            text="Count",
            custom_data=['Definition']
        )
        fig.update_layout(
            title={
                'text': "Count of Errors",
                'y':0.9,
                'x':0,
                'xanchor': 'left',
                'yanchor': 'top',
                'font': {'size': 16.5, 'color': '#1f2937', 'weight': 'bold'}
            },
            yaxis=dict(
                title=None,
                tickfont=dict(size=14),
                automargin=True,
                ticklabelstandoff=10,
                tickformat='<br>',
                tickangle=0,
            ),
            margin=dict(l=150, r=20, t=50, b=20),
            xaxis=dict(
                title=None,
                tickmode='linear',
                dtick=1
            ),
            hoverlabel=dict(
                bgcolor="white",
                font_size=12,
                # Setting a fixed width often forces the text to wrap within this constraint
                # Use a value that looks good on a small screen.
                namelength=-1 # Ensures the name label (y) is not truncated
            ),
            paper_bgcolor='rgba(0,0,0,0)',
            plot_bgcolor='rgba(0,0,0,0)',
            font_family="Montserrat",
            font_color="#3F4A49"
        )
        fig.update_traces(
            textposition='inside',
            insidetextanchor='end',
            textfont_color='white',
            texttemplate='<b>%{text} </b>',
            marker_color='#3F4A49',
            hovertemplate='<b>%{y}</b><br>' + # The Process name
                          'Count: %{x}<br><br>' + # The Count
                          '%{customdata[0]}' + # The Definition with <br> inside
                          '<extra></extra>'
        )

        m_words = (f'<div class="metric">'
                            f'<div class="label">'
                                f'<span class="rwd-word">Total</span> '
                                f'<span class="rwd-word">Words</span> '
                                f'<span class="rwd-word">Attempted</span>'
                            f'</div>'
                            f'<div class="value">{attempted}</div>'
                            f'<div class="sub">{attempted} / {len(WORDS)} words</div>'
                        f'</div>')
        
        m_pcc = (f'<div class="metric">'
                        f'<div class="label">'
                            f'<span class="rwd-word">Correct</span> '
                            f'<span class="rwd-word">Consonants</span> '
                            f'<span class="rwd-word">Detected</span>'
                        f'</div>'
                        f'<div class="value">{pcc_pct:.1f}%</div>'
                        f'<div class="sub">{total_cc} / {total_nn} consonants</div>'
                        f'</div>')

        updates = []
        for p in PROCS:
            ex = example[p]
            if ex is None:
                updates.extend([gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)])
            else:
                ts = ex.get("timestamp", 0.0)
                updates.extend([
                    gr.update(visible=True),
                    gr.update(visible=True, value=f"**{DISP[p]}**"),
                    gr.update(
                        visible=True,
                        value=(f"**Word:** {ex['word']}  \n"
                               f"**Expected phonemes:** `{ex['expected']}`  \n"
                               f"**Produced phonemes:** `{ex['produced']}`")
                    ),
                    gr.update(visible=True, value=ex["audio"], label=f"{ex['word']} (~{ts:.2f}s)")
                ])

        return m_words, m_pcc, fig, outcome_md, has_errors, *updates

    # ================= CALLBACKS =================
    def _init_session():
        seq = WORDS
        i = 0
        ph, pt, word_html = _progress_ui(seq, i)
        return (
            gr.update(visible=False), gr.update(visible=True), gr.update(visible=False),
            seq, i, [],
            ph, pt, word_html,
            gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value="")
        )

    def _save_and_next(words, i, records, audio):
        if audio is None:
            ph, pt, word_html = _progress_ui(words, i)
            m_words, m_pcc, fig, outcome, show_errors, *example_updates = _blank_metrics_payload()
            return (
                gr.update(visible=False), gr.update(visible=True), gr.update(visible=False),
                records, i,
                ph, pt, word_html,
                gr.update(), gr.update(interactive=False),
                gr.update(visible=True, value="⚠️ Please record the word before clicking **Next**."),
                m_words, m_pcc, fig, outcome, show_errors, *example_updates
            )

        new_records = records + [{"word": words[i], "audio": audio}]
        j = i + 1

        if j < len(words):
            ph, pt, word_html = _progress_ui(words, j)
            m_words, m_pcc, fig, outcome, show_errors, *example_updates = _blank_metrics_payload()
            return (
                gr.update(visible=False), gr.update(visible=True), gr.update(visible=False),
                new_records, j,
                ph, pt, word_html,
                gr.update(value=None), gr.update(interactive=False),
                gr.update(visible=False, value=""),
                m_words, m_pcc, fig, outcome, show_errors, *example_updates
            )

        m_words, m_pcc, fig, outcome, show_errors, *example_updates = _compute_results_buffered(new_records)
        return (
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
            new_records, j,
            "", "", "",
            gr.update(value=None), gr.update(interactive=False),
            gr.update(visible=False, value=""),
            m_words, m_pcc, fig, gr.update(value=outcome, visible=True), gr.update(visible=show_errors), *example_updates
        )  

    def _end_now(words, i, records, audio):
        if audio is not None and i < len(words):
            records = records + [{"word": words[i], "audio": audio}]
        m_words, m_pcc, fig, outcome, show_errors, *example_updates = _compute_results_buffered(records)
        return (
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=True),
            st_words, i, records, "", "", "", gr.update(value=None), gr.update(),
            gr.update(visible=False, value=""),
            m_words, m_pcc, fig, gr.update(value=outcome, visible=True), gr.update(visible=show_errors), *example_updates)

    def _restart():
        ph = '<div class="progress-wrap"><div id="pbar" class="progress-bar" style="width:0%"></div></div>'
        m_words, m_pcc, bar, outcome, show_errors, *example_updates = _blank_metrics_payload()
        return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
                WORDS, 0, ph, f"Step 1 of {len(WORDS)}", f"<div>{WORDS[0] if WORDS else '—'}</div>",
                gr.update(value=None), gr.update(interactive=False), gr.update(visible=False, value=""),
                m_words, m_pcc, bar, outcome, show_errors, *example_updates)

    # --- wiring ---
    start_btn.click(
        _init_session, inputs=[],
        outputs=[intro_view, assess_view, results_view, st_words, st_index, st_records,
                prog_html, prog_txt, big_word, mic, next_btn, warn],
        show_progress="minimal"
    )

    mic.start_recording(lambda: gr.update(interactive=False), outputs=[next_btn], queue=False)
    mic.stop_recording(lambda: gr.update(interactive=True),  outputs=[next_btn], queue=False)

    next_btn.click(
        _save_and_next, inputs=[st_words, st_index, st_records, mic],
        outputs=[intro_view, assess_view, results_view, st_records, st_index,
                prog_html, prog_txt, big_word, mic, next_btn, warn,
                metric_words, metric_pcc, bar, outcome_summary, error_details_group,
                *EXAMPLE_OUTPUTS],
        show_progress="minimal"
    )

    end_btn.click(
        _end_now,
        inputs=[st_words, st_index, st_records, mic],
        outputs=[intro_view, assess_view, results_view, st_words, st_index, st_records,
                prog_html, prog_txt, big_word, mic, next_btn, warn,
                metric_words, metric_pcc, bar, outcome_summary, error_details_group,
                 *EXAMPLE_OUTPUTS],
        show_progress="minimal"
    )

    restart.click(
        _restart, inputs=[],
        outputs=[intro_view, assess_view, results_view, st_words, st_index,
                prog_html, prog_txt, big_word, mic, next_btn, warn,
                metric_words, metric_pcc, bar, outcome_summary, error_details_group,
                *EXAMPLE_OUTPUTS],
        show_progress="minimal"
    )

    export_btn.click(
        _export_csv,
        inputs=[st_records],
        outputs=[csv_download]
    )
    
    print_btn.click(None, None, None, js="() => { window.print(); }")


# Expose + LAUNCH (crucial for HF Spaces)
app = demo

if __name__ == "__main__":
    # Spaces honors PORT; queue is recommended for audio workloads
    demo.queue().launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))