"""Gradio front end for the codon optimizer.

Cleans a user-supplied amino-acid sequence, passes it to
``optimizer.optimization`` and shows the designed nucleotide sequence together
with per-amino-acid codon usage and GC-content tables.
"""

import os

import gradio as gr
import pandas as pd

from optimizer import optimization

# Anchor on the absolute location of this file so the summary CSV is found
# even when ``__file__`` is a relative path and the working directory differs.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")

# One-letter amino-acid symbols in display order; "*" is accepted as a symbol.
AA_ORDER = list("ACDEFGHIKLMNPQRSTVWY*")
AA_ALLOWED = set(AA_ORDER)


def _clean_aa_seq(raw: str) -> str:
    """Return *raw* upper-cased with every symbol outside AA_ORDER removed.

    Spaces, punctuation, digits and any other letters are dropped. ``None``
    or an empty string yields ``""``.

    Example: ' mkk ll;ptaA ' -> 'MKKLLPTAA'
    """
    upper = (raw or "").upper()
    return "".join(ch for ch in upper if ch in AA_ALLOWED)


def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """Render per-amino-acid codon fractions as a two-column table.

    Args:
        aa_percent: Mapping of amino-acid symbol to a mapping of codon to
            fraction; each fraction is multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.

    Returns:
        A DataFrame with columns ``"AA"`` and ``"Codon percentage"``. Rows
        follow AA_ORDER when every key is a known symbol, otherwise the
        sorted keys of *aa_percent*. Amino acids with no codon data show "—".
    """
    if set(aa_percent) <= AA_ALLOWED:
        order = AA_ORDER
    else:
        order = sorted(aa_percent)

    rows = []
    for aa in order:
        mix = aa_percent.get(aa, {})
        if not mix:
            rows.append([aa, "—"])
            continue
        # Highest fraction first; ties are broken alphabetically by codon.
        parts = sorted(mix.items(), key=lambda kv: (-kv[1], kv[0]))
        cell = " - ".join(
            f"{cod} ({val * 100:.{digits}f}%)" for cod, val in parts
        )
        rows.append([aa, cell])
    return pd.DataFrame(rows, columns=["AA", "Codon percentage"])


def _as_dataframe(value, scalar_column: str = "GC Content (%)") -> pd.DataFrame:
    """Coerce *value* to a DataFrame without failing on scalar inputs.

    ``pd.DataFrame`` raises ``ValueError`` for a bare scalar and for a dict
    whose values are all scalars; both are turned into a one-row frame here.
    Every other input is handed to ``pd.DataFrame`` unchanged.
    """
    if isinstance(value, pd.DataFrame):
        return value
    is_scalar = pd.api.types.is_scalar
    if isinstance(value, dict):
        if value and all(is_scalar(v) for v in value.values()):
            return pd.DataFrame([value])
        return pd.DataFrame(value)
    # ``None`` counts as scalar for pandas, but pd.DataFrame(None) already
    # yields an empty frame, so it is left on the default path.
    if value is not None and is_scalar(value):
        return pd.DataFrame({scalar_column: [value]})
    return pd.DataFrame(value)


# NOTE: run() params MUST match the number/order of Interface inputs
def run(aa_seq: str, use_wobble: bool):
    """Clean the input sequence, run the optimizer and build the UI outputs.

    Args:
        aa_seq: Raw text from the sequence textbox.
        use_wobble: Checkbox state, forwarded unchanged to the optimizer.

    Returns:
        A 3-tuple matching the Interface outputs: the optimizer's designed
        sequence, the codon-usage table, and the GC-content table.

    Raises:
        gr.Error: If no valid amino-acid symbol remains after cleaning.
    """
    cleaned = _clean_aa_seq(aa_seq)
    if not cleaned:
        raise gr.Error(
            "Input sequence contains no valid amino-acid characters after cleaning."
        )

    # The optimizer returns four values; the last one is not used here.
    designed_nt, aa_percent, gc_percent, _ = optimization(
        summary_path=SUMMARY_PATH,
        aa_seq=cleaned,
        use_wobble=use_wobble,  # name of the optimizer's current wobble flag
        wobble_scale=1.0,  # adjust if a slider is exposed later
        use_percent_intervals=True,
    )

    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)
    # NOTE(review): the type of gc_percent is not visible from this file;
    # _as_dataframe tolerates DataFrame, dict, list-like and scalar values.
    gc_table = _as_dataframe(gc_percent)
    return designed_nt, aa_table, gc_table


# ---- Gradio Interface ----
iface = gr.Interface(
    fn=run,
    inputs=[
        gr.Textbox(
            label="Amino Acid Sequence",
            lines=5,
            placeholder="e.g. \nMKKLLPTAA...",
        ),
        gr.Checkbox(
            label="Use wobble score",
            value=True,  # default ON
            info="When ON, adds a 3rd-base preference (e.g., C favored; A/T slightly penalized) during seeding & filling. Turn OFF to rely only on k-mer motifs.",
        ),
    ],
    outputs=[
        gr.Textbox(label="Optimized Nucleotide Sequence"),
        gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
        gr.Dataframe(label="GC Content (%)", wrap=True),
    ],
    title="Codon Optimizer",
    description="Toggle wobble to include/exclude position-3 base scoring in the design.",
    flagging_mode="never",
)


if __name__ == "__main__":
    # Alternative kept from the original source: iface.queue().launch()
    iface.launch()