# Spaces: Running
import os

import gradio as gr
import pandas as pd

from optimizer import optimization
# Directory containing this script; data files are resolved relative to it.
BASE_DIR = os.path.dirname(__file__)
# CSV handed to the optimizer as its `summary_path` argument.
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")

# Accepted one-letter symbols, in display order ("*" is accepted as well).
AA_ORDER = list("ACDEFGHIKLMNPQRSTVWY*")
# Same symbols as a set for O(1) membership tests.
AA_ALLOWED = set(AA_ORDER)
def _clean_aa_seq(raw: str) -> str:
    """Return *raw* reduced to the symbols contained in AA_ALLOWED.

    ASCII letters are upper-cased before filtering; whitespace, punctuation,
    digits, non-ASCII characters and any other letters are dropped.
    ``None`` or an empty string yields ``""``.

    Example: ' mkk ll;ptaA ' -> 'MKKLLPTAA'
    """
    # Discard non-ASCII characters *before* upper-casing: str.upper() maps
    # some of them onto valid symbols (e.g. 'ß' -> 'SS', 'ı' -> 'I'), which
    # would inject residues the user never typed.
    ascii_upper = (ch.upper() for ch in (raw or "") if ch.isascii())
    return "".join(ch for ch in ascii_upper if ch in AA_ALLOWED)
def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """Render per-amino-acid codon fractions as a two-column table.

    Args:
        aa_percent: Mapping of amino-acid symbol to a mapping of
            codon -> fraction. Fractions are multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.

    Returns:
        DataFrame with columns ``"AA"`` and ``"Codon percentage"``, one row
        per amino acid, with codons listed from highest to lowest fraction.
    """
    # Use the canonical AA_ORDER when every key is a known symbol (symbols
    # missing from the input then get a placeholder row); otherwise fall
    # back to the input's own keys in sorted order.
    order = AA_ORDER if set(aa_percent).issubset(AA_ORDER) else sorted(aa_percent)

    rows = []
    for aa in order:
        mix = aa_percent.get(aa, {})
        if not mix:
            # No codon data for this symbol: show a dash placeholder.
            rows.append([aa, "—"])
            continue
        # Highest fraction first; ties are broken alphabetically by codon.
        ranked = sorted(mix.items(), key=lambda kv: (-kv[1], kv[0]))
        cell = " - ".join(
            f"{codon} ({frac * 100:.{digits}f}%)" for codon, frac in ranked
        )
        rows.append([aa, cell])
    return pd.DataFrame(rows, columns=["AA", "Codon percentage"])
# NOTE: run() params MUST match the number/order of Interface inputs
def run(aa_seq: str, use_wobble: bool):
    """Gradio callback: clean the input, run the optimizer, format outputs.

    Args:
        aa_seq: Raw amino-acid text entered by the user.
        use_wobble: Checkbox state, forwarded unchanged to the optimizer.

    Returns:
        Tuple ``(designed_nt, aa_table, gc_percent)``: the optimizer's
        nucleotide result, the codon-usage table built by
        ``aa_percent_to_onecol_df``, and the GC data as a DataFrame.

    Raises:
        gr.Error: If nothing is left of ``aa_seq`` after cleaning.
    """
    cleaned = _clean_aa_seq(aa_seq)
    if not cleaned:
        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

    # The optimizer returns four values; the last one is not used here.
    designed_nt, aa_percent, gc_percent, _ = optimization(
        summary_path=SUMMARY_PATH,
        aa_seq=cleaned,
        use_wobble=use_wobble,  # checkbox value passed straight through
        wobble_scale=1.0,  # adjust if you expose a slider later
        use_percent_intervals=True,
    )

    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)

    # The output component is a Dataframe, so coerce anything else.
    # NOTE(review): assumes gc_percent is DataFrame-constructible (not a bare
    # scalar) — confirm against optimizer.optimization.
    if not isinstance(gc_percent, pd.DataFrame):
        gc_percent = pd.DataFrame(gc_percent)
    return designed_nt, aa_table, gc_percent
# ---- Gradio Interface ----
# UI wiring: two inputs (sequence text, wobble toggle) feed run(); its three
# return values fill the three outputs in the same order.
iface = gr.Interface(
    fn=run,
    inputs=[
        gr.Textbox(label="Amino Acid Sequence", lines=5, placeholder="e.g. MKKLLPTAA..."),
        gr.Checkbox(
            label="Use wobble score",
            value=True,  # default ON
            info="When ON, adds a 3rd-base preference (e.g., C favored; A/T slightly penalized) during seeding & filling. Turn OFF to rely only on k-mer motifs.",
        ),
    ],
    outputs=[
        gr.Textbox(label="Optimized Nucleotide Sequence"),
        gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
        gr.Dataframe(label="GC Content (%)", wrap=True),
    ],
    title="Codon Optimizer",
    description="Toggle wobble to include/exclude position-3 base scoring in the design.",
    flagging_mode="never",
)
# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    # iface.queue().launch()
    iface.launch()