codon-optimizer / app.py
farshidk's picture
use_wobble
d7061f7 verified
import os
import gradio as gr
import pandas as pd
from optimizer import optimization
# Directory containing this script; bundled data files are resolved against it.
BASE_DIR = os.path.dirname(__file__)
# CSV file passed to optimization() as its ``summary_path`` argument.
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")
# The 20 standard one-letter amino-acid codes plus '*' (presumably the stop
# symbol -- confirm against the optimizer). Also used as the display order.
AA_ORDER = [*"ACDEFGHIKLMNPQRSTVWY*"]
# Set form of AA_ORDER for fast membership tests while cleaning user input.
AA_ALLOWED = {*AA_ORDER}
def _clean_aa_seq(raw: str) -> str:
    """
    Normalize raw user input into a string of allowed amino-acid symbols.

    The input is upper-cased, then every character that is not a member of
    AA_ALLOWED (whitespace, punctuation, digits, unsupported letters) is
    dropped. A falsy input such as None or "" yields "".

    Example: ' mkk ll;ptaA ' -> 'MKKLLPTAA'
    """
    if not raw:
        return ""
    return "".join(filter(AA_ALLOWED.__contains__, raw.upper()))
def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """
    Render per-amino-acid codon fractions as a two-column table.

    Args:
        aa_percent: Mapping of amino-acid symbol -> mapping of codon ->
            fraction. Each fraction is multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.

    Returns:
        A DataFrame with columns "AA" and "Codon percentage", one row per
        amino acid. Rows follow AA_ORDER when every key of ``aa_percent`` is
        a known symbol (symbols without data get an em-dash placeholder);
        otherwise rows follow the sorted keys of ``aa_percent``.
    """
    rows = []
    order = AA_ORDER if set(aa_percent).issubset(set(AA_ORDER)) else sorted(aa_percent)
    for aa in order:
        mix = aa_percent.get(aa, {})
        if not mix:
            # Placeholder for "no codon data". Written as an escape so the
            # em dash cannot be corrupted again by a wrong file encoding
            # (the previous literal was the mojibake of this character).
            rows.append([aa, "\u2014"])
            continue
        # Highest fraction first; ties broken by codon name for stable output.
        parts = sorted(mix.items(), key=lambda kv: (-kv[1], kv[0]))
        cell = " - ".join(f"{cod} ({val * 100:.{digits}f}%)" for cod, val in parts)
        rows.append([aa, cell])
    return pd.DataFrame(rows, columns=["AA", "Codon percentage"])
# NOTE: the parameters of run() must match the Interface inputs in both
# number and order.
def run(aa_seq: str, use_wobble: bool):
    """
    Gradio callback: clean the input, run the optimizer, shape the outputs.

    Args:
        aa_seq: Raw text from the sequence textbox; cleaned with
            _clean_aa_seq before use.
        use_wobble: Checkbox state, forwarded unchanged to optimization().

    Returns:
        A 3-tuple ``(designed_nt, aa_table, gc_table)`` where ``aa_table`` is
        built by aa_percent_to_onecol_df and ``gc_table`` is a DataFrame.

    Raises:
        gr.Error: If nothing remains of the input after cleaning.
    """
    sequence = _clean_aa_seq(aa_seq)
    if not sequence:
        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

    # wobble_scale and use_percent_intervals are fixed here; they are not
    # exposed as UI controls.
    result = optimization(
        summary_path=SUMMARY_PATH,
        aa_seq=sequence,
        use_wobble=use_wobble,
        wobble_scale=1.0,
        use_percent_intervals=True,
    )
    # The optimizer returns four values; the last one is not used by the UI.
    designed_nt, aa_percent, gc_percent, _ = result

    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)
    # The GC output component is a Dataframe, so wrap anything else.
    gc_table = gc_percent if isinstance(gc_percent, pd.DataFrame) else pd.DataFrame(gc_percent)
    return designed_nt, aa_table, gc_table
# ---- Gradio Interface ----
iface = gr.Interface(
    title="Codon Optimizer",
    description="Toggle wobble to include/exclude position-3 base scoring in the design.",
    fn=run,
    # Order matters: these map positionally onto run(aa_seq, use_wobble).
    inputs=[
        gr.Textbox(
            label="Amino Acid Sequence",
            lines=5,
            placeholder="e.g. MKKLLPTAA...",
        ),
        gr.Checkbox(
            label="Use wobble score",
            value=True,  # checked by default
            info="When ON, adds a 3rd-base preference (e.g., C favored; A/T slightly penalized) during seeding & filling. Turn OFF to rely only on k-mer motifs.",
        ),
    ],
    # Order matters: these map positionally onto the 3-tuple returned by run().
    outputs=[
        gr.Textbox(label="Optimized Nucleotide Sequence"),
        gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
        gr.Dataframe(label="GC Content (%)", wrap=True),
    ],
    flagging_mode="never",
)

if __name__ == "__main__":
    # Alternative left by the original author: iface.queue().launch()
    iface.launch()