Spaces:
Sleeping
Sleeping
File size: 2,832 Bytes
c1f1ea8 326c06d cafd975 af773f6 326c06d b497075 ea4ca8c e4049b2 798ee89 72ae7f9 e4049b2 72ae7f9 e4049b2 72ae7f9 e4049b2 72ae7f9 98d3cfd 72ae7f9 b497075 ea4ca8c b497075 e4049b2 c1f1ea8 b497075 d7061f7 b497075 1357470 326c06d c1f1ea8 72ae7f9 839b6bc e4049b2 cafd975 839b6bc ea4ca8c c1f1ea8 b497075 c1f1ea8 98d3cfd e4049b2 c1f1ea8 cafd975 b497075 ab6ae03 c1f1ea8 3fcfcd1 ea4ca8c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import os
import gradio as gr
import pandas as pd
from optimizer import optimization
# Directory that contains this file; SUMMARY_PATH is resolved relative to it.
BASE_DIR = os.path.dirname(__file__)
# CSV file passed to optimization() as its `summary_path` argument.
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")
# Accepted one-letter symbols, in the row order used by the codon-usage table.
# NOTE(review): "*" is presumably the stop symbol -- confirm against optimizer.
AA_ORDER = list("ACDEFGHIKLMNPQRSTVWY*")
# Set view of AA_ORDER, used for membership tests when cleaning user input.
AA_ALLOWED = set(AA_ORDER)
def _clean_aa_seq(raw: str) -> str:
    """
    Reduce free-form user input to a string of allowed residue symbols.

    The text is upper-cased first, then every character that is not a
    member of AA_ALLOWED is discarded (whitespace, punctuation, digits and
    any other letters). A falsy input such as None or "" produces "".
    Example: ' mkk ll;ptaA ' -> 'MKKLLPTAA'
    """
    text = raw.upper() if raw else ""
    # Upper-casing happens before filtering so lowercase residues are kept.
    return "".join(filter(AA_ALLOWED.__contains__, text))
def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """
    Render a per-amino-acid codon mix as a two-column table.

    Args:
        aa_percent: Mapping of amino-acid symbol to a mapping of codon to
            fraction; each fraction is multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.

    Returns:
        A DataFrame with columns "AA" and "Codon percentage". Rows follow
        AA_ORDER when every key of ``aa_percent`` appears in AA_ORDER,
        otherwise the sorted keys of ``aa_percent``. Within a cell, codons
        are listed by descending fraction, ties broken by codon name.
        Symbols with a missing or empty mix are shown as "—".
    """

    def _describe(mix) -> str:
        # Missing / empty mixes get a placeholder instead of an empty cell.
        if not mix:
            return "—"
        ranked = sorted(mix.items(), key=lambda kv: (-kv[1], kv[0]))
        return " - ".join(f"{cod} ({val*100:.{digits}f}%)" for cod, val in ranked)

    # Fall back to the mapping's own (sorted) keys when it holds symbols
    # that the canonical ordering does not know about.
    if set(AA_ORDER).issuperset(aa_percent):
        row_keys = AA_ORDER
    else:
        row_keys = sorted(aa_percent)

    table = [[symbol, _describe(aa_percent.get(symbol, {}))] for symbol in row_keys]
    return pd.DataFrame(table, columns=["AA", "Codon percentage"])
# NOTE: run() params MUST match the number/order of Interface inputs
def run(aa_seq: str, use_wobble: bool):
    """
    Gradio callback: clean the input, run the optimizer, shape the outputs.

    Args:
        aa_seq: Raw text from the sequence textbox.
        use_wobble: Checkbox state, forwarded unchanged to optimization().

    Returns:
        A 3-tuple ``(designed_nt, aa_table, gc_percent)`` where ``aa_table``
        is built by aa_percent_to_onecol_df() and ``gc_percent`` is always a
        pandas DataFrame.

    Raises:
        gr.Error: If nothing remains of ``aa_seq`` after cleaning.
    """
    protein = _clean_aa_seq(aa_seq)
    if not protein:
        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

    optimizer_kwargs = {
        "summary_path": SUMMARY_PATH,
        "aa_seq": protein,
        "use_wobble": use_wobble,  # checkbox value, passed straight through
        "wobble_scale": 1.0,  # fixed for now; no UI control exposes it
        "use_percent_intervals": True,
    }
    # optimization() must yield exactly four values; the last one is unused.
    designed_nt, aa_percent, gc_percent, _unused = optimization(**optimizer_kwargs)

    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)
    # The GC output widget is a Dataframe, so wrap anything that is not one.
    gc_table = gc_percent if isinstance(gc_percent, pd.DataFrame) else pd.DataFrame(gc_percent)
    return designed_nt, aa_table, gc_table
# ---- Gradio Interface ----
# Input widgets, listed in the positional order run() receives their values.
_INPUT_COMPONENTS = [
    gr.Textbox(label="Amino Acid Sequence", lines=5, placeholder="e.g. MKKLLPTAA..."),
    gr.Checkbox(
        label="Use wobble score",
        value=True,  # default ON
        info="When ON, adds a 3rd-base preference (e.g., C favored; A/T slightly penalized) during seeding & filling. Turn OFF to rely only on k-mer motifs.",
    ),
]

# Output widgets, listed in the order of the tuple returned by run().
_OUTPUT_COMPONENTS = [
    gr.Textbox(label="Optimized Nucleotide Sequence"),
    gr.Dataframe(label="Codon Usage Percent (per AA)", wrap=True),
    gr.Dataframe(label="GC Content (%)", wrap=True),
]

iface = gr.Interface(
    fn=run,
    inputs=_INPUT_COMPONENTS,
    outputs=_OUTPUT_COMPONENTS,
    title="Codon Optimizer",
    description="Toggle wobble to include/exclude position-3 base scoring in the design.",
    flagging_mode="never",
)
if __name__ == "__main__":
    # Launch the app only when this file is executed directly, not on import.
    # Alternative kept for reference (launch through the queue instead):
    # iface.queue().launch()
    iface.launch()
|