File size: 2,832 Bytes
c1f1ea8
326c06d
cafd975
af773f6
326c06d
 
 
 
b497075
ea4ca8c
e4049b2
798ee89
 
 
 
 
 
 
 
 
72ae7f9
e4049b2
72ae7f9
 
e4049b2
 
72ae7f9
e4049b2
 
72ae7f9
 
98d3cfd
72ae7f9
b497075
 
ea4ca8c
 
 
 
b497075
e4049b2
c1f1ea8
b497075
d7061f7
b497075
1357470
326c06d
c1f1ea8
72ae7f9
839b6bc
 
 
e4049b2
cafd975
839b6bc
ea4ca8c
c1f1ea8
 
b497075
 
 
 
 
 
c1f1ea8
 
 
98d3cfd
e4049b2
c1f1ea8
cafd975
b497075
ab6ae03
c1f1ea8
3fcfcd1
 
ea4ca8c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import gradio as gr
import pandas as pd
from optimizer import optimization

# Locate the summary CSV next to this module rather than relative to the
# current working directory.
BASE_DIR = os.path.split(__file__)[0]
SUMMARY_PATH = os.path.join(BASE_DIR, "region_sweep_summary.csv")

# Display order of the accepted one-letter symbols: the 20 standard amino
# acids plus '*' (presumably the stop symbol - confirm against the optimizer).
AA_ORDER = [*"ACDEFGHIKLMNPQRSTVWY*"]
# Same symbols as a set, for constant-time membership checks.
AA_ALLOWED = {*AA_ORDER}

def _clean_aa_seq(raw: str) -> str:
    """
    Keep only allowed amino-acid symbols defined in AA_ORDER.
    Upper-cases ASCII letters, then removes spaces, punctuation, digits,
    and any other characters.
    Example: ' mkk ll;ptaA ' -> 'MKKLLPTAA'

    Non-ASCII characters are dropped *before* upper-casing: str.upper() can
    expand them into ASCII letters (e.g. 'ß' -> 'SS', 'ﬁ' -> 'FI'), which
    would otherwise be accepted as residues the user never entered.

    Args:
        raw: User-supplied sequence text; None or '' yields ''.

    Returns:
        The cleaned, upper-case sequence (possibly empty).
    """
    ascii_only = "".join(ch for ch in (raw or "") if ch.isascii())
    # Upper-casing pure-ASCII text is a 1:1 mapping, so no symbols are invented.
    return "".join(ch for ch in ascii_only.upper() if ch in AA_ALLOWED)

def aa_percent_to_onecol_df(aa_percent: dict, digits: int = 0) -> pd.DataFrame:
    """
    Flatten a per-amino-acid codon mix into a two-column table.

    Args:
        aa_percent: Mapping of amino-acid symbol -> {codon: fraction}.
            Fractions are multiplied by 100 for display.
        digits: Number of decimal places shown for each percentage.

    Returns:
        DataFrame with columns "AA" and "Codon percentage". Rows follow
        AA_ORDER when every key of ``aa_percent`` is a known symbol,
        otherwise the sorted keys of ``aa_percent``. Symbols with no codon
        data get a dash placeholder.
    """
    if set(aa_percent) <= set(AA_ORDER):
        residues = AA_ORDER
    else:
        residues = sorted(aa_percent)

    def _render(codon_mix) -> str:
        # Missing or empty mix: show a placeholder instead of an empty cell.
        if not codon_mix:
            return "—"
        # Highest share first; ties broken alphabetically by codon.
        ranked = sorted(codon_mix.items(), key=lambda item: (-item[1], item[0]))
        return " - ".join(
            f"{codon} ({share*100:.{digits}f}%)" for codon, share in ranked
        )

    table = [[residue, _render(aa_percent.get(residue, {}))] for residue in residues]
    return pd.DataFrame(table, columns=["AA", "Codon percentage"])

# NOTE: run() params MUST match the number/order of Interface inputs
def run(aa_seq: str, use_wobble: bool):
    """
    Gradio callback: clean the input, run the optimizer, shape the outputs.

    Args:
        aa_seq: Raw amino-acid text from the sequence textbox.
        use_wobble: State of the wobble checkbox, forwarded unchanged to
            the optimizer.

    Returns:
        A 3-tuple: the optimizer's designed sequence, the per-AA codon
        usage table, and the GC result as a DataFrame.

    Raises:
        gr.Error: If nothing remains of ``aa_seq`` after cleaning.
    """
    sequence = _clean_aa_seq(aa_seq)
    if not sequence:
        raise gr.Error("Input sequence contains no valid amino-acid characters after cleaning.")

    # Only the wobble toggle comes from the UI; the other settings are fixed
    # here (wobble_scale could be exposed as a slider later).
    optimizer_kwargs = dict(
        summary_path=SUMMARY_PATH,
        aa_seq=sequence,
        use_wobble=use_wobble,
        wobble_scale=1.0,
        use_percent_intervals=True,
    )
    # The optimizer returns four values; the last one is not displayed.
    designed_nt, aa_percent, gc_percent, _ = optimization(**optimizer_kwargs)

    aa_table = aa_percent_to_onecol_df(aa_percent, digits=0)
    # The GC output component is a Dataframe, so coerce anything else.
    gc_table = gc_percent if isinstance(gc_percent, pd.DataFrame) else pd.DataFrame(gc_percent)

    return designed_nt, aa_table, gc_table

# ---- Gradio Interface ----
# Two inputs feed run(aa_seq, use_wobble); its three return values fill the
# three outputs, in the same order.
iface = gr.Interface(
    title="Codon Optimizer",
    description="Toggle wobble to include/exclude position-3 base scoring in the design.",
    flagging_mode="never",
    fn=run,
    inputs=[
        gr.Textbox(
            label="Amino Acid Sequence",
            lines=5,
            placeholder="e.g. MKKLLPTAA...",
        ),
        gr.Checkbox(
            label="Use wobble score",
            info="When ON, adds a 3rd-base preference (e.g., C favored; A/T slightly penalized) during seeding & filling. Turn OFF to rely only on k-mer motifs.",
            value=True,  # checked by default
        ),
    ],
    outputs=[
        gr.Textbox(label="Optimized Nucleotide Sequence"),
        gr.Dataframe(wrap=True, label="Codon Usage Percent (per AA)"),
        gr.Dataframe(wrap=True, label="GC Content (%)"),
    ],
)

if __name__ == "__main__":
    # Start the UI only when this file is executed as a script, not on import.
    # Queued variant kept for reference: iface.queue().launch()
    iface.launch()