Spaces:

jebin511
/

dnaapi

Sleeping

App Files Files Community

jebin511 committed on Oct 11, 2025

Commit

af1e5ec

verified ·

1 Parent(s): 69bf1ca

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -47

app.py CHANGED Viewed

@@ -1,81 +1,55 @@
 import gradio as gr
 import joblib
 import numpy as np
 from collections import Counter
 from typing import List
-import os
-# --- Helper Functions ---
-BASES = ['A', 'T', 'C', 'G']
 def kmer_counts(seq: str, k=3):
     seq = seq.strip().upper()
     counts = Counter()
     if len(seq) < k:
         return counts
-    for i in range(len(seq) - k + 1):
         counts[seq[i:i+k]] += 1
     return counts
 def vectorize_single(seq: str, vocab: List[str], k=3):
-    X = np.zeros((1, len(vocab)), dtype=float)
     c = kmer_counts(seq, k)
-    for j, kmer in enumerate(vocab):
-        X[0, j] = c.get(kmer, 0)
-    return X
-# --- Load Model ---
-MODEL_PATH = "mutation_model.joblib"
-if not os.path.exists(MODEL_PATH):
-    raise FileNotFoundError(
-        f"⚠️ Model file '{MODEL_PATH}' not found. "
-        "Please upload 'mutation_model.joblib' along with this app."
-    )
-model, vocab = joblib.load(MODEL_PATH)
-# --- Prediction Logic ---
 def predict_sequence(sequence: str):
     if not sequence or len(sequence.strip()) < 3:
-        return {"error": "Please enter a valid DNA sequence (≥3 bases)."}
     X = vectorize_single(sequence, vocab=vocab, k=3)
     pred = model.predict(X)[0]
     prob = float(model.predict_proba(X).max()) if hasattr(model, "predict_proba") else None
     return {
         "sequence": sequence,
         "mutation_detected": bool(pred),
-        "confidence": round(prob, 3) if prob else "N/A"
     }
-# --- Gradio Interface ---
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown(
-        """
-        <h1 style="text-align:center;">🧬 DNA Mutation Analyzer</h1>
-        <p style="text-align:center;">
-        Upload or paste a DNA sequence to check for possible mutations using a Random Forest ML model.
-        </p>
-        """
-    )
-    with gr.Row():
-        seq_input = gr.Textbox(
-            label="DNA Sequence",
-            placeholder="Enter sequence like ATGCGTACGTTAGC...",
-            lines=2,
-        )
-    analyze_btn = gr.Button("🔍 Analyze Sequence")
-    result = gr.JSON(label="Analysis Result")
-    analyze_btn.click(fn=predict_sequence, inputs=seq_input, outputs=result)
-# --- API Endpoint for Programmatic Access ---
 def api_predict(payload: dict):
     seq = payload.get("sequence", "")
     return predict_sequence(seq)
 if __name__ == "__main__":
-    demo.launch()

+# app.py
 import gradio as gr
 import joblib
 import numpy as np
 from collections import Counter
 from typing import List
+# helper: k-mer extraction / vectorize (k=3)
 def kmer_counts(seq: str, k=3):
     seq = seq.strip().upper()
     counts = Counter()
     if len(seq) < k:
         return counts
+    for i in range(len(seq)-k+1):
         counts[seq[i:i+k]] += 1
     return counts
 def vectorize_single(seq: str, vocab: List[str], k=3):
+    x = np.zeros((1, len(vocab)), dtype=float)
     c = kmer_counts(seq, k)
+    for j,kmer in enumerate(vocab):
+        x[0,j] = c.get(kmer, 0)
+    return x
+# load model+vocab (mutation_model.joblib must be uploaded too)
+model, vocab = joblib.load("mutation_model.joblib")
 def predict_sequence(sequence: str):
     if not sequence or len(sequence.strip()) < 3:
+        return {"error":"sequence too short"}
     X = vectorize_single(sequence, vocab=vocab, k=3)
     pred = model.predict(X)[0]
     prob = float(model.predict_proba(X).max()) if hasattr(model, "predict_proba") else None
     return {
         "sequence": sequence,
         "mutation_detected": bool(pred),
+        "confidence": prob
     }
+# Gradio UI
+with gr.Blocks() as demo:
+    gr.Markdown("# DNA Mutation Detector (Quick Space)")
+    seq_in = gr.Textbox(label="DNA sequence", placeholder="ATGCGTACGTTAGC...")
+    btn = gr.Button("Analyze")
+    out = gr.JSON()
+    btn.click(fn=predict_sequence, inputs=seq_in, outputs=out)
+# Expose a simple inference API endpoint (Gradio provides /api/predict automatically)
+# but we also expose a programmatic function name for convenience:
 def api_predict(payload: dict):
     seq = payload.get("sequence", "")
     return predict_sequence(seq)
 if __name__ == "__main__":
+    demo.launch()