Spaces:

Arunmass
/

AI_Image_Detector

Sleeping

App Files Files Community

Arunmass committed on Mar 26

Commit

5db0941

verified ·

1 Parent(s): a2ab0d0

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -11

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ import warnings
 warnings.filterwarnings("ignore")
 import gc
 import numpy as np
 import torch
 import torch.nn as nn
@@ -19,6 +21,7 @@ import pywt
 from scipy import stats
 from scipy.ndimage import uniform_filter
 from scipy.fftpack import dct as scipy_dct
 ImageFile.LOAD_TRUNCATED_IMAGES = True
@@ -382,19 +385,63 @@ def predict(image: Image.Image) -> dict:
     }
-def format_output(image_path: str) -> str:
     if not image_path:
-        return _empty_state()
     try:
         image = Image.open(image_path).convert("RGB")
     except Exception as e:
-        return f'<div class="result-error">⚠️ Error loading image: {e}</div>'
     try:
         result = predict(image)
     except Exception as e:
-        return f'<div class="result-error">⚠️ Prediction error: {e}</div>'
     ai_score   = result["ai_prob"]
     real_score = result["real_prob"]
@@ -416,14 +463,14 @@ def format_output(image_path: str) -> str:
     ai_w   = int(ai_score   * 100)
     real_w = int(real_score * 100)
-    verdict_color = "#ff4d6d" if is_ai else "#2ec4b6"
-    verdict_bg    = "rgba(255,77,109,0.12)" if is_ai else "rgba(46,196,182,0.12)"
-    verdict_border= "#ff4d6d" if is_ai else "#2ec4b6"
     bar_ai_color   = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
     bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
-    return f"""
 <div class="result-card">
   <div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
     <div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
@@ -457,6 +504,13 @@ def format_output(image_path: str) -> str:
 </div>
 """
 def _empty_state() -> str:
     return """
@@ -655,6 +709,23 @@ footer { background: var(--bg) !important; }
     transition: width 0.6s cubic-bezier(.4,0,.2,1);
 }
 /* ── Empty / error states ────────────────────────────────────── */
 .empty-state {
     display: flex;
@@ -725,12 +796,29 @@ with gr.Blocks(css=custom_css, title="AI Image Detector") as demo:
         with gr.Column(scale=1, elem_id="output-panel"):
             gr.HTML('<div class="panel-title">Analysis Result</div>')
-            output_html = gr.HTML(value=_empty_state(), elem_id="output-result")
     gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> &mdash; VIT Student</div>')
-    submit_btn.click(fn=format_output, inputs=[image_input], outputs=[output_html])
-    image_input.change(fn=format_output, inputs=[image_input], outputs=[output_html])
 if __name__ == "__main__":
     demo.launch(

 warnings.filterwarnings("ignore")
 import gc
+import asyncio
+import tempfile
 import numpy as np
 import torch
 import torch.nn as nn
 from scipy import stats
 from scipy.ndimage import uniform_filter
 from scipy.fftpack import dct as scipy_dct
+import edge_tts
 ImageFile.LOAD_TRUNCATED_IMAGES = True
     }
+# ── TTS helpers ───────────────────────────────────────────────────────────────
+TTS_VOICE = "en-US-AriaNeural"   # clear, natural Microsoft Edge voice
+def _build_tts_text(verdict: str, conf_text: str, confidence: float,
+                    ai_pct: float, real_pct: float) -> str:
+    """Compose a concise spoken summary of the analysis result."""
+    return (
+        f"Analysis complete. "
+        f"This image appears to be {verdict}. "
+        f"Confidence level: {conf_text}, at {confidence * 100:.1f} percent. "
+        f"AI-generated probability: {ai_pct:.1f} percent. "
+        f"Human-created probability: {real_pct:.1f} percent."
+    )
async def _synthesize(text: str, output_path: str) -> None:
    """Speak *text* with the ``TTS_VOICE`` voice and write it to *output_path*.

    Builds an ``edge_tts.Communicate`` object for the text and awaits its
    ``save`` coroutine with the destination path.
    """
    await edge_tts.Communicate(text, TTS_VOICE).save(output_path)
+def generate_tts_audio(text: str) -> str | None:
+    """
+    Synthesize speech for *text* using Edge TTS.
+    Returns the path to a temporary MP3 file, or None on failure.
+    """
+    try:
+        tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
+        tmp.close()
+        asyncio.run(_synthesize(text, tmp.name))
+        return tmp.name
+    except Exception as e:
+        print(f"[TTS] synthesis failed: {e}")
+        return None
+# ── Output formatter ──────────────────────────────────────────────────────────
+def format_output(image_path: str):
+    """
+    Returns a (html, audio_path) tuple.
+    audio_path is a temp MP3 file path (or None if TTS failed / no image).
+    """
     if not image_path:
+        return _empty_state(), None
     try:
         image = Image.open(image_path).convert("RGB")
     except Exception as e:
+        return f'<div class="result-error">⚠️ Error loading image: {e}</div>', None
     try:
         result = predict(image)
     except Exception as e:
+        return f'<div class="result-error">⚠️ Prediction error: {e}</div>', None
     ai_score   = result["ai_prob"]
     real_score = result["real_prob"]
     ai_w   = int(ai_score   * 100)
     real_w = int(real_score * 100)
+    verdict_color  = "#ff4d6d" if is_ai else "#2ec4b6"
+    verdict_bg     = "rgba(255,77,109,0.12)" if is_ai else "rgba(46,196,182,0.12)"
+    verdict_border = "#ff4d6d" if is_ai else "#2ec4b6"
     bar_ai_color   = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
     bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
+    html = f"""
 <div class="result-card">
   <div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
     <div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
 </div>
 """
+    # ── TTS ───────────────────────────────────────────────────────────────────
+    tts_text  = _build_tts_text(verdict, conf_text, confidence,
+                                ai_score * 100, real_score * 100)
+    audio_path = generate_tts_audio(tts_text)
+    return html, audio_path
 def _empty_state() -> str:
     return """
     transition: width 0.6s cubic-bezier(.4,0,.2,1);
 }
+/* ── TTS audio player ────────────────────────────────────────── */
+#tts-audio {
+    margin-top: 1.25rem;
+}
+#tts-audio audio {
+    width: 100% !important;
+    border-radius: 8px !important;
+    background: var(--surface2) !important;
+}
+#tts-audio .label-wrap,
+#tts-audio label span {
+    font-size: 0.7rem !important;
+    letter-spacing: 0.1em !important;
+    text-transform: uppercase !important;
+    color: var(--text-dim) !important;
+}
 /* ── Empty / error states ────────────────────────────────────── */
 .empty-state {
     display: flex;
         with gr.Column(scale=1, elem_id="output-panel"):
             gr.HTML('<div class="panel-title">Analysis Result</div>')
+            output_html  = gr.HTML(value=_empty_state(), elem_id="output-result")
+            # ── TTS audio output ──────────────────────────────────────────────
+            tts_audio = gr.Audio(
+                label="🔊 Voice Summary",
+                type="filepath",
+                autoplay=True,
+                show_download_button=False,
+                elem_id="tts-audio",
+                visible=True,
+            )
     gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> &mdash; VIT Student</div>')
+    submit_btn.click(
+        fn=format_output,
+        inputs=[image_input],
+        outputs=[output_html, tts_audio],
+    )
+    image_input.change(
+        fn=format_output,
+        inputs=[image_input],
+        outputs=[output_html, tts_audio],
+    )
 if __name__ == "__main__":
     demo.launch(