Arunmass committed on
Commit
5db0941
·
verified ·
1 Parent(s): a2ab0d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -11
app.py CHANGED
@@ -7,6 +7,8 @@ import warnings
7
  warnings.filterwarnings("ignore")
8
 
9
  import gc
 
 
10
  import numpy as np
11
  import torch
12
  import torch.nn as nn
@@ -19,6 +21,7 @@ import pywt
19
  from scipy import stats
20
  from scipy.ndimage import uniform_filter
21
  from scipy.fftpack import dct as scipy_dct
 
22
 
23
  ImageFile.LOAD_TRUNCATED_IMAGES = True
24
 
@@ -382,19 +385,63 @@ def predict(image: Image.Image) -> dict:
382
  }
383
 
384
 
385
- def format_output(image_path: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
386
  if not image_path:
387
- return _empty_state()
388
 
389
  try:
390
  image = Image.open(image_path).convert("RGB")
391
  except Exception as e:
392
- return f'<div class="result-error">⚠️ Error loading image: {e}</div>'
393
 
394
  try:
395
  result = predict(image)
396
  except Exception as e:
397
- return f'<div class="result-error">⚠️ Prediction error: {e}</div>'
398
 
399
  ai_score = result["ai_prob"]
400
  real_score = result["real_prob"]
@@ -416,14 +463,14 @@ def format_output(image_path: str) -> str:
416
  ai_w = int(ai_score * 100)
417
  real_w = int(real_score * 100)
418
 
419
- verdict_color = "#ff4d6d" if is_ai else "#2ec4b6"
420
- verdict_bg = "rgba(255,77,109,0.12)" if is_ai else "rgba(46,196,182,0.12)"
421
- verdict_border= "#ff4d6d" if is_ai else "#2ec4b6"
422
 
423
  bar_ai_color = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
424
  bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
425
 
426
- return f"""
427
  <div class="result-card">
428
  <div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
429
  <div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
@@ -457,6 +504,13 @@ def format_output(image_path: str) -> str:
457
  </div>
458
  """
459
 
 
 
 
 
 
 
 
460
 
461
  def _empty_state() -> str:
462
  return """
@@ -655,6 +709,23 @@ footer { background: var(--bg) !important; }
655
  transition: width 0.6s cubic-bezier(.4,0,.2,1);
656
  }
657
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
658
  /* ── Empty / error states ────────────────────────────────────── */
659
  .empty-state {
660
  display: flex;
@@ -725,12 +796,29 @@ with gr.Blocks(css=custom_css, title="AI Image Detector") as demo:
725
 
726
  with gr.Column(scale=1, elem_id="output-panel"):
727
  gr.HTML('<div class="panel-title">Analysis Result</div>')
728
- output_html = gr.HTML(value=_empty_state(), elem_id="output-result")
 
 
 
 
 
 
 
 
 
729
 
730
  gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> &mdash; VIT Student</div>')
731
 
732
- submit_btn.click(fn=format_output, inputs=[image_input], outputs=[output_html])
733
- image_input.change(fn=format_output, inputs=[image_input], outputs=[output_html])
 
 
 
 
 
 
 
 
734
 
735
  if __name__ == "__main__":
736
  demo.launch(
 
7
  warnings.filterwarnings("ignore")
8
 
9
  import gc
10
+ import asyncio
11
+ import tempfile
12
  import numpy as np
13
  import torch
14
  import torch.nn as nn
 
21
  from scipy import stats
22
  from scipy.ndimage import uniform_filter
23
  from scipy.fftpack import dct as scipy_dct
24
+ import edge_tts
25
 
26
  ImageFile.LOAD_TRUNCATED_IMAGES = True
27
 
 
385
  }
386
 
387
 
388
+ # ── TTS helpers ───────────────────────────────────────────────────────────────
389
+
390
+ TTS_VOICE = "en-US-AriaNeural" # clear, natural Microsoft Edge voice
391
+
392
+
393
+ def _build_tts_text(verdict: str, conf_text: str, confidence: float,
394
+ ai_pct: float, real_pct: float) -> str:
395
+ """Compose a concise spoken summary of the analysis result."""
396
+ return (
397
+ f"Analysis complete. "
398
+ f"This image appears to be {verdict}. "
399
+ f"Confidence level: {conf_text}, at {confidence * 100:.1f} percent. "
400
+ f"AI-generated probability: {ai_pct:.1f} percent. "
401
+ f"Human-created probability: {real_pct:.1f} percent."
402
+ )
403
+
404
+
405
async def _synthesize(text: str, output_path: str) -> None:
    """Speak *text* with the module's TTS voice and write it to *output_path*.

    Builds an ``edge_tts.Communicate`` for *text* using ``TTS_VOICE`` and
    awaits its ``save`` call; nothing is returned.
    """
    await edge_tts.Communicate(text, TTS_VOICE).save(output_path)
409
+
410
+
411
+ def generate_tts_audio(text: str) -> str | None:
412
+ """
413
+ Synthesize speech for *text* using Edge TTS.
414
+ Returns the path to a temporary MP3 file, or None on failure.
415
+ """
416
+ try:
417
+ tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
418
+ tmp.close()
419
+ asyncio.run(_synthesize(text, tmp.name))
420
+ return tmp.name
421
+ except Exception as e:
422
+ print(f"[TTS] synthesis failed: {e}")
423
+ return None
424
+
425
+
426
+ # ── Output formatter ──────────────────────────────────────────────────────────
427
+
428
+ def format_output(image_path: str):
429
+ """
430
+ Returns a (html, audio_path) tuple.
431
+ audio_path is a temp MP3 file path (or None if TTS failed / no image).
432
+ """
433
  if not image_path:
434
+ return _empty_state(), None
435
 
436
  try:
437
  image = Image.open(image_path).convert("RGB")
438
  except Exception as e:
439
+ return f'<div class="result-error">⚠️ Error loading image: {e}</div>', None
440
 
441
  try:
442
  result = predict(image)
443
  except Exception as e:
444
+ return f'<div class="result-error">⚠️ Prediction error: {e}</div>', None
445
 
446
  ai_score = result["ai_prob"]
447
  real_score = result["real_prob"]
 
463
  ai_w = int(ai_score * 100)
464
  real_w = int(real_score * 100)
465
 
466
+ verdict_color = "#ff4d6d" if is_ai else "#2ec4b6"
467
+ verdict_bg = "rgba(255,77,109,0.12)" if is_ai else "rgba(46,196,182,0.12)"
468
+ verdict_border = "#ff4d6d" if is_ai else "#2ec4b6"
469
 
470
  bar_ai_color = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
471
  bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
472
 
473
+ html = f"""
474
  <div class="result-card">
475
  <div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
476
  <div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
 
504
  </div>
505
  """
506
 
507
+ # ── TTS ───────────────────────────────────────────────────────────────────
508
+ tts_text = _build_tts_text(verdict, conf_text, confidence,
509
+ ai_score * 100, real_score * 100)
510
+ audio_path = generate_tts_audio(tts_text)
511
+
512
+ return html, audio_path
513
+
514
 
515
  def _empty_state() -> str:
516
  return """
 
709
  transition: width 0.6s cubic-bezier(.4,0,.2,1);
710
  }
711
 
712
+ /* ── TTS audio player ────────────────────────────────────────── */
713
+ #tts-audio {
714
+ margin-top: 1.25rem;
715
+ }
716
+ #tts-audio audio {
717
+ width: 100% !important;
718
+ border-radius: 8px !important;
719
+ background: var(--surface2) !important;
720
+ }
721
+ #tts-audio .label-wrap,
722
+ #tts-audio label span {
723
+ font-size: 0.7rem !important;
724
+ letter-spacing: 0.1em !important;
725
+ text-transform: uppercase !important;
726
+ color: var(--text-dim) !important;
727
+ }
728
+
729
  /* ── Empty / error states ────────────────────────────────────── */
730
  .empty-state {
731
  display: flex;
 
796
 
797
  with gr.Column(scale=1, elem_id="output-panel"):
798
  gr.HTML('<div class="panel-title">Analysis Result</div>')
799
+ output_html = gr.HTML(value=_empty_state(), elem_id="output-result")
800
+ # ── TTS audio output ──────────────────────────────────────────────
801
+ tts_audio = gr.Audio(
802
+ label="πŸ”Š Voice Summary",
803
+ type="filepath",
804
+ autoplay=True,
805
+ show_download_button=False,
806
+ elem_id="tts-audio",
807
+ visible=True,
808
+ )
809
 
810
  gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> &mdash; VIT Student</div>')
811
 
812
+ submit_btn.click(
813
+ fn=format_output,
814
+ inputs=[image_input],
815
+ outputs=[output_html, tts_audio],
816
+ )
817
+ image_input.change(
818
+ fn=format_output,
819
+ inputs=[image_input],
820
+ outputs=[output_html, tts_audio],
821
+ )
822
 
823
  if __name__ == "__main__":
824
  demo.launch(