Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,8 @@ import warnings
|
|
| 7 |
warnings.filterwarnings("ignore")
|
| 8 |
|
| 9 |
import gc
|
|
|
|
|
|
|
| 10 |
import numpy as np
|
| 11 |
import torch
|
| 12 |
import torch.nn as nn
|
|
@@ -19,6 +21,7 @@ import pywt
|
|
| 19 |
from scipy import stats
|
| 20 |
from scipy.ndimage import uniform_filter
|
| 21 |
from scipy.fftpack import dct as scipy_dct
|
|
|
|
| 22 |
|
| 23 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 24 |
|
|
@@ -382,19 +385,63 @@ def predict(image: Image.Image) -> dict:
|
|
| 382 |
}
|
| 383 |
|
| 384 |
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 386 |
if not image_path:
|
| 387 |
-
return _empty_state()
|
| 388 |
|
| 389 |
try:
|
| 390 |
image = Image.open(image_path).convert("RGB")
|
| 391 |
except Exception as e:
|
| 392 |
-
return f'<div class="result-error">β οΈ Error loading image: {e}</div>'
|
| 393 |
|
| 394 |
try:
|
| 395 |
result = predict(image)
|
| 396 |
except Exception as e:
|
| 397 |
-
return f'<div class="result-error">β οΈ Prediction error: {e}</div>'
|
| 398 |
|
| 399 |
ai_score = result["ai_prob"]
|
| 400 |
real_score = result["real_prob"]
|
|
@@ -416,14 +463,14 @@ def format_output(image_path: str) -> str:
|
|
| 416 |
ai_w = int(ai_score * 100)
|
| 417 |
real_w = int(real_score * 100)
|
| 418 |
|
| 419 |
-
verdict_color
|
| 420 |
-
verdict_bg
|
| 421 |
-
verdict_border= "#ff4d6d" if is_ai else "#2ec4b6"
|
| 422 |
|
| 423 |
bar_ai_color = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
|
| 424 |
bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
|
| 425 |
|
| 426 |
-
|
| 427 |
<div class="result-card">
|
| 428 |
<div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
|
| 429 |
<div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
|
|
@@ -457,6 +504,13 @@ def format_output(image_path: str) -> str:
|
|
| 457 |
</div>
|
| 458 |
"""
|
| 459 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 460 |
|
| 461 |
def _empty_state() -> str:
|
| 462 |
return """
|
|
@@ -655,6 +709,23 @@ footer { background: var(--bg) !important; }
|
|
| 655 |
transition: width 0.6s cubic-bezier(.4,0,.2,1);
|
| 656 |
}
|
| 657 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
/* ── Empty / error states ────────────────────────────────────── */
|
| 659 |
.empty-state {
|
| 660 |
display: flex;
|
|
@@ -725,12 +796,29 @@ with gr.Blocks(css=custom_css, title="AI Image Detector") as demo:
|
|
| 725 |
|
| 726 |
with gr.Column(scale=1, elem_id="output-panel"):
|
| 727 |
gr.HTML('<div class="panel-title">Analysis Result</div>')
|
| 728 |
-
output_html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 729 |
|
| 730 |
gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> — VIT Student</div>')
|
| 731 |
|
| 732 |
-
submit_btn.click(
|
| 733 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 734 |
|
| 735 |
if __name__ == "__main__":
|
| 736 |
demo.launch(
|
|
|
|
| 7 |
warnings.filterwarnings("ignore")
|
| 8 |
|
| 9 |
import gc
|
| 10 |
+
import asyncio
|
| 11 |
+
import tempfile
|
| 12 |
import numpy as np
|
| 13 |
import torch
|
| 14 |
import torch.nn as nn
|
|
|
|
| 21 |
from scipy import stats
|
| 22 |
from scipy.ndimage import uniform_filter
|
| 23 |
from scipy.fftpack import dct as scipy_dct
|
| 24 |
+
import edge_tts
|
| 25 |
|
| 26 |
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
| 27 |
|
|
|
|
| 385 |
}
|
| 386 |
|
| 387 |
|
| 388 |
+
# ── TTS helpers ───────────────────────────────────────────────────────────────
|
| 389 |
+
|
| 390 |
+
TTS_VOICE = "en-US-AriaNeural" # clear, natural Microsoft Edge voice
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def _build_tts_text(verdict: str, conf_text: str, confidence: float,
|
| 394 |
+
ai_pct: float, real_pct: float) -> str:
|
| 395 |
+
"""Compose a concise spoken summary of the analysis result."""
|
| 396 |
+
return (
|
| 397 |
+
f"Analysis complete. "
|
| 398 |
+
f"This image appears to be {verdict}. "
|
| 399 |
+
f"Confidence level: {conf_text}, at {confidence * 100:.1f} percent. "
|
| 400 |
+
f"AI-generated probability: {ai_pct:.1f} percent. "
|
| 401 |
+
f"Human-created probability: {real_pct:.1f} percent."
|
| 402 |
+
)
|
| 403 |
+
|
| 404 |
+
|
| 405 |
+
async def _synthesize(text: str, output_path: str) -> None:
    """Synthesize *text* with edge-tts using TTS_VOICE and save it to *output_path*."""
    # Build the edge-tts request and await the save in a single expression.
    await edge_tts.Communicate(text, TTS_VOICE).save(output_path)
|
| 409 |
+
|
| 410 |
+
|
| 411 |
+
def generate_tts_audio(text: str) -> str | None:
|
| 412 |
+
"""
|
| 413 |
+
Synthesize speech for *text* using Edge TTS.
|
| 414 |
+
Returns the path to a temporary MP3 file, or None on failure.
|
| 415 |
+
"""
|
| 416 |
+
try:
|
| 417 |
+
tmp = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
|
| 418 |
+
tmp.close()
|
| 419 |
+
asyncio.run(_synthesize(text, tmp.name))
|
| 420 |
+
return tmp.name
|
| 421 |
+
except Exception as e:
|
| 422 |
+
print(f"[TTS] synthesis failed: {e}")
|
| 423 |
+
return None
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
# ── Output formatter ──────────────────────────────────────────────────────────
|
| 427 |
+
|
| 428 |
+
def format_output(image_path: str):
|
| 429 |
+
"""
|
| 430 |
+
Returns a (html, audio_path) tuple.
|
| 431 |
+
audio_path is a temp MP3 file path (or None if TTS failed / no image).
|
| 432 |
+
"""
|
| 433 |
if not image_path:
|
| 434 |
+
return _empty_state(), None
|
| 435 |
|
| 436 |
try:
|
| 437 |
image = Image.open(image_path).convert("RGB")
|
| 438 |
except Exception as e:
|
| 439 |
+
return f'<div class="result-error">β οΈ Error loading image: {e}</div>', None
|
| 440 |
|
| 441 |
try:
|
| 442 |
result = predict(image)
|
| 443 |
except Exception as e:
|
| 444 |
+
return f'<div class="result-error">β οΈ Prediction error: {e}</div>', None
|
| 445 |
|
| 446 |
ai_score = result["ai_prob"]
|
| 447 |
real_score = result["real_prob"]
|
|
|
|
| 463 |
ai_w = int(ai_score * 100)
|
| 464 |
real_w = int(real_score * 100)
|
| 465 |
|
| 466 |
+
verdict_color = "#ff4d6d" if is_ai else "#2ec4b6"
|
| 467 |
+
verdict_bg = "rgba(255,77,109,0.12)" if is_ai else "rgba(46,196,182,0.12)"
|
| 468 |
+
verdict_border = "#ff4d6d" if is_ai else "#2ec4b6"
|
| 469 |
|
| 470 |
bar_ai_color = "linear-gradient(90deg,#ff6b6b,#ff4d6d)"
|
| 471 |
bar_real_color = "linear-gradient(90deg,#43e8d8,#2ec4b6)"
|
| 472 |
|
| 473 |
+
html = f"""
|
| 474 |
<div class="result-card">
|
| 475 |
<div class="verdict-block" style="background:{verdict_bg};border:1.5px solid {verdict_border};">
|
| 476 |
<div class="verdict-icon-wrap" style="color:{verdict_color};">{icon}</div>
|
|
|
|
| 504 |
</div>
|
| 505 |
"""
|
| 506 |
|
| 507 |
+
# ── TTS ───────────────────────────────────────────────────────────────────
|
| 508 |
+
tts_text = _build_tts_text(verdict, conf_text, confidence,
|
| 509 |
+
ai_score * 100, real_score * 100)
|
| 510 |
+
audio_path = generate_tts_audio(tts_text)
|
| 511 |
+
|
| 512 |
+
return html, audio_path
|
| 513 |
+
|
| 514 |
|
| 515 |
def _empty_state() -> str:
|
| 516 |
return """
|
|
|
|
| 709 |
transition: width 0.6s cubic-bezier(.4,0,.2,1);
|
| 710 |
}
|
| 711 |
|
| 712 |
+
/* ── TTS audio player ────────────────────────────────────────── */
|
| 713 |
+
#tts-audio {
|
| 714 |
+
margin-top: 1.25rem;
|
| 715 |
+
}
|
| 716 |
+
#tts-audio audio {
|
| 717 |
+
width: 100% !important;
|
| 718 |
+
border-radius: 8px !important;
|
| 719 |
+
background: var(--surface2) !important;
|
| 720 |
+
}
|
| 721 |
+
#tts-audio .label-wrap,
|
| 722 |
+
#tts-audio label span {
|
| 723 |
+
font-size: 0.7rem !important;
|
| 724 |
+
letter-spacing: 0.1em !important;
|
| 725 |
+
text-transform: uppercase !important;
|
| 726 |
+
color: var(--text-dim) !important;
|
| 727 |
+
}
|
| 728 |
+
|
| 729 |
/* ── Empty / error states ────────────────────────────────────── */
|
| 730 |
.empty-state {
|
| 731 |
display: flex;
|
|
|
|
| 796 |
|
| 797 |
with gr.Column(scale=1, elem_id="output-panel"):
|
| 798 |
gr.HTML('<div class="panel-title">Analysis Result</div>')
|
| 799 |
+
output_html = gr.HTML(value=_empty_state(), elem_id="output-result")
|
| 800 |
+
# ── TTS audio output ──────────────────────────────────────────────
|
| 801 |
+
tts_audio = gr.Audio(
|
| 802 |
+
label="π Voice Summary",
|
| 803 |
+
type="filepath",
|
| 804 |
+
autoplay=True,
|
| 805 |
+
show_download_button=False,
|
| 806 |
+
elem_id="tts-audio",
|
| 807 |
+
visible=True,
|
| 808 |
+
)
|
| 809 |
|
| 810 |
gr.HTML('<div id="footer">Created by <strong>Arun Kumar</strong> — VIT Student</div>')
|
| 811 |
|
| 812 |
+
submit_btn.click(
|
| 813 |
+
fn=format_output,
|
| 814 |
+
inputs=[image_input],
|
| 815 |
+
outputs=[output_html, tts_audio],
|
| 816 |
+
)
|
| 817 |
+
image_input.change(
|
| 818 |
+
fn=format_output,
|
| 819 |
+
inputs=[image_input],
|
| 820 |
+
outputs=[output_html, tts_audio],
|
| 821 |
+
)
|
| 822 |
|
| 823 |
if __name__ == "__main__":
|
| 824 |
demo.launch(
|