Sync from GitHub via hub-sync
Files changed:
- api/v1/analyze.py (+2, -7)
- models/model_loader.py (+5, -0)
- services/image_service.py (+71, -0)
api/v1/analyze.py
CHANGED
@@ -42,7 +42,7 @@ from services.screenshot_service import (
 )
 from services.ela_service import generate_ela_base64
 from services.exif_service import extract_exif, rescore_exif_trust
-from services.image_service import classify_image, load_image_from_bytes
+from services.image_service import classify_image, load_image_from_bytes, apply_vlm_to_classification
 from services.llm_explainer import generate_llm_summary
 from schemas.common import ProcessingSummary, Verdict
 from services.artifact_detector import scan_artifacts

@@ -396,12 +396,7 @@ async def analyze_image(
     try:
         vlm_bd = generate_vlm_breakdown(pil, record_id=analysis_id)
         if vlm_bd:
-            clf = classify_image(
-                pil,
-                artifact_indicators=indicators,
-                exif=exif_summary,
-                vlm_breakdown=vlm_bd,
-            )
+            clf = apply_vlm_to_classification(clf, vlm_bd)
             stages.append("vlm_evidence_fusion")
     except Exception as e:  # noqa: BLE001
         logger.warning(f"VLM evidence fusion failed, continuing: {e}")
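Note on the analyze.py change: the removed block re-ran classification with the VLM breakdown passed in, so a successful VLM pass cost a second round of model inference. The new helper instead folds the VLM signal into the classification result that already exists. A rough sketch of how the route reads after this commit, assuming the earlier classify_image call takes the same keyword arguments the removed call did (minus vlm_breakdown); variable names are taken from the hunk context and the rest of the route is omitted:

    # Sketch only, not the full route body.
    clf = classify_image(pil, artifact_indicators=indicators, exif=exif_summary)  # single inference pass

    try:
        vlm_bd = generate_vlm_breakdown(pil, record_id=analysis_id)
        if vlm_bd:
            clf = apply_vlm_to_classification(clf, vlm_bd)  # fusion math only, no re-inference
            stages.append("vlm_evidence_fusion")
    except Exception as e:  # noqa: BLE001
        logger.warning(f"VLM evidence fusion failed, continuing: {e}")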
models/model_loader.py
CHANGED
@@ -39,6 +39,7 @@ class ModelLoader:
             cls._instance._efficientnet_detector = None
             cls._instance._ffpp_model = None
             cls._instance._ffpp_processor = None
+            cls._instance._ffpp_unavailable = False
             cls._instance._densenet_model = None
             cls._instance._densenet_meta = None
             cls._instance._densenet_unavailable = False

@@ -289,6 +290,8 @@ class ModelLoader:
             return None
         if self._ffpp_model is not None:
             return self._ffpp_model, self._ffpp_processor
+        if self._ffpp_unavailable:
+            return None

         configured_path = Path(settings.FFPP_MODEL_PATH)
         repo_root = Path(__file__).resolve().parent.parent.parent

@@ -306,6 +309,7 @@ class ModelLoader:
         else:
             tried = ", ".join(str(p) for p in candidates)
             logger.warning(f"FFPP ViT checkpoint not found. Tried: {tried} — skipping")
+            self._ffpp_unavailable = True
             return None

         try:

@@ -322,6 +326,7 @@ class ModelLoader:
             return self._ffpp_model, self._ffpp_processor
         except Exception as e:
             logger.warning(f"FFPP ViT load failed (continuing without it): {e}")
+            self._ffpp_unavailable = True
             return None

     # ---------- DenseNet121 face-GAN specialist ----------
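Note on the model_loader.py change: _ffpp_unavailable is a negative cache, mirroring the existing _densenet_unavailable flag. Before this commit a missing or broken FFPP ViT checkpoint was re-probed (and the warning re-logged) on every request; now the first failure is remembered and later calls return None immediately, while the positive cache (_ffpp_model) still short-circuits successful loads. A minimal standalone sketch of the same pattern; the class and parameter names below are illustrative, not the project's ModelLoader:

    # Standalone illustration of the negative-caching pattern.
    from pathlib import Path
    from typing import Callable, Optional


    class LazyModel:
        def __init__(self, checkpoint: Path, load_fn: Callable[[Path], object]):
            self._checkpoint = checkpoint
            self._load_fn = load_fn
            self._model: Optional[object] = None   # positive cache
            self._unavailable = False              # negative cache: set after the first failure

        def get(self) -> Optional[object]:
            if self._model is not None:            # loaded earlier: reuse it
                return self._model
            if self._unavailable:                  # failed earlier: skip probing and logging
                return None
            if not self._checkpoint.exists():
                self._unavailable = True           # remember the miss so it is reported once
                return None
            try:
                self._model = self._load_fn(self._checkpoint)
            except Exception:
                self._unavailable = True           # remember load errors too
                return None
            return self._model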
services/image_service.py
CHANGED
@@ -525,6 +525,77 @@ def classify_image(
     )


+def apply_vlm_to_classification(
+    clf: ImageClassification,
+    vlm_breakdown: "VLMBreakdown",
+) -> ImageClassification:
+    """Fold VLM evidence into an already-computed ImageClassification.
+
+    Recomputes only the fusion math — no model inference is repeated.
+    Returns clf unchanged if evidence_fusion is absent or VLM yields no signal.
+    """
+    if clf.evidence_fusion is None:
+        return clf
+
+    vlm_prob = _vlm_fake_probability(vlm_breakdown)
+    if vlm_prob is None:
+        return clf
+
+    components = dict(clf.evidence_fusion["components"])
+    weights = dict(clf.evidence_fusion["weights"])
+    is_video_frame = clf.evidence_fusion.get("is_video_frame", False)
+
+    components["vlm"] = vlm_prob
+    weights["vlm"] = settings.VLM_WEIGHT_FACE
+
+    total_w = sum(weights.values())
+    pre_gating_prob = (
+        sum(components[k] * weights[k] for k in weights) / total_w if total_w else 0.0
+    )
+    pre_gating_prob = max(0.0, min(1.0, pre_gating_prob))
+
+    general_fake_prob = components.get("general")
+    ensemble_prob, gating_reason = _apply_hard_gating(
+        fake_prob=pre_gating_prob,
+        general_fake_prob=general_fake_prob,
+        artifacts=[],
+    )
+    ensemble_prob, synthetic_reason = _apply_synthetic_still_overrides(
+        fake_prob=ensemble_prob,
+        general_fake_prob=general_fake_prob,
+        is_video_frame=is_video_frame,
+    )
+
+    # Preserve artifact-based gating floor from the original classification
+    if clf.gating_applied and "gan_artifact" in clf.gating_applied:
+        ensemble_prob = max(ensemble_prob, settings.GATING_FAKE_FLOOR)
+
+    final_gating_reason = synthetic_reason or gating_reason or clf.gating_applied
+    label = "Fake" if ensemble_prob >= 0.5 else "Real"
+
+    logger.info(
+        f"VLM fusion applied: vlm_prob={vlm_prob:.3f} "
+        f"pre_gating={pre_gating_prob:.3f} -> {ensemble_prob:.3f} ({label})"
+    )
+
+    return ImageClassification(
+        label=label,
+        confidence=ensemble_prob,
+        all_scores={**clf.all_scores, "vlm_fake_prob": vlm_prob},
+        models_used=clf.models_used,
+        ensemble_method=clf.ensemble_method,
+        calibrator_applied=clf.calibrator_applied,
+        no_face_analysis=clf.no_face_analysis,
+        evidence_fusion={
+            **clf.evidence_fusion,
+            "components": components,
+            "weights": weights,
+            "pre_gating": pre_gating_prob,
+        },
+        gating_applied=final_gating_reason,
+    )
+
+
 def preprocess_and_classify(raw_bytes: bytes) -> Tuple[Image.Image, ImageClassification]:
     """Convenience: decode bytes → PIL → classify. Returns the PIL image too so
     downstream steps (heatmap, artifact scan) can reuse it.