Spaces:

ar07xd
/

deepshield

Running

App Files Files Community

ar07xd committed 12 days ago

Commit

fba30db

verified ·

1 Parent(s): cce3df7

Sync from GitHub via hub-sync

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.env.example +44 -0
.gitattributes +4 -0
Colab_ViT_Training.ipynb +0 -233
README.md +0 -13
analyze.py +0 -177
api/router.py +1 -0
api/v1/health.py +72 -1
api/v1/history.py +7 -1
api/v1/report.py +41 -6
artifact_detector.py +0 -229
auth.py +0 -30
auth_service.py +0 -67
common.py +0 -88
config.py +34 -1
database.py +0 -30
datasets/__init__.py +0 -0
datasets/build_manifest.py +0 -93
datasets/download_dfdc_sample.py +0 -44
datasets/download_ffhq.py +0 -49
datasets/extract_frames.py +0 -90
datasets/procure_all.ps1 +0 -40
datasets/procure_all.sh +0 -37
db/database.py +41 -17
db/models.py +21 -7
deepshield.db-shm +0 -0
deepshield.db-wal +0 -0
deepshield_13_5bcf1328.pdf +0 -148
deps.py +0 -46
download_ffpp.py +0 -261
ela_service.py +0 -88
exif_service.py +0 -129
file_handler.py +0 -96
generate_colab_nb.py +0 -213
heatmap_generator.py +0 -164
image_service.py +0 -58
llm_explainer.py +0 -191
logs/deepshield.log +949 -0
main.py +100 -2
media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp +0 -0
models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg +2 -2
media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg +0 -0
models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg +2 -2
media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg +0 -0
media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg +3 -0
media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp +0 -0
media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg +3 -0
media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg +0 -0
media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg +0 -0
media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg +0 -0
media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg +0 -0

.env.example ADDED Viewed

	@@ -0,0 +1,44 @@

+# === DeepShield backend config example ===
+# Server
+APP_HOST=0.0.0.0
+APP_PORT=8000
+DEBUG=false
+CORS_ORIGINS=["http://localhost:5173"]
+# === Database ===
+# SQLite (default — zero-config, great for dev / college demo):
+DATABASE_URL=sqlite:///./deepshield.db
+# Postgres (production path — schema migrations are applied automatically
+# by init_db via ALTER TABLE when new columns are missing):
+# DATABASE_URL=postgresql+psycopg2://deepshield:CHANGEME@localhost:5432/deepshield
+# Phase 19.1 — SHA-256 dedup cache TTL (days)
+CACHE_TTL_DAYS=30
+# Phase 19.2 — object storage root (content-addressed media + thumbnails)
+MEDIA_ROOT=./media
+# File upload
+MAX_UPLOAD_SIZE_MB=100
+UPLOAD_DIR=./temp_uploads
+# AI models
+PRELOAD_MODELS=true
+DEVICE=cpu
+# LLM explainability (Phase 12)
+LLM_PROVIDER=gemini
+LLM_API_KEY=
+LLM_MODEL=gemini-1.5-flash
+# News lookup (Phase 13)
+NEWS_API_KEY=
+# Auth (REQUIRED in production — generate with python -c "import secrets; print(secrets.token_urlsafe(48))")
+JWT_SECRET_KEY=change-me-in-production
+JWT_ALGORITHM=HS256
+JWT_EXPIRATION_MINUTES=1440
+# Optional metadata writer
+EXIFTOOL_PATH=

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg filter=lfs diff=lfs merge=lfs -text
+media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg filter=lfs diff=lfs merge=lfs -text
+media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg filter=lfs diff=lfs merge=lfs -text
+media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg filter=lfs diff=lfs merge=lfs -text

Colab_ViT_Training.ipynb DELETED Viewed

@@ -1,233 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "1e0e7b4a",
-   "metadata": {},
-   "source": [
-    "# DeepShield: FaceForensics++ ViT Training \n",
-    "Run this entirely in Google Colab.\n",
-    "**Before running**:\n",
-    "1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.\n",
-    "2. Run the cells below sequentially.\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4fe293e7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install timm transformers datasets accelerate evaluate opencv-python\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c9387c0f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# We create the download script inside the Colab environment\n",
-    "download_script = '''#!/usr/bin/env python\n",
-    "import argparse\n",
-    "import os\n",
-    "import urllib.request\n",
-    "import tempfile\n",
-    "import time\n",
-    "import sys\n",
-    "import json\n",
-    "from tqdm import tqdm\n",
-    "from os.path import join\n",
-    "\n",
-    "FILELIST_URL = 'misc/filelist.json'\n",
-    "DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'\n",
-    "DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]\n",
-    "DATASETS = {\n",
-    "    'original': 'original_sequences/youtube',\n",
-    "    'Deepfakes': 'manipulated_sequences/Deepfakes',\n",
-    "    'Face2Face': 'manipulated_sequences/Face2Face',\n",
-    "    'FaceShifter': 'manipulated_sequences/FaceShifter',\n",
-    "    'FaceSwap': 'manipulated_sequences/FaceSwap',\n",
-    "    'NeuralTextures': 'manipulated_sequences/NeuralTextures'\n",
-    "}\n",
-    "ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']\n",
-    "COMPRESSION = ['raw', 'c23', 'c40']\n",
-    "TYPE = ['videos']\n",
-    "\n",
-    "def download_file(url, out_file):\n",
-    "    os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
-    "    if not os.path.isfile(out_file):\n",
-    "        urllib.request.urlretrieve(url, out_file)\n",
-    "\n",
-    "def main():\n",
-    "    parser = argparse.ArgumentParser()\n",
-    "    parser.add_argument('output_path', type=str)\n",
-    "    parser.add_argument('-d', '--dataset', type=str, default='all')\n",
-    "    parser.add_argument('-c', '--compression', type=str, default='c40')\n",
-    "    parser.add_argument('-t', '--type', type=str, default='videos')\n",
-    "    parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial\n",
-    "    args = parser.parse_args()\n",
-    "    \n",
-    "    base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'\n",
-    "    \n",
-    "    datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS\n",
-    "    for dataset in datasets:\n",
-    "        dataset_path = DATASETS[dataset]\n",
-    "        print(f'Downloading {args.compression} of {dataset}')\n",
-    "        \n",
-    "        file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode(\"utf-8\"))\n",
-    "        filelist = []\n",
-    "        if 'original' in dataset_path:\n",
-    "            for pair in file_pairs:\n",
-    "                filelist += pair\n",
-    "        else:\n",
-    "            for pair in file_pairs:\n",
-    "                filelist.append('_'.join(pair))\n",
-    "                filelist.append('_'.join(pair[::-1]))\n",
-    "            \n",
-    "        filelist = filelist[:args.num_videos]\n",
-    "        dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'\n",
-    "        dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)\n",
-    "        \n",
-    "        for filename in tqdm(filelist):\n",
-    "            download_file(dataset_videos_url + filename + \".mp4\", join(dataset_output_path, filename + \".mp4\"))\n",
-    "\n",
-    "if __name__ == \"__main__\":\n",
-    "    main()\n",
-    "'''\n",
-    "\n",
-    "with open(\"download_ffpp.py\", \"w\") as f:\n",
-    "    f.write(download_script)\n",
-    "\n",
-    "!python download_ffpp.py ./data -d all -c c40 -t videos -n 50\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f33716f6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import cv2\n",
-    "import os\n",
-    "import glob\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "def extract_frames(video_folder, output_folder, label, max_frames=4):\n",
-    "    os.makedirs(output_folder, exist_ok=True)\n",
-    "    videos = glob.glob(os.path.join(video_folder, \"*.mp4\"))\n",
-    "    \n",
-    "    for vid_path in tqdm(videos, desc=f\"Extracting {label}\"):\n",
-    "        vid_name = os.path.basename(vid_path).replace('.mp4','')\n",
-    "        cap = cv2.VideoCapture(vid_path)\n",
-    "        count = 0\n",
-    "        while cap.isOpened() and count < max_frames:\n",
-    "            ret, frame = cap.read()\n",
-    "            if not ret: break\n",
-    "            frame = cv2.resize(frame, (224, 224))\n",
-    "            out_path = os.path.join(output_folder, f\"{vid_name}_f{count}.jpg\")\n",
-    "            cv2.imwrite(out_path, frame)\n",
-    "            count += 1\n",
-    "        cap.release()\n",
-    "\n",
-    "# Extract Real\n",
-    "extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')\n",
-    "\n",
-    "# Extract Fakes\n",
-    "fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']\n",
-    "for f in fakes:\n",
-    "    extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake')\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b79cdd85",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "from datasets import load_dataset\n",
-    "from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer\n",
-    "import torch\n",
-    "\n",
-    "# 1. Load Dataset\n",
-    "dataset = load_dataset('imagefolder', data_dir='./dataset')\n",
-    "# Split into train/validation\n",
-    "dataset = dataset['train'].train_test_split(test_size=0.1)\n",
-    "\n",
-    "# 2. Preprocessor\n",
-    "model_name_or_path = 'google/vit-base-patch16-224-in21k'\n",
-    "processor = ViTImageProcessor.from_pretrained(model_name_or_path)\n",
-    "\n",
-    "def transform(example_batch):\n",
-    "    # Take a list of PIL images and turn them to pixel values\n",
-    "    inputs = processor([x.convert(\"RGB\") for x in example_batch['image']], return_tensors='pt')\n",
-    "    inputs['labels'] = example_batch['label']\n",
-    "    return inputs\n",
-    "\n",
-    "prepared_ds = dataset.with_transform(transform)\n",
-    "\n",
-    "def collate_fn(batch):\n",
-    "    return {\n",
-    "        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),\n",
-    "        'labels': torch.tensor([x['labels'] for x in batch])\n",
-    "    }\n",
-    "\n",
-    "# 3. Load Model\n",
-    "labels = dataset['train'].features['label'].names\n",
-    "model = ViTForImageClassification.from_pretrained(\n",
-    "    model_name_or_path,\n",
-    "    num_labels=len(labels),\n",
-    "    id2label={str(i): c for i, c in enumerate(labels)},\n",
-    "    label2id={c: str(i) for i, c in enumerate(labels)}\n",
-    ")\n",
-    "\n",
-    "training_args = TrainingArguments(\n",
-    "    output_dir=\"./vit-deepshield\",\n",
-    "    per_device_train_batch_size=16,\n",
-    "    eval_strategy=\"steps\",\n",
-    "    num_train_epochs=3,\n",
-    "    fp16=True, # Mixed precision for speed\n",
-    "    save_steps=100,\n",
-    "    eval_steps=100,\n",
-    "    logging_steps=10,\n",
-    "    learning_rate=2e-4,\n",
-    "    save_total_limit=2,\n",
-    "    remove_unused_columns=False,\n",
-    "    push_to_hub=False,\n",
-    "    load_best_model_at_end=True,\n",
-    ")\n",
-    "\n",
-    "import evaluate\n",
-    "metric = evaluate.load(\"accuracy\")\n",
-    "def compute_metrics(p):\n",
-    "    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)\n",
-    "\n",
-    "trainer = Trainer(\n",
-    "    model=model,\n",
-    "    args=training_args,\n",
-    "    data_collator=collate_fn,\n",
-    "    compute_metrics=compute_metrics,\n",
-    "    train_dataset=prepared_ds[\"train\"],\n",
-    "    eval_dataset=prepared_ds[\"test\"],\n",
-    ")\n",
-    "\n",
-    "# 4. Train\n",
-    "train_results = trainer.train()\n",
-    "trainer.save_model(\"deepshield_vit_model\")\n",
-    "processor.save_pretrained(\"deepshield_vit_model\")\n",
-    "trainer.log_metrics(\"train\", train_results.metrics)\n",
-    "trainer.save_metrics(\"train\", train_results.metrics)\n",
-    "trainer.save_state()\n",
-    "print(\"Training Complete! The model is saved to ./deepshield_vit_model\")\n"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}

README.md DELETED Viewed

@@ -1,13 +0,0 @@
----
-title: Deepshield
-emoji: 🛡️
-colorFrom: blue
-colorTo: indigo
-sdk: docker
-app_port: 7860
-pinned: true
----
-# DeepShield Backend
-This space hosts the FastAPI backend for DeepShield.

analyze.py DELETED Viewed

@@ -1,177 +0,0 @@
-from __future__ import annotations
-from typing import List
-from pydantic import BaseModel
-from schemas.common import (
-    ArtifactIndicator,
-    ContradictingEvidence,
-    ExifSummary,
-    LLMExplainabilitySummary,
-    ProcessingSummary,
-    TrustedSource,
-    TruthOverride,
-    Verdict,
-    VLMBreakdown,
-)
-class SensationalismBreakdown(BaseModel):
-    score: int = 0
-    level: str = "Low"
-    exclamation_count: int = 0
-    caps_word_count: int = 0
-    clickbait_matches: int = 0
-    emotional_word_count: int = 0
-    superlative_count: int = 0
-class ManipulationIndicatorOut(BaseModel):
-    pattern_type: str
-    matched_text: str
-    start_pos: int
-    end_pos: int
-    severity: str
-    description: str
-class TextExplainability(BaseModel):
-    fake_probability: float
-    top_label: str
-    all_scores: dict = {}
-    keywords: List[str] = []
-    sensationalism: SensationalismBreakdown = SensationalismBreakdown()
-    manipulation_indicators: List[ManipulationIndicatorOut] = []
-    detected_language: str = "en"       # ISO 639-1 code, e.g. "en", "hi"
-    truth_override: TruthOverride | None = None
-class TextAnalysisResponse(BaseModel):
-    analysis_id: str
-    record_id: int = 0
-    media_type: str = "text"
-    timestamp: str
-    verdict: Verdict
-    explainability: TextExplainability
-    llm_summary: LLMExplainabilitySummary | None = None
-    trusted_sources: List[TrustedSource] = []
-    contradicting_evidence: List[ContradictingEvidence] = []
-    processing_summary: ProcessingSummary
-    responsible_ai_notice: str = (
-        "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
-    )
-class OCRBoxOut(BaseModel):
-    text: str
-    bbox: List[List[int]]
-    confidence: float
-class SuspiciousPhraseOut(BaseModel):
-    text: str
-    bbox: List[List[int]]
-    pattern_type: str
-    severity: str
-    description: str
-class LayoutAnomalyOut(BaseModel):
-    type: str
-    severity: str
-    description: str
-    confidence: float
-class ScreenshotExplainability(BaseModel):
-    extracted_text: str = ""
-    ocr_boxes: List[OCRBoxOut] = []
-    fake_probability: float = 0.0
-    sensationalism: SensationalismBreakdown = SensationalismBreakdown()
-    suspicious_phrases: List[SuspiciousPhraseOut] = []
-    layout_anomalies: List[LayoutAnomalyOut] = []
-    keywords: List[str] = []
-    detected_language: str = "en"
-    truth_override: TruthOverride | None = None
-class ScreenshotAnalysisResponse(BaseModel):
-    analysis_id: str
-    record_id: int = 0
-    media_type: str = "screenshot"
-    timestamp: str
-    verdict: Verdict
-    explainability: ScreenshotExplainability
-    llm_summary: LLMExplainabilitySummary | None = None
-    trusted_sources: List[TrustedSource] = []
-    contradicting_evidence: List[ContradictingEvidence] = []
-    processing_summary: ProcessingSummary
-    responsible_ai_notice: str = (
-        "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
-    )
-class ImageExplainability(BaseModel):
-    heatmap_base64: str = ""
-    ela_base64: str = ""
-    boxes_base64: str = ""
-    heatmap_status: str = "success"  # success | failed | degraded
-    artifact_indicators: List[ArtifactIndicator] = []
-    exif: ExifSummary | None = None
-    llm_summary: LLMExplainabilitySummary | None = None
-    vlm_breakdown: VLMBreakdown | None = None
-class FrameAnalysisOut(BaseModel):
-    index: int
-    timestamp_s: float
-    label: str
-    confidence: float
-    suspicious_prob: float
-    is_suspicious: bool
-    has_face: bool = False
-    scored: bool = False
-class VideoExplainability(BaseModel):
-    num_frames_sampled: int
-    num_face_frames: int = 0
-    num_suspicious_frames: int
-    mean_suspicious_prob: float
-    max_suspicious_prob: float
-    suspicious_ratio: float
-    insufficient_faces: bool = False
-    suspicious_timestamps: List[float] = []
-    frames: List[FrameAnalysisOut] = []
-class VideoAnalysisResponse(BaseModel):
-    analysis_id: str
-    record_id: int = 0
-    media_type: str = "video"
-    timestamp: str
-    verdict: Verdict
-    explainability: VideoExplainability
-    llm_summary: LLMExplainabilitySummary | None = None
-    trusted_sources: List[TrustedSource] = []
-    contradicting_evidence: List[ContradictingEvidence] = []
-    processing_summary: ProcessingSummary
-    responsible_ai_notice: str = (
-        "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
-    )
-class ImageAnalysisResponse(BaseModel):
-    analysis_id: str
-    record_id: int = 0
-    media_type: str = "image"
-    timestamp: str
-    verdict: Verdict
-    explainability: ImageExplainability
-    trusted_sources: List[TrustedSource] = []
-    contradicting_evidence: List[ContradictingEvidence] = []
-    processing_summary: ProcessingSummary
-    responsible_ai_notice: str = (
-        "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
-    )

api/router.py CHANGED Viewed

@@ -5,6 +5,7 @@ from api.v1 import analyze, auth, health, history, report
 api_router = APIRouter(prefix="/api/v1")
 api_router.include_router(health.router)
 api_router.include_router(analyze.router)
 api_router.include_router(report.router)
 api_router.include_router(auth.router)
 api_router.include_router(history.router)

 api_router = APIRouter(prefix="/api/v1")
 api_router.include_router(health.router)
 api_router.include_router(analyze.router)
+api_router.include_router(analyze.jobs_router)  # Phase 19.3
 api_router.include_router(report.router)
 api_router.include_router(auth.router)
 api_router.include_router(history.router)

api/v1/health.py CHANGED Viewed

@@ -1,8 +1,79 @@
-from fastapi import APIRouter
 router = APIRouter(tags=["health"])
 @router.get("/health")
 def health():
     return {"status": "ok", "service": "deepshield-backend"}

+from fastapi import APIRouter, Response, status
+from loguru import logger
+from sqlalchemy import text
+from config import settings
+from db.database import engine
+from services.llm_explainer import is_rate_limited
 router = APIRouter(tags=["health"])
 @router.get("/health")
 def health():
+    """Legacy combined healthcheck — kept for backwards compatibility."""
     return {"status": "ok", "service": "deepshield-backend"}
+@router.get("/health/live")
+def health_live():
+    """Liveness probe — returns 200 as long as the process is up."""
+    return {"status": "alive"}
+@router.get("/health/ready")
+def health_ready(response: Response):
+    """Readiness probe — 200 only when DB is reachable and models are loaded.
+    Phase 19.5: the frontend disables the Analyze button while this returns 503.
+    """
+    checks: dict[str, bool] = {}
+    try:
+        with engine.connect() as conn:
+            conn.execute(text("SELECT 1"))
+        checks["db"] = True
+    except Exception as e:  # noqa: BLE001
+        logger.warning(f"readiness db check failed: {e}")
+        checks["db"] = False
+    try:
+        from models.model_loader import get_model_loader
+        checks["models"] = bool(get_model_loader().is_ready())
+    except AttributeError:
+        # No is_ready() — fall back to "ready if loader constructs"
+        try:
+            from models.model_loader import get_model_loader
+            get_model_loader()
+            checks["models"] = True
+        except Exception:  # noqa: BLE001
+            checks["models"] = False
+    except Exception as e:  # noqa: BLE001
+        logger.warning(f"readiness model check failed: {e}")
+        checks["models"] = False
+    ok = all(checks.values())
+    if not ok:
+        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
+    return {"status": "ready" if ok else "not_ready", "checks": checks}
+@router.get("/health/llm")
+def health_llm(response: Response):
+    """LLM availability probe — lets the frontend decide whether to request/show
+    the AI summary card. Doesn't spend tokens; only checks config + breaker state.
+    """
+    has_primary = bool(settings.LLM_API_KEY)
+    has_fallback = bool(settings.GROQ_API_KEY)
+    cooldown = is_rate_limited()
+    # Available if (any provider configured) AND (not rate-limited OR fallback exists)
+    available = (has_primary or has_fallback) and (not cooldown or has_fallback)
+    if not available:
+        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
+    return {
+        "available": available,
+        "primary": f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}" if has_primary else None,
+        "fallback": f"groq/{settings.GROQ_MODEL}" if has_fallback else None,
+        "rate_limited": cooldown,
+    }

api/v1/history.py CHANGED Viewed

@@ -60,7 +60,13 @@ def get_history_detail(
     if not r or r.user_id != user.id:
         raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
     try:
-        return json.loads(r.result_json)
     except Exception:
         raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")

     if not r or r.user_id != user.id:
         raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
     try:
+        payload = json.loads(r.result_json)
+        # Inject storage fields from DB columns so the frontend can display full-size media
+        if r.media_path and not payload.get("media_path"):
+            payload["media_path"] = r.media_path
+        if r.thumbnail_url and not payload.get("thumbnail_url"):
+            payload["thumbnail_url"] = r.thumbnail_url
+        return payload
     except Exception:
         raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")

api/v1/report.py CHANGED Viewed

@@ -2,24 +2,45 @@ from __future__ import annotations
 from pathlib import Path
-from fastapi import APIRouter, Depends, HTTPException
 from fastapi.responses import FileResponse
 from loguru import logger
 from sqlalchemy.orm import Session
 from db.database import get_db
-from db.models import AnalysisRecord, Report
 from services.report_service import cleanup_expired, create_report_row, generate_report
 router = APIRouter(prefix="/report", tags=["report"])
 @router.post("/{analysis_id}")
-def generate(analysis_id: int, db: Session = Depends(get_db)):
     record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
     if not record:
         raise HTTPException(status_code=404, detail="analysis not found")
     existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
     if existing and Path(existing.file_path).exists():
         return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
@@ -44,7 +65,19 @@ def generate(analysis_id: int, db: Session = Depends(get_db)):
 @router.get("/{analysis_id}/download")
-def download(analysis_id: int, db: Session = Depends(get_db)):
     row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
     if not row:
         raise HTTPException(status_code=404, detail="report not found — generate first")
@@ -58,7 +91,9 @@ def download(analysis_id: int, db: Session = Depends(get_db)):
     )
-@router.post("/cleanup")
-def cleanup():
     n = cleanup_expired()
     return {"deleted": n}

 from pathlib import Path
+from fastapi import APIRouter, Depends, HTTPException, Request, status
 from fastapi.responses import FileResponse
 from loguru import logger
 from sqlalchemy.orm import Session
+from api.deps import get_current_user, optional_current_user
 from db.database import get_db
+from db.models import AnalysisRecord, Report, User
+from services.rate_limit import ANON_REPORT, AUTH_REPORT, is_anon, is_authed, limiter
 from services.report_service import cleanup_expired, create_report_row, generate_report
 router = APIRouter(prefix="/report", tags=["report"])
+def _assert_record_access(record: AnalysisRecord, user: User | None) -> None:
+    """Phase 15.1 — allow access if the requester owns the record, or if the record
+    is anonymous (user_id is None). Everything else is 403."""
+    if record.user_id is None:
+        return
+    if user is not None and record.user_id == user.id:
+        return
+    raise HTTPException(status.HTTP_403_FORBIDDEN, "You do not own this analysis")
 @router.post("/{analysis_id}")
+@limiter.limit(ANON_REPORT, exempt_when=is_authed)
+@limiter.limit(AUTH_REPORT, exempt_when=is_anon)
+def generate(
+    request: Request,
+    analysis_id: int,
+    db: Session = Depends(get_db),
+    user: User | None = Depends(optional_current_user),
+):
     record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
     if not record:
         raise HTTPException(status_code=404, detail="analysis not found")
+    _assert_record_access(record, user)
     existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
     if existing and Path(existing.file_path).exists():
         return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
 @router.get("/{analysis_id}/download")
+@limiter.limit(ANON_REPORT, exempt_when=is_authed)
+@limiter.limit(AUTH_REPORT, exempt_when=is_anon)
+def download(
+    request: Request,
+    analysis_id: int,
+    db: Session = Depends(get_db),
+    user: User | None = Depends(optional_current_user),
+):
+    record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
+    if not record:
+        raise HTTPException(status_code=404, detail="analysis not found")
+    _assert_record_access(record, user)
     row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
     if not row:
         raise HTTPException(status_code=404, detail="report not found — generate first")
     )
+@router.post("/cleanup", include_in_schema=False)
+def cleanup(user: User = Depends(get_current_user)):
+    # Phase 15.1 — auth-guarded. Exposed only to authenticated users; an internal
+    # scheduler loop in main.py handles periodic cleanup automatically.
     n = cleanup_expired()
     return {"deleted": n}

artifact_detector.py DELETED Viewed

@@ -1,229 +0,0 @@
-from __future__ import annotations
-import io
-from typing import List
-import numpy as np
-from loguru import logger
-from PIL import Image
-from schemas.common import ArtifactIndicator
-def _severity_from_score(score: float) -> str:
-    """Bucket a suspiciousness score into a severity label.
-    Returns "high" for score >= 0.7, "medium" for score >= 0.4, otherwise "low".
-    """
-    if score >= 0.7:
-        return "high"
-    if score >= 0.4:
-        return "medium"
-    return "low"
-# ---------- 1. GAN high-frequency signature (FFT) ----------
-def detect_gan_hf_artifact(pil_img: Image.Image) -> ArtifactIndicator | None:
-    """Compute high-frequency energy ratio on the luminance channel.
-    Real photos typically follow a ~1/f spectrum; many GAN outputs show
-    elevated HF energy or spectral peaks.
-    Returns a "gan_artifact" indicator for every image that can be processed
-    (its severity and confidence follow the score, which may be 0.0), or None
-    when any step raises; the failure is logged as a warning.
-    """
-    try:
-        gray = np.asarray(pil_img.convert("L"), dtype=np.float32)
-        # downsample for speed
-        if max(gray.shape) > 512:
-            # cv2 is imported lazily, only when the longest side exceeds 512 px.
-            import cv2
-            scale = 512 / max(gray.shape)
-            # cv2.resize takes (width, height), hence shape[1] before shape[0].
-            gray = cv2.resize(gray, (int(gray.shape[1] * scale), int(gray.shape[0] * scale)))
-        fft = np.fft.fftshift(np.fft.fft2(gray))
-        mag = np.abs(fft)
-        h, w = mag.shape
-        cy, cx = h // 2, w // 2
-        y, x = np.ogrid[:h, :w]
-        # r: distance of each frequency bin from the centre of the shifted spectrum.
-        r = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
-        # r_max: distance from the centre to the (0, 0) corner.
-        r_max = np.sqrt(cx * cx + cy * cy)
-        # "High frequency" means bins farther than half of r_max from the centre.
-        hf_mask = r > (0.5 * r_max)
-        # The 1e-9 term keeps the division defined for an all-zero spectrum.
-        total = float(mag.sum() + 1e-9)
-        hf = float(mag[hf_mask].sum())
-        ratio = hf / total  # typically 0.05–0.20 for natural photos
-        # normalize to [0,1] suspiciousness
-        # (linear ramp: ratio 0.10 -> 0.0, ratio 0.30 -> 1.0, clamped outside)
-        score = max(0.0, min(1.0, (ratio - 0.10) / 0.20))
-        sev = _severity_from_score(score)
-        return ArtifactIndicator(
-            type="gan_artifact",
-            severity=sev,
-            description=(
-                f"High-frequency energy ratio {ratio:.3f} — "
-                + ("elevated HF energy consistent with GAN/diffusion outputs" if score > 0.4
-                   else "natural frequency falloff")
-            ),
-            confidence=float(score),
-        )
-    except Exception as e:  # noqa: BLE001
-        # Best-effort detector: never propagate, just report "no indicator".
-        logger.warning(f"GAN HF detection failed: {e}")
-        return None
-# ---------- 2. JPEG quantization table anomaly ----------
-# NOTE(review): this table is not referenced by any function in this file;
-# detect_compression_anomaly uses its own fixed thresholds (60 and 8000).
-# Keys look like JPEG quality levels and values like (low, high) ranges of
-# quantization-table sums — confirm before relying on it.
-_STANDARD_Q_SUMS = {  # rough heuristic: camera JPEGs fall in these ranges
-    50: (1500, 4500),
-    75: (600, 2500),
-    90: (200, 1000),
-    95: (100, 600),
-}
-def detect_compression_anomaly(raw_bytes: bytes) -> ArtifactIndicator | None:
-    """Inspect JPEG quantization tables. Missing tables, non-standard layouts,
-    or re-saved tables often indicate manipulation or re-encoding.
-    Always returns a "compression" indicator when the bytes can be opened:
-    a fixed low-severity one for non-JPEG input (confidence 0.1) or for a JPEG
-    without readable tables (confidence 0.2), otherwise one scored from the
-    heuristics below. Returns None when any step raises (logged as a warning).
-    """
-    try:
-        img = Image.open(io.BytesIO(raw_bytes))
-        if img.format != "JPEG":
-            return ArtifactIndicator(
-                type="compression",
-                severity="low",
-                description=f"Non-JPEG format ({img.format}); compression signature not available",
-                confidence=0.1,
-            )
-        # Read defensively via getattr so a missing attribute becomes None.
-        q = getattr(img, "quantization", None)
-        if not q:
-            return ArtifactIndicator(
-                type="compression",
-                severity="low",
-                description="No JPEG quantization tables readable",
-                confidence=0.2,
-            )
-        tables = list(q.values())
-        # One integer per table: the sum of all its quantization coefficients.
-        sums = [int(sum(t)) for t in tables]
-        num_tables = len(tables)
-        # Heuristics: very low sum → very high quality (possibly re-saved);
-        # non-standard number of tables; extreme values.
-        # The three penalties are additive (0.4 + 0.3 + 0.2), so the score tops
-        # out at 0.9 and "high" severity (>= 0.7) needs at least the table-count
-        # and low-sum conditions together.
-        suspicious = 0.0
-        reasons: list[str] = []
-        if num_tables not in (1, 2):
-            suspicious += 0.4
-            reasons.append(f"unusual table count ({num_tables})")
-        if any(s < 60 for s in sums):
-            suspicious += 0.3
-            reasons.append("very low quantization sums (possible re-encoding)")
-        if any(s > 8000 for s in sums):
-            suspicious += 0.2
-            reasons.append("very high quantization sums")
-        score = max(0.0, min(1.0, suspicious))
-        sev = _severity_from_score(score)
-        desc = (
-            f"JPEG Q-table sums {sums}"
-            + (f"; {', '.join(reasons)}" if reasons else "; within typical camera range")
-        )
-        return ArtifactIndicator(
-            type="compression",
-            severity=sev,
-            description=desc,
-            confidence=float(score),
-        )
-    except Exception as e:  # noqa: BLE001
-        # Best-effort detector: unreadable or corrupt input yields no indicator.
-        logger.warning(f"Compression anomaly detection failed: {e}")
-        return None
-# ---------- 3. Facial boundary + 4. Lighting (MediaPipe) ----------
-def detect_face_based_artifacts(pil_img: Image.Image) -> List[ArtifactIndicator]:
-    """If a face is detected, analyze jaw boundary variance and per-quadrant
-    luminance balance. Returns 0, 1, or 2 indicators.
-    Only the first detected face is analyzed. Any exception is logged as a
-    warning and whatever indicators were collected before it are returned.
-    """
-    results: List[ArtifactIndicator] = []
-    try:
-        # NOTE(review): `mp` is not used anywhere below; the import may only
-        # serve to fail early when mediapipe is not installed — confirm.
-        import mediapipe as mp  # type: ignore
-        from models.model_loader import get_model_loader
-        detector = get_model_loader().load_face_detector()
-        rgb = np.asarray(pil_img.convert("RGB"))
-        h, w = rgb.shape[:2]
-        # The result exposes `multi_face_landmarks`, so the detector is
-        # presumably a MediaPipe FaceMesh instance — verify in model_loader.
-        mp_result = detector.process(rgb)
-        if not mp_result.multi_face_landmarks:
-            return results
-        landmarks = mp_result.multi_face_landmarks[0].landmark
-        # ----- Jaw boundary jitter -----
-        # FaceMesh jaw/oval landmark indices (approximate face contour)
-        JAW_IDX = [
-            10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361,
-            288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149,
-            150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
-        ]
-        # Landmark coordinates are scaled by image width/height to pixel units.
-        pts = np.array([(landmarks[i].x * w, landmarks[i].y * h) for i in JAW_IDX])
-        # Second-difference magnitude = local curvature jitter
-        # (np.diff does not wrap around, so the contour is treated as an open
-        # polyline from the first to the last index.)
-        diffs = np.diff(pts, axis=0)
-        seconds = np.diff(diffs, axis=0)
-        # Mean jitter in pixels, divided by the longer image side.
-        jitter = float(np.linalg.norm(seconds, axis=1).mean()) / max(w, h)
-        # Linear ramp: jitter 0.003 -> 0.0, jitter 0.013 -> 1.0, clamped outside.
-        jitter_score = max(0.0, min(1.0, (jitter - 0.003) / 0.010))
-        results.append(
-            ArtifactIndicator(
-                type="facial_boundary",
-                severity=_severity_from_score(jitter_score),
-                description=(
-                    f"Jaw-contour jitter {jitter:.4f} (normalized) — "
-                    + ("inconsistent boundary blending detected" if jitter_score > 0.4
-                       else "face boundary appears smooth")
-                ),
-                confidence=float(jitter_score),
-            )
-        )
-        # ----- Lighting inconsistency (per-quadrant luminance) -----
-        # Bounding box of all landmarks of the face, clipped to the image.
-        xs = np.array([lm.x * w for lm in landmarks])
-        ys = np.array([lm.y * h for lm in landmarks])
-        x0, x1 = int(max(0, xs.min())), int(min(w, xs.max()))
-        y0, y1 = int(max(0, ys.min())), int(min(h, ys.max()))
-        # Skip the lighting check when the box is not larger than 4 px per side.
-        if x1 > x0 + 4 and y1 > y0 + 4:
-            face_crop = rgb[y0:y1, x0:x1]
-            # Weighted R/G/B sum (the ITU-R BT.601 luma coefficients).
-            gray = 0.299 * face_crop[..., 0] + 0.587 * face_crop[..., 1] + 0.114 * face_crop[..., 2]
-            hh, ww = gray.shape
-            # Quadrants: top-left, top-right, bottom-left, bottom-right.
-            quads = [
-                gray[: hh // 2, : ww // 2],
-                gray[: hh // 2, ww // 2 :],
-                gray[hh // 2 :, : ww // 2],
-                gray[hh // 2 :, ww // 2 :],
-            ]
-            means = np.array([q.mean() for q in quads if q.size > 0])
-            # Require all four quadrants and a non-black crop before dividing.
-            if means.size == 4 and means.mean() > 1e-3:
-                # Coefficient of variation of the four quadrant means.
-                imbalance = float(means.std() / means.mean())
-                # Linear ramp: imbalance 0.08 -> 0.0, 0.28 -> 1.0, clamped outside.
-                lighting_score = max(0.0, min(1.0, (imbalance - 0.08) / 0.20))
-                results.append(
-                    ArtifactIndicator(
-                        type="lighting",
-                        severity=_severity_from_score(lighting_score),
-                        description=(
-                            f"Luminance imbalance across face quadrants {imbalance:.3f} — "
-                            + ("inconsistent lighting direction" if lighting_score > 0.4
-                               else "lighting appears uniform")
-                        ),
-                        confidence=float(lighting_score),
-                    )
-                )
-    except Exception as e:  # noqa: BLE001
-        logger.warning(f"Face-based artifact detection failed: {e}")
-    return results
-# ---------- Orchestrator ----------
-def scan_artifacts(pil_img: Image.Image, raw_bytes: bytes) -> List[ArtifactIndicator]:
-    """Run every artifact detector and collect the indicators they produce.
-    Result order: GAN high-frequency indicator, compression indicator (each
-    omitted when its detector returns None), then the face-based indicators.
-    """
-    indicators: List[ArtifactIndicator] = []
-    # The two single-result detectors take different inputs (decoded image vs.
-    # raw bytes), so each is wrapped in a zero-argument lambda.
-    for fn in (
-        lambda: detect_gan_hf_artifact(pil_img),
-        lambda: detect_compression_anomaly(raw_bytes),
-    ):
-        ind = fn()
-        if ind is not None:
-            indicators.append(ind)
-    indicators.extend(detect_face_based_artifacts(pil_img))
-    return indicators

auth.py DELETED Viewed

@@ -1,30 +0,0 @@
-from __future__ import annotations
-from datetime import datetime
-from pydantic import BaseModel, EmailStr, Field
-# Registration payload (presumably the request body of the register endpoint):
-# a validated e-mail address, a password of 6-128 characters and an optional
-# display name of at most 255 characters.
-class RegisterBody(BaseModel):
-    email: EmailStr
-    password: str = Field(min_length=6, max_length=128)
-    name: str | None = Field(default=None, max_length=255)
-# Login payload: validated e-mail plus password. Unlike RegisterBody, no length
-# limits are applied to the password here.
-class LoginBody(BaseModel):
-    email: EmailStr
-    password: str
-# Outward-facing view of a user: id, e-mail, optional name and creation time.
-# It carries no password or hash field.
-class UserOut(BaseModel):
-    id: int
-    email: str
-    name: str | None = None
-    created_at: datetime
-# Token envelope: the access token, its type (defaults to "bearer"), the token
-# lifetime in minutes and the user it belongs to.
-class TokenResponse(BaseModel):
-    access_token: str
-    token_type: str = "bearer"
-    expires_in_minutes: int
-    user: UserOut

auth_service.py DELETED Viewed

@@ -1,67 +0,0 @@
-from __future__ import annotations
-from datetime import datetime, timedelta, timezone
-from typing import Any
-import bcrypt
-from jose import JWTError, jwt
-from sqlalchemy.orm import Session
-from config import settings
-from db.models import User
-def _encode_pw(plain: str) -> bytes:
-    """UTF-8 encode a password and keep only its first 72 bytes."""
-    # bcrypt truncates to 72 bytes silently in some builds and hard-errors in others.
-    # Truncate explicitly so behavior is deterministic across versions.
-    # The cut is made on bytes, so it may split a multi-byte character; that is
-    # harmless here because the result is only ever handed to bcrypt as bytes.
-    return plain.encode("utf-8")[:72]
-def hash_password(plain: str) -> str:
-    """Return a bcrypt hash of `plain` (freshly generated salt) as a str."""
-    return bcrypt.hashpw(_encode_pw(plain), bcrypt.gensalt()).decode("utf-8")
-def verify_password(plain: str, hashed: str) -> bool:
-    """Check `plain` against a stored bcrypt hash.
-    Returns False both for a mismatch and for any error raised while checking
-    (for example a malformed stored hash); it never raises.
-    """
-    try:
-        return bcrypt.checkpw(_encode_pw(plain), hashed.encode("utf-8"))
-    except Exception:
-        return False
-def create_access_token(user_id: int, email: str) -> str:
-    """Build a signed JWT for the given user.
-    Claims: "sub" (user id as a string), "email", "iat" (now) and "exp"
-    (now + settings.JWT_EXPIRATION_MINUTES), both as integer UTC timestamps.
-    Signed with settings.JWT_SECRET_KEY using settings.JWT_ALGORITHM.
-    """
-    now = datetime.now(timezone.utc)
-    payload = {
-        "sub": str(user_id),
-        "email": email,
-        "iat": int(now.timestamp()),
-        "exp": int((now + timedelta(minutes=settings.JWT_EXPIRATION_MINUTES)).timestamp()),
-    }
-    return jwt.encode(payload, settings.JWT_SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
-def decode_token(token: str) -> dict[str, Any] | None:
-    """Decode and verify a JWT; return its claims, or None on any JWTError."""
-    try:
-        # Only the configured algorithm is accepted for verification.
-        return jwt.decode(token, settings.JWT_SECRET_KEY, algorithms=[settings.JWT_ALGORITHM])
-    except JWTError:
-        return None
-def register_user(db: Session, email: str, password: str, name: str | None) -> User:
-    """Create, commit and return a new User.
-    The e-mail is stripped and lower-cased before storage; the password is
-    stored only as a bcrypt hash; an empty `name` is stored as None.
-    NOTE(review): no duplicate-e-mail check is made here, so any uniqueness
-    error raised by the database on commit propagates to the caller.
-    """
-    email = email.strip().lower()
-    user = User(email=email, password_hash=hash_password(password), name=(name or None))
-    db.add(user)
-    db.commit()
-    # Reload the row so database-generated columns are populated on `user`.
-    db.refresh(user)
-    return user
-def authenticate(db: Session, email: str, password: str) -> User | None:
-    """Return the User matching the credentials, or None.
-    The e-mail is normalized the same way as in register_user (strip + lower).
-    An unknown e-mail and a wrong password both yield None, so callers cannot
-    tell the two cases apart.
-    """
-    email = email.strip().lower()
-    user = db.query(User).filter(User.email == email).first()
-    if not user or not verify_password(password, user.password_hash):
-        return None
-    return user
-def get_user(db: Session, user_id: int) -> User | None:
-    """Return the User with the given id, or None when no row matches."""
-    return db.query(User).filter(User.id == user_id).first()

common.py DELETED Viewed

@@ -1,88 +0,0 @@
-from __future__ import annotations
-from typing import List, Optional
-from pydantic import BaseModel, ConfigDict, Field
-# Overall verdict: label, severity, an authenticity score limited to 0-100 and
-# the underlying model's confidence (0.0-1.0) and label.
-class Verdict(BaseModel):
-    # Empty protected_namespaces lets the `model_`-prefixed fields below be
-    # declared without pydantic's protected-namespace warning.
-    model_config = ConfigDict(protected_namespaces=())
-    label: str
-    severity: str
-    authenticity_score: int = Field(ge=0, le=100)
-    model_confidence: float = Field(ge=0.0, le=1.0)
-    model_label: str
-# One detected artifact signal: a type tag, a severity, a human-readable
-# description and a confidence limited to 0.0-1.0. `severity` is a plain str;
-# the allowed values in the inline comment are not enforced by the model.
-class ArtifactIndicator(BaseModel):
-    type: str
-    severity: str  # low | medium | high
-    description: str
-    confidence: float = Field(ge=0.0, le=1.0)
-# A source reference: name, title, URL, optional publication timestamp (kept
-# as a string) and a relevance score limited to 0.0-1.0.
-class TrustedSource(BaseModel):
-    source_name: str
-    title: str
-    url: str
-    published_at: Optional[str] = None
-    relevance_score: float = Field(ge=0.0, le=1.0)
-# A piece of contradicting evidence: source name, title and URL, tagged with a
-# type that defaults to "fact_check".
-class ContradictingEvidence(BaseModel):
-    source_name: str
-    title: str
-    url: str
-    type: str = "fact_check"
-# Record of an override step. Every field has a default, so TruthOverride()
-# means "not applied": applied=False, empty source fields, zero similarity and
-# zero before/after fake probabilities.
-class TruthOverride(BaseModel):
-    applied: bool = False
-    source_url: str = ""
-    source_name: str = ""
-    similarity: float = 0.0
-    fake_prob_before: float = 0.0
-    fake_prob_after: float = 0.0
-# Selected EXIF fields (all optional strings, None when absent) plus a signed
-# trust adjustment and the reason for it.
-class ExifSummary(BaseModel):
-    make: Optional[str] = None
-    model: Optional[str] = None
-    datetime_original: Optional[str] = None
-    gps_info: Optional[str] = None
-    software: Optional[str] = None
-    lens_model: Optional[str] = None
-    trust_adjustment: int = 0  # negative = more real, positive = more fake
-    trust_reason: str = ""
-# LLM-written explanation: a paragraph, a list of bullet points, the name of
-# the model that produced it and whether it came from a cache.
-class LLMExplainabilitySummary(BaseModel):
-    paragraph: str = ""
-    # A mutable default is safe on a pydantic model: field defaults are copied
-    # for each instance rather than shared.
-    bullets: List[str] = []
-    model_used: str = ""
-    cached: bool = False
-# Score for one VLM-assessed component: an integer limited to 0-100 that
-# defaults to 75, plus free-text notes.
-class VLMComponentScore(BaseModel):
-    score: int = Field(ge=0, le=100, default=75)
-    notes: str = ""
-# Per-component VLM scores. Each component defaults to VLMComponentScore(),
-# i.e. score 75 with empty notes, so an empty VLMBreakdown() reads as 75
-# across the board rather than as "missing".
-class VLMBreakdown(BaseModel):
-    facial_symmetry: VLMComponentScore = VLMComponentScore()
-    skin_texture: VLMComponentScore = VLMComponentScore()
-    lighting_consistency: VLMComponentScore = VLMComponentScore()
-    background_coherence: VLMComponentScore = VLMComponentScore()
-    anatomy_hands_eyes: VLMComponentScore = VLMComponentScore()
-    context_objects: VLMComponentScore = VLMComponentScore()
-    model_used: str = ""
-    cached: bool = False
-# Run metadata: names of the completed stages, total duration in milliseconds
-# and the model used. All three fields are required.
-class ProcessingSummary(BaseModel):
-    # Empty protected_namespaces lets `model_used` be declared without
-    # pydantic's protected-namespace warning.
-    model_config = ConfigDict(protected_namespaces=())
-    stages_completed: List[str]
-    total_duration_ms: int
-    model_used: str

config.py CHANGED Viewed

@@ -20,6 +20,7 @@ class Settings(BaseSettings):
     # AI Models
     IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
     TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
     # Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
     TEXT_MULTILANG_MODEL_ID: str = ""
@@ -37,15 +38,47 @@ class Settings(BaseSettings):
     REPORT_DIR: str = "./temp_reports"
     REPORT_TTL_SECONDS: int = 3600  # 1h expiry
     # LLM Explainability (Phase 12)
     LLM_PROVIDER: str = "gemini"  # "gemini" | "openai"
     LLM_API_KEY: str = ""
-    LLM_MODEL: str = "gemini-2.5-pro"  # or "gpt-4o"
     # EfficientNet (ICPR2020 / DeepShield1 merge)
     EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
     EFFICIENTNET_TRAIN_DB: str = "DFDC"
     ENSEMBLE_MODE: bool = True  # run both ViT + EfficientNet and average scores
     VIDEO_SAMPLE_FRAMES: int = 16  # frames to sample per video for inference
     EXIFTOOL_PATH: str = ""  # full path to ExifTool binary; empty = metadata write disabled

     # AI Models
     IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
+    GENERAL_IMAGE_MODEL_ID: str = "umm-maybe/AI-image-detector"
     TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
     # Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
     TEXT_MULTILANG_MODEL_ID: str = ""
     REPORT_DIR: str = "./temp_reports"
     REPORT_TTL_SECONDS: int = 3600  # 1h expiry
+    # Phase 19 — dedup cache + object storage
+    CACHE_TTL_DAYS: int = 30
+    MEDIA_ROOT: str = "./media"
     # LLM Explainability (Phase 12)
     LLM_PROVIDER: str = "gemini"  # "gemini" | "openai"
     LLM_API_KEY: str = ""
+    LLM_MODEL: str = "gemini-2.5-flash"  # flash is ~12x cheaper + larger free-tier quota than pro. Use "gemini-2.5-pro" for harder reasoning.
+    # LLM fallback — Groq (Llama 3.3 70B by default). Used automatically when the
+    # primary provider returns 429/quota exceeded. Leave empty to disable fallback.
+    GROQ_API_KEY: str = ""
+    GROQ_MODEL: str = "llama-3.3-70b-versatile"
     # EfficientNet (ICPR2020 / DeepShield1 merge)
     EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
     EFFICIENTNET_TRAIN_DB: str = "DFDC"
     ENSEMBLE_MODE: bool = True  # run both ViT + EfficientNet and average scores
+    # Phase 11.3: FFPP-fine-tuned ViT. Path is resolved relative to the repo root.
+    # The checkpoint lives at <repo_root>/trained_models/ (the `trained_models/` dir
+    # at the project root, alongside `backend/` and `frontend/`).
+    FFPP_MODEL_PATH: str = "trained_models"
+    # Optional: pull FFPP checkpoint from Hugging Face Hub when local checkpoint
+    # is missing (keeps large model files out of GitHub source repo).
+    FFPP_MODEL_REPO_ID: str = ""
+    FFPP_MODEL_REVISION: str = "main"
+    FFPP_BASE_PROCESSOR_ID: str = "google/vit-base-patch16-224-in21k"
+    FFPP_ENABLED: bool = True
+    # Ensemble weights — FFPP is trained on a better (face-specific FFPP c40) dataset
+    # and is weighted more heavily when a face is present. When no face is detected,
+    # we still blend it but lean on the generic ViT since FFPP only saw face crops.
+    FFPP_WEIGHT_FACE: float = 0.55       # face-present ensemble weight
+    VIT_WEIGHT_FACE: float = 0.20
+    EFFNET_WEIGHT_FACE: float = 0.25
+    FFPP_WEIGHT_NOFACE: float = 0.35     # no-face ensemble weight
+    VIT_WEIGHT_NOFACE: float = 0.65
+    NOFACE_GENERAL_WEIGHT: float = 0.60
+    NOFACE_FORENSICS_WEIGHT: float = 0.20
+    NOFACE_EXIF_WEIGHT: float = 0.10
+    NOFACE_VLM_WEIGHT: float = 0.10
     VIDEO_SAMPLE_FRAMES: int = 16  # frames to sample per video for inference
     EXIFTOOL_PATH: str = ""  # full path to ExifTool binary; empty = metadata write disabled

database.py DELETED Viewed

@@ -1,30 +0,0 @@
-from sqlalchemy import create_engine
-from sqlalchemy.orm import DeclarativeBase, sessionmaker
-from config import settings
-# Module-level SQLAlchemy engine built from settings.DATABASE_URL.
-engine = create_engine(
-    settings.DATABASE_URL,
-    # Only SQLite URLs get check_same_thread=False (lets a connection be used
-    # from a thread other than the one that created it); other URLs get no
-    # extra connect args.
-    connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
-    pool_pre_ping=True,  # test a pooled connection before handing it out
-    pool_recycle=300,  # replace pooled connections older than 300 seconds
-)
-# Session factory bound to the engine: explicit commits, no automatic flush.
-SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
-# Declarative base class; init_db() creates the tables registered on
-# Base.metadata.
-class Base(DeclarativeBase):
-    pass
-def get_db():
-    """Yield a new database session and close it when the consumer is done.
-    The `finally` clause closes the session whether the consumer finishes
-    normally or raises. Nothing is committed here; callers commit explicitly.
-    """
-    db = SessionLocal()
-    try:
-        yield db
-    finally:
-        db.close()
-def init_db():
-    """Create every table registered on Base.metadata."""
-    # Imported for its side effect only: loading db.models registers the model
-    # classes on Base.metadata before create_all runs.
-    from db import models  # noqa: F401
-    Base.metadata.create_all(bind=engine)

datasets/__init__.py DELETED Viewed

File without changes

datasets/build_manifest.py DELETED Viewed

@@ -1,93 +0,0 @@
-"""Build a unified train/val/test manifest (70/15/15) across all dataset buckets.
-Expected input layout (produced by the other scripts in this package):
-    data_root/
-      real/
-        ffpp_youtube/*.jpg          # frames from FFPP original_sequences
-        ffhq/*.jpg                  # FFHQ thumbnails
-      fake/
-        ffpp_deepfakes/*.jpg
-        ffpp_face2face/*.jpg
-        ffpp_faceswap/*.jpg
-        ffpp_neuraltextures/*.jpg
-        ffpp_faceshifter/*.jpg
-        dfdc/*.jpg
-The manifest is stratified by (label, source) so FFHQ stays represented
-in val/test.
-Usage:
-    python -m backend.training.datasets.build_manifest \
-        --data ./data --out ./data/manifest.csv --seed 42
-"""
-from __future__ import annotations
-import argparse
-import csv
-import random
-from collections import defaultdict
-from pathlib import Path
-IMG_EXTS = {".jpg", ".jpeg", ".png"}
-def collect(data_root: Path) -> list[tuple[str, str, str]]:
-    """Gather (absolute_path, label, source) rows for every image under data_root.
-    Looks in data_root/real and data_root/fake (a missing label directory is
-    skipped). `source` is the name of the first-level directory below the
-    label; images are found recursively beneath it and matched on IMG_EXTS
-    case-insensitively. Files placed directly in a label directory are ignored.
-    """
-    rows: list[tuple[str, str, str]] = []
-    for label in ("real", "fake"):
-        label_root = data_root / label
-        if not label_root.exists():
-            continue
-        # Sorted so the source directories are visited in a stable order.
-        for source_dir in sorted(p for p in label_root.iterdir() if p.is_dir()):
-            for img in source_dir.rglob("*"):
-                if img.suffix.lower() in IMG_EXTS and img.is_file():
-                    rows.append((str(img.resolve()), label, source_dir.name))
-    return rows
-def split(rows: list[tuple[str, str, str]], seed: int) -> dict[str, list[tuple[str, str, str]]]:
-    """Split rows into train/val/test, stratified by (label, source).
-    Each (label, source) bucket is shuffled with a Random seeded by `seed`,
-    then cut at int(0.70 * n) and int(0.15 * n); the remainder goes to test.
-    NOTE(review): because both cuts truncate, very small buckets lean towards
-    test (a bucket of one item lands entirely in test).
-    """
-    buckets: dict[tuple[str, str], list[tuple[str, str, str]]] = defaultdict(list)
-    for r in rows:
-        # r is (path, label, source); bucket key is (label, source).
-        buckets[(r[1], r[2])].append(r)
-    rng = random.Random(seed)
-    out = {"train": [], "val": [], "test": []}
-    # `key` is not used inside the loop; only the bucket contents matter.
-    for key, items in buckets.items():
-        # Shuffles the bucket's own list in place; `rows` itself is not reordered.
-        rng.shuffle(items)
-        n = len(items)
-        n_train = int(0.70 * n)
-        n_val = int(0.15 * n)
-        out["train"].extend(items[:n_train])
-        out["val"].extend(items[n_train : n_train + n_val])
-        out["test"].extend(items[n_train + n_val :])
-    return out
-def main() -> None:
-    """CLI entry point: scan --data, split, and write the manifest CSV to --out.
-    Exits via SystemExit with a message when no images are found.
-    """
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--data", required=True, type=Path)
-    ap.add_argument("--out", required=True, type=Path)
-    ap.add_argument("--seed", type=int, default=42)
-    args = ap.parse_args()
-    rows = collect(args.data)
-    if not rows:
-        raise SystemExit(f"No images found under {args.data}")
-    splits = split(rows, args.seed)
-    args.out.parent.mkdir(parents=True, exist_ok=True)
-    # Opened in "w" mode: an existing manifest at --out is overwritten.
-    with args.out.open("w", newline="", encoding="utf-8") as f:
-        w = csv.writer(f)
-        w.writerow(["path", "label", "source", "split"])
-        for name, items in splits.items():
-            for path, label, source in items:
-                w.writerow([path, label, source, name])
-    summary = {k: len(v) for k, v in splits.items()}
-    print(f"Manifest: {args.out}")
-    print(f"Totals: {summary} (overall {sum(summary.values())})")
-if __name__ == "__main__":
-    main()

datasets/download_dfdc_sample.py DELETED Viewed

@@ -1,44 +0,0 @@
-"""Download a sample of the DFDC (Deepfake Detection Challenge) Preview dataset.
-The full DFDC is ~470GB; the *preview* release (~5GB, Kaggle) is enough for
-diversity augmentation alongside FFPP.
-Requires the Kaggle CLI (`pip install kaggle`) and ~/.kaggle/kaggle.json.
-Usage:
-    python -m backend.training.datasets.download_dfdc_sample --output ./data/dfdc_preview
-"""
-from __future__ import annotations
-import argparse
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-def main() -> None:
-    """CLI entry point: download a Kaggle competition archive into --output.
-    Exits with status 2 when the `kaggle` executable is not on PATH. A failing
-    download raises subprocess.CalledProcessError (check=True). The archive is
-    not unpacked; the final message tells the user how to unzip it.
-    """
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--output", required=True, type=Path)
-    ap.add_argument(
-        "--competition",
-        default="deepfake-detection-challenge",
-        help="Kaggle competition slug (default: deepfake-detection-challenge preview).",
-    )
-    args = ap.parse_args()
-    kaggle = shutil.which("kaggle")
-    if kaggle is None:
-        print("Kaggle CLI not found. Install with: pip install kaggle", file=sys.stderr)
-        print("Then place kaggle.json in ~/.kaggle/ (chmod 600).", file=sys.stderr)
-        sys.exit(2)
-    args.output.mkdir(parents=True, exist_ok=True)
-    # Argument list (no shell), so the path and slug are passed verbatim.
-    cmd = [kaggle, "competitions", "download", "-c", args.competition, "-p", str(args.output)]
-    print("Running:", " ".join(cmd))
-    subprocess.run(cmd, check=True)
-    print(f"Downloaded to {args.output}. Unzip with: unzip *.zip")
-if __name__ == "__main__":
-    main()

datasets/download_ffhq.py DELETED Viewed

@@ -1,49 +0,0 @@
-"""Download the FFHQ 128x128 thumbnail subset (official NVlabs distribution,
-Google Drive mirror) into the `real` bucket of the training set.
-Runs the NVlabs `ffhq-dataset` helper when it is on PATH; otherwise prints
-manual-download instructions and exits with status 1. `-n/--num` (default 10k)
-is accepted but is not currently forwarded to the helper.
-Usage:
-    python -m backend.training.datasets.download_ffhq --output ./data/real/ffhq -n 10000
-"""
-from __future__ import annotations
-import argparse
-import shutil
-import subprocess
-import sys
-from pathlib import Path
-def try_nvlabs_helper(output: Path, num: int) -> bool:
-    """Prefer the official ffhq-dataset downloader if installed.
-    Returns False when no `ffhq-dataset` executable is on PATH; otherwise runs
-    it with `output` as the working directory and returns True.
-    NOTE(review): `num` is never used, so the helper is not limited to N
-    images, and check=False means True is returned even if the helper fails.
-    """
-    helper = shutil.which("ffhq-dataset")
-    if helper is None:
-        return False
-    # "ffhq-dataset-v2.json" is a relative path, resolved against cwd=output.
-    cmd = [helper, "--json", "ffhq-dataset-v2.json", "--thumbs", "--num_threads", "4"]
-    print("Running:", " ".join(cmd))
-    subprocess.run(cmd, cwd=output, check=False)
-    return True
-def main() -> None:
-    """CLI entry point: create --output, then try the ffhq-dataset helper.
-    Returns normally once the helper has been run; when the helper is not
-    installed, prints install / manual-download instructions and exits with
-    status 1.
-    """
-    ap = argparse.ArgumentParser()
-    ap.add_argument("--output", required=True, type=Path)
-    ap.add_argument("-n", "--num", type=int, default=10000)
-    args = ap.parse_args()
-    args.output.mkdir(parents=True, exist_ok=True)
-    if try_nvlabs_helper(args.output, args.num):
-        return
-    print("[!] `ffhq-dataset` helper not installed.")
-    print("    Install via: pip install ffhq-dataset  (requires gdown)")
-    print("    Or download thumbnails128x128.zip manually from:")
-    print("      https://github.com/NVlabs/ffhq-dataset")
-    print(f"    Extract into: {args.output}")
-    sys.exit(1)
-if __name__ == "__main__":
-    main()

datasets/extract_frames.py DELETED Viewed

@@ -1,90 +0,0 @@
-"""Convert FFPP / DFDC videos -> N sampled frames per video (default 16), resized to size x size RGB JPEGs (default 224x224).
-Usage:
-    python -m backend.training.datasets.extract_frames \
-        --input ./ffpp_data/original_sequences/youtube/raw/videos \
-        --output ./ffpp_data/frames/real \
-        --label real --frames 16 --size 224
-"""
-from __future__ import annotations
-import argparse
-import csv
-from pathlib import Path
-import cv2
-import numpy as np
-from tqdm import tqdm
-def sample_frame_indices(total: int, n: int) -> list[int]:
-    """Pick up to `n` evenly spaced frame indices out of `total` frames.
-    Returns [] when total <= 0 and every index when total <= n. Otherwise the
-    range is divided into n equal segments and the midpoint of each is taken,
-    capped at total - 1.
-    NOTE(review): n == 0 with total > 0 reaches the division below and raises
-    ZeroDivisionError; a negative n yields negative indices.
-    """
-    if total <= 0:
-        return []
-    if total <= n:
-        return list(range(total))
-    step = total / float(n)
-    return [min(total - 1, int(step * i + step / 2)) for i in range(n)]
-def extract_from_video(path: Path, out_dir: Path, n: int, size: int) -> int:
-    """Save up to `n` sampled frames of one video as size x size JPEGs.
-    Files are written to out_dir as "<video stem>_f<frame index, 6 digits>.jpg"
-    at JPEG quality 95. Returns the number of frames written, or 0 when the
-    video cannot be opened.
-    """
-    cap = cv2.VideoCapture(str(path))
-    if not cap.isOpened():
-        return 0
-    # Sampling relies on the frame count the container reports; if the video
-    # holds fewer decodable frames, fewer than n images are written.
-    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    indices = set(sample_frame_indices(total, n))
-    out_dir.mkdir(parents=True, exist_ok=True)
-    saved = 0
-    i = 0
-    # Frames are decoded sequentially from the start; no seeking is done.
-    while True:
-        ok, frame = cap.read()
-        if not ok:
-            break
-        if i in indices:
-            frame = cv2.resize(frame, (size, size), interpolation=cv2.INTER_AREA)
-            cv2.imwrite(str(out_dir / f"{path.stem}_f{i:06d}.jpg"), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
-            saved += 1
-        i += 1
-    # NOTE(review): release is skipped if anything inside the loop raises.
-    cap.release()
-    return saved
-def main() -> None:
-    """CLI entry point: extract frames from every .mp4 under --input.
-    Frames of each video go to --output/<video stem>/. With --manifest, one
-    row per extracted .jpg is appended to that CSV (a header is written only
-    when the file does not exist yet). Prints a message and returns when no
-    .mp4 is found.
-    """
-    ap = argparse.ArgumentParser(description="Sample N frames per video and resize.")
-    ap.add_argument("--input", required=True, type=Path, help="Directory of .mp4 videos (recursive).")
-    ap.add_argument("--output", required=True, type=Path, help="Directory to write .jpg frames.")
-    ap.add_argument("--label", required=True, choices=["real", "fake"], help="Label tag for manifest.")
-    ap.add_argument("--frames", type=int, default=16)
-    ap.add_argument("--size", type=int, default=224)
-    ap.add_argument("--manifest", type=Path, default=None, help="Optional CSV manifest append path.")
-    args = ap.parse_args()
-    videos = [p for p in args.input.rglob("*.mp4")]
-    if not videos:
-        print(f"No .mp4 found under {args.input}")
-        return
-    rows: list[tuple[str, str, str]] = []
-    total_frames = 0
-    for vid in tqdm(videos, desc=f"extract[{args.label}]"):
-        # Output directory is keyed by the video's stem only, so two videos
-        # with the same file name in different folders share one directory.
-        rel_out = args.output / vid.stem
-        saved = extract_from_video(vid, rel_out, args.frames, args.size)
-        total_frames += saved
-        if args.manifest is not None:
-            # Lists every .jpg present in the directory, including files left
-            # there by an earlier run.
-            for jpg in rel_out.glob("*.jpg"):
-                rows.append((str(jpg), args.label, vid.stem))
-    if args.manifest is not None and rows:
-        args.manifest.parent.mkdir(parents=True, exist_ok=True)
-        new_file = not args.manifest.exists()
-        with args.manifest.open("a", newline="", encoding="utf-8") as f:
-            w = csv.writer(f)
-            if new_file:
-                w.writerow(["path", "label", "source_video"])
-            w.writerows(rows)
-    print(f"Done. Videos: {len(videos)}, frames written: {total_frames}")
-if __name__ == "__main__":
-    main()

datasets/procure_all.ps1 DELETED Viewed

@@ -1,40 +0,0 @@
-# Phase 11.1 orchestrator for Windows (PowerShell)
-$ErrorActionPreference = "Stop"
-$ROOT = if ($env:ROOT) { $env:ROOT } else { ".\data" }
-$FFPP = if ($env:FFPP) { $env:FFPP } else { ".\ffpp_data" }
-New-Item -ItemType Directory -Force -Path "$ROOT\real" | Out-Null
-New-Item -ItemType Directory -Force -Path "$ROOT\fake" | Out-Null
-New-Item -ItemType Directory -Force -Path $FFPP | Out-Null
-Write-Host "1. FaceForensics++ (highly compressed c40, 10 videos only) -- requires TOS keypress"
-python backend\scripts\download_ffpp.py $FFPP -d all -c c40 -t videos -n 10
-Write-Host "2. Frame extraction: real (original youtube)"
-python -m backend.training.datasets.extract_frames `
-    --input "$FFPP\original_sequences\youtube\c40\videos" `
-    --output "$ROOT\real\ffpp_youtube" --label real --frames 4 --size 224
-Write-Host "3. Frame extraction: fakes (each manipulation family)"
-$Families = @("Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures", "FaceShifter")
-foreach ($fam in $Families) {
-    $famLower = $fam.ToLower()
-    python -m backend.training.datasets.extract_frames `
-        --input "$FFPP\manipulated_sequences\$fam\c40\videos" `
-        --output "$ROOT\fake\ffpp_$famLower" --label fake --frames 4 --size 224
-}
-Write-Host "4. FFHQ thumbnails (real - limited to 100 items)"
-python -m backend.training.datasets.download_ffhq --output "$ROOT\real\ffhq" -n 100
-Write-Host "6. DFDC preview sample (fake+real)"
-python -m backend.training.datasets.download_dfdc_sample --output "$ROOT\_dfdc_raw"
-Write-Host "NOTE: You will need to manually unzip + sort DFDC into $ROOT\fake\dfdc and $ROOT\real\dfdc"
-Write-Host "7. Build manifest"
-python -m backend.training.datasets.build_manifest `
-    --data $ROOT --out "$ROOT\manifest.csv" --seed 42
-Write-Host "Phase 11.1 complete. See $ROOT\manifest.csv"

datasets/procure_all.sh DELETED Viewed

@@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-# Phase 11.1 orchestrator: download + frame-extract + manifest.
-# Target: ~120k labeled images. Expect 60-80GB of intermediate data and ~30GB of extracted frames.
-set -euo pipefail
-ROOT="${ROOT:-./data}"
-FFPP="${FFPP:-./ffpp_data}"
-mkdir -p "$ROOT/real" "$ROOT/fake" "$FFPP"
-# 1. FaceForensics++ (raw, videos) -- requires TOS keypress
-python backend/scripts/download_ffpp.py "$FFPP" -d all -c raw -t videos
-# 2. Frame extraction: real (original youtube)
-python -m backend.training.datasets.extract_frames \
-    --input  "$FFPP/original_sequences/youtube/raw/videos" \
-    --output "$ROOT/real/ffpp_youtube" --label real --frames 16 --size 224
-# 3. Frame extraction: fakes (each manipulation family)
-for fam in Deepfakes Face2Face FaceSwap NeuralTextures FaceShifter; do
-    python -m backend.training.datasets.extract_frames \
-        --input  "$FFPP/manipulated_sequences/$fam/raw/videos" \
-        --output "$ROOT/fake/ffpp_${fam,,}" --label fake --frames 16 --size 224
-done
-# 4. FFHQ thumbnails (real)
-python -m backend.training.datasets.download_ffhq --output "$ROOT/real/ffhq" -n 10000
-# 6. DFDC preview sample (fake+real) -- needs Kaggle creds
-python -m backend.training.datasets.download_dfdc_sample --output "$ROOT/_dfdc_raw"
-# NOTE: unzip + sort into $ROOT/fake/dfdc  and  $ROOT/real/dfdc  per DFDC metadata.json
-# 7. Build manifest
-python -m backend.training.datasets.build_manifest \
-    --data "$ROOT" --out "$ROOT/manifest.csv" --seed 42
-echo "Phase 11.1 complete. See $ROOT/manifest.csv"

db/database.py CHANGED Viewed

@@ -1,28 +1,26 @@
-from sqlalchemy import create_engine
 from sqlalchemy.orm import DeclarativeBase, sessionmaker
 from config import settings
-# True for every non-SQLite URL; the name reflects the only other backend in use.
-_is_postgres = not settings.DATABASE_URL.startswith("sqlite")
 engine = create_engine(
     settings.DATABASE_URL,
-    # SQLite needs check_same_thread=False; Postgres doesn't support it
-    connect_args={"check_same_thread": False} if not _is_postgres else {},
-    # Neon (and other serverless Postgres) silently drops idle SSL connections.
-    # pool_pre_ping=True: test each connection before use and transparently
-    # reconnect if the server closed it — eliminates "SSL connection has been
-    # closed unexpectedly" 500s.
-    pool_pre_ping=_is_postgres,
-    # Recycle connections every 5 min so we never hold a connection past Neon's
-    # idle timeout (~5–10 min depending on plan).
-    pool_recycle=300 if _is_postgres else -1,
-    # Keep pool small — HF free tier is single-process; Neon free tier has a
-    # max-connection limit. The base pool size is the same for both backends;
-    # only the overflow allowance differs.
-    pool_size=5,
-    max_overflow=2 if _is_postgres else 10,
 )
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
@@ -40,5 +38,31 @@ def get_db():
 def init_db():
     from db import models  # noqa: F401
     Base.metadata.create_all(bind=engine)

+from sqlalchemy import create_engine, event
 from sqlalchemy.orm import DeclarativeBase, sessionmaker
 from config import settings
 engine = create_engine(
     settings.DATABASE_URL,
+    # check_same_thread is only passed for SQLite URLs; every other backend gets no extra connect args.
+    connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
+    # Validate pooled connections before use and recycle them after 300 seconds.
+    pool_pre_ping=True,
+    pool_recycle=300,
 )
+if settings.DATABASE_URL.startswith("sqlite"):
+    # Registered on the engine's "connect" event, so the PRAGMAs below are issued
+    # on each new DBAPI connection rather than once per process.
+    @event.listens_for(engine, "connect")
+    def _sqlite_on_connect(dbapi_conn, _):
+        # Enforce FK constraints (needed for ON DELETE SET NULL) + WAL for better
+        # concurrent reads while a writer is active.
+        cur = dbapi_conn.cursor()
+        cur.execute("PRAGMA foreign_keys=ON")
+        cur.execute("PRAGMA journal_mode=WAL")
+        cur.close()
+# Session factory bound to the engine above; commits and flushes are explicit.
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
 def init_db():
+    """Create all ORM tables, then apply the lightweight in-place migrations.
+    Columns missing from an existing ``analyses`` table are added with
+    ``ALTER TABLE`` in a single transaction. Index creation is best effort:
+    each ``CREATE INDEX`` runs in its own transaction and a failure is logged
+    instead of being silently swallowed.
+    """
+    import logging
     from db import models  # noqa: F401
+    from sqlalchemy import inspect, text
+    from sqlalchemy.exc import SQLAlchemyError
     Base.metadata.create_all(bind=engine)
+    # Phase 19.4 — lightweight in-place migration for new columns.
+    # Alembic is overkill here; just ALTER TABLE when a new column is missing.
+    insp = inspect(engine)
+    if "analyses" not in insp.get_table_names():
+        return
+    existing = {c["name"] for c in insp.get_columns("analyses")}
+    additions = {
+        "media_hash": "VARCHAR(64)",
+        "media_path": "VARCHAR(512)",
+        "thumbnail_url": "VARCHAR(512)",
+    }
+    missing = {col: ddl for col, ddl in additions.items() if col not in existing}
+    if missing:
+        with engine.begin() as conn:
+            # Column names and types come from the literal dict above, never from input.
+            for col, ddl in missing.items():
+                conn.execute(text(f"ALTER TABLE analyses ADD COLUMN {col} {ddl}"))
+    # Indices (CREATE INDEX IF NOT EXISTS is SQLite+Postgres safe).
+    # One transaction per statement: on PostgreSQL a failed statement aborts the
+    # enclosing transaction, so sharing one with the ALTER TABLEs above would
+    # silently roll the new columns back whenever an index could not be created.
+    for ddl in (
+        "CREATE INDEX IF NOT EXISTS ix_analyses_media_hash ON analyses (media_hash)",
+        "CREATE INDEX IF NOT EXISTS ix_record_user_created ON analyses (user_id, created_at)",
+        "CREATE INDEX IF NOT EXISTS ix_report_analysis ON reports (analysis_id)",
+    ):
+        try:
+            with engine.begin() as conn:
+                conn.execute(text(ddl))
+        except SQLAlchemyError:
+            # Still best effort (a missing index only costs query speed), but leave a trace.
+            logging.getLogger(__name__).warning(
+                "Index migration skipped: %s", ddl, exc_info=True
+            )

db/models.py CHANGED Viewed

@@ -1,6 +1,6 @@
-from datetime import datetime
-from sqlalchemy import DateTime, ForeignKey, Integer, String, Text
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from db.database import Base
@@ -13,7 +13,7 @@ class User(Base):
     email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
     password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
     name: Mapped[str | None] = mapped_column(String(255), nullable=True)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
     analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
@@ -22,16 +22,26 @@ class AnalysisRecord(Base):
     __tablename__ = "analyses"
     id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
-    user_id: Mapped[int | None] = mapped_column(ForeignKey("users.id"), nullable=True)
     media_type: Mapped[str] = mapped_column(String(32), nullable=False)  # image|video|text|screenshot
     verdict: Mapped[str] = mapped_column(String(32), nullable=False)
     authenticity_score: Mapped[float] = mapped_column(nullable=False)
     result_json: Mapped[str] = mapped_column(Text, nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
     user: Mapped["User | None"] = relationship(back_populates="analyses")
     report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
 class Report(Base):
     __tablename__ = "reports"
@@ -39,7 +49,11 @@ class Report(Base):
     id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
     analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
     file_path: Mapped[str] = mapped_column(String(512), nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
-    expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
     analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")

+from datetime import datetime, timezone
+from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text
 from sqlalchemy.orm import Mapped, mapped_column, relationship
 from db.database import Base
     email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
     password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
     name: Mapped[str | None] = mapped_column(String(255), nullable=True)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
     analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
     __tablename__ = "analyses"
     id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
+    user_id: Mapped[int | None] = mapped_column(
+        ForeignKey("users.id", ondelete="SET NULL"), nullable=True,
+    )
     media_type: Mapped[str] = mapped_column(String(32), nullable=False)  # image|video|text|screenshot
     verdict: Mapped[str] = mapped_column(String(32), nullable=False)
     authenticity_score: Mapped[float] = mapped_column(nullable=False)
     result_json: Mapped[str] = mapped_column(Text, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+    # Phase 19.1 / 19.2 — SHA-256 dedup + object storage
+    media_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+    media_path: Mapped[str | None] = mapped_column(String(512), nullable=True)
+    thumbnail_url: Mapped[str | None] = mapped_column(String(512), nullable=True)
     user: Mapped["User | None"] = relationship(back_populates="analyses")
     report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
+    __table_args__ = (
+        Index("ix_record_user_created", "user_id", "created_at"),
+    )
 class Report(Base):
     __tablename__ = "reports"
     id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
     analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
     file_path: Mapped[str] = mapped_column(String(512), nullable=False)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+    expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
     analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")
+    __table_args__ = (
+        Index("ix_report_analysis", "analysis_id"),
+    )

deepshield.db-shm ADDED Viewed

Binary file (32.8 kB). View file

deepshield.db-wal ADDED Viewed

Binary file (86.6 kB). View file

deepshield_13_5bcf1328.pdf DELETED Viewed

@@ -1,148 +0,0 @@
-%PDF-1.4
-%���� ReportLab Generated PDF document (opensource)
-1 0 obj
-<<
-/F1 2 0 R /F2 3 0 R /F3 5 0 R
->>
-endobj
-2 0 obj
-<<
-/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
->>
-endobj
-3 0 obj
-<<
-/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
->>
-endobj
-4 0 obj
-<<
-/Contents 18 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
->> /Rotate 0 /Trans <<
->>
-  /Type /Page
->>
-endobj
-5 0 obj
-<<
-/BaseFont /Symbol /Name /F3 /Subtype /Type1 /Type /Font
->>
-endobj
-6 0 obj
-<<
-/Contents 19 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
-/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
->> /Rotate 0 /Trans <<
->>
-  /Type /Page
->>
-endobj
-7 0 obj
-<<
-/Outlines 9 0 R /PageMode /UseNone /Pages 17 0 R /Type /Catalog
->>
-endobj
-8 0 obj
-<<
-/Author () /CreationDate (D:20260415181653+05'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260415181653+05'00') /Producer (xhtml2pdf <https://github.com/xhtml2pdf/xhtml2pdf/>)
-  /Subject () /Title (DeepShield Analysis Report \204 7771f496-45b1-4c97-8a1a-d9d2492ca67d) /Trapped /False
->>
-endobj
-9 0 obj
-<<
-/Count 3 /First 10 0 R /Last 10 0 R /Type /Outlines
->>
-endobj
-10 0 obj
-<<
-/Count -4 /Dest [ 4 0 R /Fit ] /First 11 0 R /Last 16 0 R /Parent 9 0 R /Title (DeepShield Analysis Report)
->>
-endobj
-11 0 obj
-<<
-/Dest [ 4 0 R /Fit ] /Next 12 0 R /Parent 10 0 R /Title (Verdict)
->>
-endobj
-12 0 obj
-<<
-/Count -2 /Dest [ 4 0 R /Fit ] /First 13 0 R /Last 14 0 R /Next 15 0 R /Parent 10 0 R
-  /Prev 11 0 R /Title (Text Classification)
->>
-endobj
-13 0 obj
-<<
-/Dest [ 4 0 R /Fit ] /Next 14 0 R /Parent 12 0 R /Title (Sensationalism Signals)
->>
-endobj
-14 0 obj
-<<
-/Dest [ 4 0 R /Fit ] /Parent 12 0 R /Prev 13 0 R /Title (Extracted Keywords)
->>
-endobj
-15 0 obj
-<<
-/Dest [ 4 0 R /Fit ] /Next 16 0 R /Parent 10 0 R /Prev 12 0 R /Title (Trusted Source Cross-Reference \(1\))
->>
-endobj
-16 0 obj
-<<
-/Dest [ 6 0 R /Fit ] /Parent 10 0 R /Prev 15 0 R /Title (Processing Summary)
->>
-endobj
-17 0 obj
-<<
-/Count 2 /Kids [ 4 0 R 6 0 R ] /Type /Pages
->>
-endobj
-18 0 obj
-<<
-/Filter [ /ASCII85Decode /FlateDecode ] /Length 1750
->>
-stream
-Gb"/(9lo&I&A@sBlm4G[Acr2Y4p^$ca2t\gAsuiHo\c,I9gURE8lSA3M>qu?,XkR;()9nE&%0G$"Ts\%gUFdJ0E[3iXSb#I!k]Slq-+&^_fu5V&-:f'>`[5155TjpXI_!]U"iQd1qrcX0jNK021sk.K_S`f[kfkaR[pr2$LLU)UX&`3>7R17rJ3t':B_<4Kk*Grr8\a:5/Z<<[I]mbfHq28c@Y+3O)t)0k@mu0K^fiq^N*(u.%T.'jl<s/Nh4He2l7^V7l^6+r/e]g]la.!>S?L^o+>>SgBV8H:sX>5A0-l`)&\h4Lk6L5I=)ArV#_bh%^>M_c,"jSErfH[2A&CfKtLn_&K3h)!u;:i'6.H*(apE@/QWkIgF*OaTZ"ZT=me'_?iN-hL[(uHeb"'/B!\/7d068ieW>Y3P8NcsU#;"%eOe_!^-"Xsc?9a'H,u4"nMEm$3F[>c1S8J!`Sh;Ye8pG>de>ac3KpI*&j-(`*[@OB&i#OgJSl=(I-'<c@@S(D;k%W_$;Jl?$^4Y-G*rH-Rk_h_*=&9o`q/eu[3o$--Zc#XoX(sA&CI7RqS'cWBhG2:+ODa!):O6`^NT((K7(:%BVJ3=F%emKe-WmK3EIie5ZAbGXt^Hf,[uurZtImn"m<3AaU$p)@,./&T/aMg@_t-oU(Al5HTNb;0J4E-fqZg*4Y/o@,5%"0ObY@,kKsQdk#2'pZOD8tZrghVcMH[#FI&3f.,FmGKKKNo9?B[@`=FkP`:=oo>;4Vs.^rc%L+kt99^Gd]mfUsWoLD02jLH*WUl.Pb(oF^j?7RUN!m&Us22M!@A<RB<?,"#orPd]<&>ld**8+J._-f-FEVm$t<`HO6GNqd_[bhJ&8qK0d-ZKt;EB60u<VCgOQ;8F:jeCp]E2HpO&5==e.Z2c5.#%nBkfCHsrt>d0-2Z<CdP%-(PZ=R(ET3u6<D1@I(u[6LMn;M%:K3fl4ls;SX'd>:*Z]IT(dG)'7QU\#<V$$AmO6;HncG;?UO[<qf,QJem^o.f$D3^V'_h3dF.f82/[@>u^ecY/FgdnO#RWf_=Js*t;iiO?'fQ:g&@nC/Xhu.;&o1b+?_6-Z%i4;1H5GAUag0*4LfL'2;Sl`["O/H6p>jU\SO4%Ffq^-']m<b(Mo1Vg;h"E$f8Z?_AL@bH31kAKY%KEP\PmsdK2MJ^Dfb%0.sgc_9*[9&'t*;+>uUp/PKbuj>J71&Mh5t,WF_k&]O@P+do^;.WV"r6Kkb#5`,aF$-adPdc+'072](pse[q;.^?I#Q#kci1Qr9Z_U:Q_lQ53n!nIBHrchNfMeP-HF*=<22XdSrZ8j>sP4CR1SEP\Ge.aCh(VEW.)F'<]`"gVnaq<<]K,.uCIMlUqSgV3U</GlN`:3?Ft9S-uHH\_0/'rV&dUBe&=8^c)"F#b/Te`H6Yn1DnZc?T$IiaKe%'S][\*'W-]E<4.cnD8?.XB5)khib.oe$NkDa0D^I+$2a=[rbp"D3eQQqq@TO]aNHTMcGM3B3cn9,9'giRF__Y[<^:+bB3]sACEq,A$s%=n\8Vk/OM\c,W"mZ11,MaZ61]7"M`X1/qmcr-hH,#8+udNN9@p:IAM="9:b-RnD&FAVj^G'kW4tPgO+M25'hLH])Ped#fB*fOs>Te;V8("S^2/7e`3>4E]],alEY#@T-dG.(=/^7(s[bh3%omN/'WKl<"q_K`T7$VrMt.GfckX6]1EfAB]1F6o6g>\:2Etf)rD.XNrRc2pgl"Hr<(1MCd%~>endstream
-endobj
-19 0 obj
-<<
-/Filter [ /ASCII85Decode /FlateDecode ] /Length 1251
->>
-stream
-Gau`R;01GN&:Vs/fU'm&SZsB\Z>@pd[^l$Ne'"!6Hco+&(^1n<bt7%'s8H%#$m^MQApR0<`)taLn([eaAHiiuRK&mT!C!?!I`[+[8FM*9+s?gk^Sb`ESFuBheu'`^-k@VZQnjgqaj:g4M2J-c)%`([:iWt%O9mV9ZO6(4"\bX`WWWGJ,s27(iVrdq]@Q&`bX7t`KV@dkk1#U3_]/$nF6>.H%;Q95P;kU[/"Vgs.N%@'=M6kAJN1afF&?E_+rA+1KE+S:4],1QpOr^qg01e<#d,;@\e=!\1-*,1T[41J&^DSg86dC5.#&+tMiZhie$%p]f=sWJ!9ni#^ZR?Gp5lVJY,M<YHnZf[nt2A3ZtRV6dLh4C-*^gI%O$[,o&o;u7[Nu/XEmkj&m4-UHNFF#I0VCUiaS-$S2Gs[@(=.(Fg-V>W+]dGA*V*5[2WS\gs>9t%t32b/^W)[_+r7&3kOLD>8WTI508QU_ZkVRb*l"j_,ie@Wk/$,J'=rjAsRr^aIAp,g4N\@rcW@_7fV)G7.f:C\2aDCnK2"(-Yh-fNKV4ogPJ_Bbno/AG^W)=l`02mHESBSd,2MW2Q,8S^O,7f_^Pj+'$c\[n!'TZ'8A[[6$M/6Vlo9egXU318J0Zl;rXSYgM=-\-3TecfRc]m]FKNI.=E4amT3\PSaWQi;TtrPVN"#t`E;<R<T0FHF)>bkNM&M.:/OC)MK2$$?Jp$`SY/%t"jbj6*+.%6.71qjEsp)j@\0#RIF/1!&^q"O7Ou;8DL^2(?$>18.AWa`<qQ;FS*8d605U,LRjPYl%CQZ"EZ)d6ggmR/\emf.%.#K=ZXlPbU\40kfi-URgEX``iXe1pOV?N=StFNQ>H$Fi,Ak&SQPl+Y^;rG>nArp/_q%9B[r]_;\_^p'[__7OH7)iuf]c[rld?RB/M<r(<QsU%pNedj)1NmPM-_fL1VD1tNQL&@c-=<:"`[Vpojg6J[HJ4:,T\L_]InN3jJke4J(kV<hYN(d]b#E=":iOW#=k#-U%PKO/p'+,)f951AW&jRK9')Q>rP3T8Xk7<ZOVAq$3lpK6YL6tc'D2V%1G(jM8"TncWs=[!hW2(D30g$5(Q/MN1htIgRt\ADhN@$l202Af7(c#1P6?P("GPEU+>VY%=qG1""FA,mioCp,lF3^-AZtKRg/NFX>&kA^rZpnFA<r!,IA42rZQ6YFrrrLL)tME=&"E=g6gSrChSiOfRe!l*<?[tTYGRI@6&N"%Fn3=3;X6Dm0TH~>endstream
-endobj
-xref
-0 20
-0000000000 65535 f
-0000000061 00000 n
-0000000112 00000 n
-0000000219 00000 n
-0000000331 00000 n
-0000000536 00000 n
-0000000613 00000 n
-0000000818 00000 n
-0000000903 00000 n
-0000001223 00000 n
-0000001296 00000 n
-0000001426 00000 n
-0000001514 00000 n
-0000001667 00000 n
-0000001770 00000 n
-0000001869 00000 n
-0000001999 00000 n
-0000002098 00000 n
-0000002164 00000 n
-0000004006 00000 n
-trailer
-<<
-/ID
-[<8e273c2672d813e3cd44109eb1edd604><8e273c2672d813e3cd44109eb1edd604>]
-% ReportLab generated PDF document -- digest (opensource)
-/Info 8 0 R
-/Root 7 0 R
-/Size 20
->>
-startxref
-5349
-%%EOF

deps.py DELETED Viewed

@@ -1,46 +0,0 @@
-from __future__ import annotations
-from fastapi import Depends, Header, HTTPException, status
-from sqlalchemy.orm import Session
-from db.database import get_db
-from db.models import User
-from services.auth_service import decode_token, get_user
-def _extract_bearer(authorization: str | None) -> str | None:
-    """Return the token from a ``Bearer <token>`` header value, else ``None``.
-    The value must split into exactly two whitespace-separated parts and the
-    first part must equal "bearer" ignoring case.
-    """
-    pieces = (authorization or "").split()
-    if len(pieces) == 2 and pieces[0].lower() == "bearer":
-        return pieces[1]
-    return None
-def get_current_user(
-    authorization: str | None = Header(default=None),
-    db: Session = Depends(get_db),
-) -> User:
-    """FastAPI dependency that resolves the authenticated user or answers 401.
-    Args:
-        authorization: Raw ``Authorization`` header value, if any.
-        db: Database session used to look the user up.
-    Returns:
-        The user whose id is carried in the token's ``sub`` claim.
-    Raises:
-        HTTPException: 401 when the bearer token is missing, cannot be decoded,
-            carries no usable ``sub`` claim, or refers to an unknown user.
-    """
-    token = _extract_bearer(authorization)
-    if not token:
-        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Missing bearer token")
-    payload = decode_token(token)
-    if not payload or "sub" not in payload:
-        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired token")
-    try:
-        user_id = int(payload["sub"])
-    except (TypeError, ValueError):
-        # A non-numeric subject is a bad token, not a server fault: answer 401
-        # instead of letting the conversion error surface as a 500.
-        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired token") from None
-    user = get_user(db, user_id)
-    if not user:
-        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")
-    return user
-def optional_current_user(
-    authorization: str | None = Header(default=None),
-    db: Session = Depends(get_db),
-) -> User | None:
-    """FastAPI dependency that resolves the user when possible, else ``None``.
-    Unlike ``get_current_user`` this never raises for authentication problems:
-    a missing, undecodable, or malformed token simply means "anonymous".
-    Args:
-        authorization: Raw ``Authorization`` header value, if any.
-        db: Database session used to look the user up.
-    Returns:
-        The matching user, or ``None`` when no valid token/user is available.
-    """
-    token = _extract_bearer(authorization)
-    if not token:
-        return None
-    payload = decode_token(token)
-    if not payload or "sub" not in payload:
-        return None
-    try:
-        user_id = int(payload["sub"])
-    except (TypeError, ValueError):
-        # Non-numeric subject: treat as anonymous rather than failing the request.
-        return None
-    return get_user(db, user_id)

download_ffpp.py DELETED Viewed

@@ -1,261 +0,0 @@
-#!/usr/bin/env python
-""" Downloads FaceForensics++ and Deep Fake Detection public data release
-Example usage:
-    see -h or https://github.com/ondyari/FaceForensics
-"""
-# -*- coding: utf-8 -*-
-import argparse
-import os
-import urllib
-import urllib.request
-import tempfile
-import time
-import sys
-import json
-import random
-from tqdm import tqdm
-from os.path import join
-# URLs and filenames
-FILELIST_URL = 'misc/filelist.json'
-DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
-DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]
-# Parameters
-DATASETS = {
-    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
-    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
-    'original': 'original_sequences/youtube',
-    'DeepFakeDetection_original': 'original_sequences/actors',
-    'Deepfakes': 'manipulated_sequences/Deepfakes',
-    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
-    'Face2Face': 'manipulated_sequences/Face2Face',
-    'FaceShifter': 'manipulated_sequences/FaceShifter',
-    'FaceSwap': 'manipulated_sequences/FaceSwap',
-    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
-    }
-ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
-                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
-                'NeuralTextures']
-COMPRESSION = ['raw', 'c23', 'c40']
-TYPE = ['videos', 'masks', 'models']
-SERVERS = ['EU', 'EU2', 'CA']
-def parse_args():
-    """Parse the command line and attach the server-specific download URLs.
-    Returns:
-        The argparse namespace, extended with ``tos_url``, ``base_url`` and
-        ``deepfakes_model_url`` derived from the selected ``--server``.
-    """
-    parser = argparse.ArgumentParser(
-        description='Downloads FaceForensics v2 public data release.',
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument('output_path', type=str, help='Output directory.')
-    parser.add_argument(
-        '-d', '--dataset', type=str, default='all',
-        choices=list(DATASETS.keys()) + ['all'],
-        help='Which dataset to download, either pristine or '
-             'manipulated data or the downloaded youtube '
-             'videos.',
-    )
-    parser.add_argument(
-        '-c', '--compression', type=str, default='raw',
-        choices=COMPRESSION,
-        help='Which compression degree. All videos '
-             'have been generated with h264 with a varying '
-             'codec. Raw (c0) videos are lossless compressed.',
-    )
-    parser.add_argument(
-        '-t', '--type', type=str, default='videos',
-        choices=TYPE,
-        help='Which file type, i.e. videos, masks, for our '
-             'manipulation methods, models, for Deepfakes.',
-    )
-    parser.add_argument(
-        '-n', '--num_videos', type=int, default=None,
-        help='Select a number of videos number to '
-             "download if you don't want to download the full"
-             ' dataset.',
-    )
-    parser.add_argument(
-        '--server', type=str, default='EU',
-        choices=SERVERS,
-        help='Server to download the data from. If you '
-             'encounter a slow download speed, consider '
-             'changing the server.',
-    )
-    args = parser.parse_args()
-    # Mirror name -> root URL of that mirror.
-    server_urls = {
-        'EU': 'http://canis.vc.in.tum.de:8100/',
-        'EU2': 'http://kaldir.vc.in.tum.de/faceforensics/',
-        'CA': 'http://falas.cmpt.sfu.ca:8100/',
-    }
-    server_url = server_urls.get(args.server)
-    if server_url is None:
-        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))
-    args.tos_url = server_url + 'webpage/FaceForensics_TOS.pdf'
-    args.base_url = server_url + 'v3/'
-    args.deepfakes_model_url = args.base_url + 'manipulated_sequences/Deepfakes/models/'
-    return args
-def download_files(filenames, base_url, output_path, report_progress=True):
-    """Download every name in *filenames* from *base_url* into *output_path*.
-    The output directory is created when needed. With *report_progress* the
-    iteration is wrapped in a tqdm progress bar.
-    """
-    os.makedirs(output_path, exist_ok=True)
-    names = tqdm(filenames) if report_progress else filenames
-    for name in names:
-        download_file(base_url + name, join(output_path, name))
-def reporthook(count, block_size, total_size):
-    """Progress callback for ``urllib.request.urlretrieve``.
-    Args:
-        count: Number of blocks transferred so far; 0 on the initial call,
-            which only records the start time.
-        block_size: Size of one block in bytes.
-        total_size: Total size in bytes; zero or negative when it is unknown.
-    Writes a single carriage-return-prefixed status line to stdout.
-    """
-    global start_time
-    if count == 0:
-        start_time = time.time()
-        return
-    # Clamp to a tiny positive value: a callback arriving within the clock
-    # resolution of the first one used to raise ZeroDivisionError below.
-    duration = max(time.time() - start_time, 1e-6)
-    progress_size = int(count * block_size)
-    speed = int(progress_size / (1024 * duration))
-    if total_size > 0:
-        # The final block usually overshoots the real size; cap at 100%.
-        percent = min(int(progress_size * 100 / total_size), 100)
-    else:
-        # Unknown size: show 0% instead of dividing by zero or going negative.
-        percent = 0
-    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
-                     (percent, progress_size / (1024 * 1024), speed, duration))
-    sys.stdout.flush()
-def download_file(url, out_file, report_progress=False):
-    """Download *url* to *out_file*, skipping the download if the file exists.
-    The data is first written to a temporary file in the destination directory
-    and moved into place only after the transfer succeeded, so *out_file* never
-    holds a partial download.
-    Args:
-        url: Source URL, passed to ``urllib.request.urlretrieve``.
-        out_file: Destination path; its directory must already exist.
-        report_progress: When true, print progress through ``reporthook``.
-    Raises:
-        Whatever ``urlretrieve`` raises; the temporary file is removed first.
-    """
-    if os.path.isfile(out_file):
-        tqdm.write('WARNING: skipping download of existing file ' + out_file)
-        return
-    out_dir = os.path.dirname(out_file)
-    fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
-    # Only the reserved name is needed; urlretrieve reopens the path itself.
-    os.close(fh)
-    try:
-        if report_progress:
-            urllib.request.urlretrieve(url, out_file_tmp,
-                                       reporthook=reporthook)
-        else:
-            urllib.request.urlretrieve(url, out_file_tmp)
-        os.replace(out_file_tmp, out_file)
-    except BaseException:
-        # Also covers Ctrl-C during a long transfer: never leave a stale
-        # temporary file behind in the dataset directory.
-        if os.path.exists(out_file_tmp):
-            os.remove(out_file_tmp)
-        raise
-def main(args):
-    """Confirm the terms of use, then download the selected dataset(s).
-    For each chosen dataset the file list is fetched from the server as JSON,
-    optionally truncated to ``args.num_videos`` entries, and then downloaded as
-    videos, masks, or Deepfakes models depending on ``args.type``. The
-    ``original_youtube_videos*`` datasets are a single zip download, after
-    which the function returns immediately.
-    Args:
-        args: Namespace produced by ``parse_args`` (reads ``dataset``, ``type``,
-            ``compression``, ``num_videos``, ``output_path``, ``tos_url``,
-            ``base_url`` and ``deepfakes_model_url``).
-    """
-    # TOS
-    print('By pressing any key to continue you confirm that you have agreed '\
-          'to the FaceForensics terms of use as described at:')
-    print(args.tos_url)
-    print('***')
-    print('Press any key to continue, or CTRL-C to exit.')
-    # Blocks until the user presses Enter; the entered text is discarded.
-    _ = input('')
-    # Extract arguments
-    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
-    c_type = args.type
-    c_compression = args.compression
-    num_videos = args.num_videos
-    output_path = args.output_path
-    os.makedirs(output_path, exist_ok=True)
-    # Check for special dataset cases
-    for dataset in c_datasets:
-        dataset_path = DATASETS[dataset]
-        # Special cases
-        if 'original_youtube_videos' in dataset:
-            # Here we download the original youtube videos zip file
-            print('Downloading original youtube videos.')
-            if not 'info' in dataset_path:
-                print('Please be patient, this may take a while (~40gb)')
-                suffix = ''
-            else:
-            	suffix = 'info'
-            # NOTE(review): with suffix 'info' the output name is
-            # 'downloaded_videosinfo.zip' (no separator) — confirm this is intended.
-            download_file(args.base_url + '/' + dataset_path,
-                          out_file=join(output_path,
-                                        'downloaded_videos{}.zip'.format(
-                                            suffix)),
-                          report_progress=True)
-            # Leaves main() entirely, not just this loop iteration.
-            return
-        # Else: regular datasets
-        print('Downloading {} of dataset "{}"'.format(
-            c_type, dataset_path
-        ))
-        # Get filelists and video lengths list from server
-        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
-        	filepaths = json.loads(urllib.request.urlopen(args.base_url + '/' +
-                DEEPFEAKES_DETECTION_URL).read().decode("utf-8"))
-        	if 'actors' in dataset_path:
-        		filelist = filepaths['actors']
-        	else:
-        		filelist = filepaths['DeepFakesDetection']
-        elif 'original' in dataset_path:
-            # Load filelist from server
-            file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
-                FILELIST_URL).read().decode("utf-8"))
-            # Flatten the pairs: every id of every pair becomes one entry.
-            filelist = []
-            for pair in file_pairs:
-            	filelist += pair
-        else:
-            # Load filelist from server
-            file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
-                FILELIST_URL).read().decode("utf-8"))
-            # Get filelist
-            filelist = []
-            for pair in file_pairs:
-                filelist.append('_'.join(pair))
-                # Everything except models also gets the reversed pair.
-                if c_type != 'models':
-                    filelist.append('_'.join(pair[::-1]))
-        # Maybe limit number of videos for download
-        if num_videos is not None and num_videos > 0:
-        	print('Downloading the first {} videos'.format(num_videos))
-        	filelist = filelist[:num_videos]
-        # Server and local paths
-        dataset_videos_url = args.base_url + '{}/{}/{}/'.format(
-            dataset_path, c_compression, c_type)
-        # NOTE(review): the template has two placeholders, so the third
-        # argument (c_type) is ignored here.
-        dataset_mask_url = args.base_url + '{}/{}/videos/'.format(
-            dataset_path, 'masks', c_type)
-        if c_type == 'videos':
-            dataset_output_path = join(output_path, dataset_path, c_compression,
-                                       c_type)
-            print('Output path: {}'.format(dataset_output_path))
-            filelist = [filename + '.mp4' for filename in filelist]
-            download_files(filelist, dataset_videos_url, dataset_output_path)
-        elif c_type == 'masks':
-            dataset_output_path = join(output_path, dataset_path, c_type,
-                                       'videos')
-            print('Output path: {}'.format(dataset_output_path))
-            if 'original' in dataset:
-                if args.dataset != 'all':
-                    print('Only videos available for original data. Aborting.')
-                    return
-                else:
-                    print('Only videos available for original data. '
-                          'Skipping original.\n')
-                    continue
-            if 'FaceShifter' in dataset:
-                print('Masks not available for FaceShifter. Aborting.')
-                return
-            filelist = [filename + '.mp4' for filename in filelist]
-            download_files(filelist, dataset_mask_url, dataset_output_path)
-        # Else: models for deepfakes
-        else:
-            if dataset != 'Deepfakes' and c_type == 'models':
-                print('Models only available for Deepfakes. Aborting')
-                return
-            dataset_output_path = join(output_path, dataset_path, c_type)
-            print('Output path: {}'.format(dataset_output_path))
-            # Get Deepfakes models
-            for folder in tqdm(filelist):
-                folder_filelist = DEEPFAKES_MODEL_NAMES
-                # Folder paths
-                folder_base_url = args.deepfakes_model_url + folder + '/'
-                folder_dataset_output_path = join(dataset_output_path,
-                                                  folder)
-                download_files(folder_filelist, folder_base_url,
-                               folder_dataset_output_path,
-                               report_progress=False)   # already done
-if __name__ == "__main__":
-    args = parse_args()
-    main(args)

ela_service.py DELETED Viewed

@@ -1,88 +0,0 @@
-"""Error Level Analysis (ELA) — Phase 12.1
-Re-saves an image at a fixed JPEG quality and diffs against the original to reveal
-per-pixel manipulation artifacts. Regions that were recently edited will show
-higher error levels than untouched areas.
-"""
-from __future__ import annotations
-import base64
-import io
-import cv2
-import numpy as np
-from loguru import logger
-from PIL import Image
-def _compute_ela(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> np.ndarray:
-    """Compute the Error Level Analysis difference map of an image.
-    The image is converted to RGB, re-encoded in memory as JPEG at *quality*,
-    and compared pixel by pixel with the RGB original.
-    Args:
-        pil_img: Input image; converted to RGB before processing.
-        quality: JPEG quality used for the in-memory re-save.
-        scale: Factor applied to the absolute difference before clipping.
-    Returns:
-        The amplified difference, clipped to 0..255, as a uint8 array.
-    """
-    source = pil_img.convert("RGB")
-    with io.BytesIO() as jpeg_stream:
-        source.save(jpeg_stream, format="JPEG", quality=quality)
-        jpeg_stream.seek(0)
-        # Decode while the stream is still open.
-        recompressed = np.array(Image.open(jpeg_stream).convert("RGB"), dtype=np.float32)
-    error = np.abs(np.array(source, dtype=np.float32) - recompressed) * scale
-    return np.clip(error, 0, 255).astype(np.uint8)
-def generate_ela_base64(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> str:
-    """Return the ELA difference map of *pil_img* as a base64 PNG data URL.
-    Brighter regions in the encoded image correspond to higher error levels.
-    *quality* and *scale* are forwarded unchanged to ``_compute_ela``.
-    """
-    ela_map = _compute_ela(pil_img, quality=quality, scale=scale)
-    png_stream = io.BytesIO()
-    Image.fromarray(ela_map).save(png_stream, format="PNG")
-    encoded = base64.b64encode(png_stream.getvalue()).decode("ascii")
-    logger.info(f"ELA map generated ({ela_map.shape[1]}x{ela_map.shape[0]})")
-    return f"data:image/png;base64,{encoded}"
-def generate_blended_ela_base64(
-    pil_img: Image.Image,
-    gradcam_weight: float = 0.6,
-    ela_weight: float = 0.4,
-    quality: int = 90,
-    scale: float = 15.0,
-) -> str:
-    """Return the original image blended 50/50 with its ELA map as a PNG data URL.
-    Utility for the 'blended' mode: the ELA difference map is composited onto
-    the RGB original so the error levels can be read in visual context.
-    NOTE(review): ``gradcam_weight`` and ``ela_weight`` are accepted but never
-    read; no Grad-CAM input is involved and the blend weights are fixed at
-    0.5 / 0.5. Confirm whether the weights were meant to drive the blend.
-    Args:
-        pil_img: Input image; converted to RGB before blending.
-        gradcam_weight: Currently unused.
-        ela_weight: Currently unused.
-        quality: JPEG re-save quality forwarded to ``_compute_ela``.
-        scale: Amplification factor forwarded to ``_compute_ela``.
-    Returns:
-        A ``data:image/png;base64,...`` URL of the blended image.
-    """
-    rgb = pil_img.convert("RGB")
-    original_arr = np.array(rgb, dtype=np.float32)
-    ela_arr = _compute_ela(pil_img, quality=quality, scale=scale).astype(np.float32)
-    # Blend: overlay ELA on the original for visual context (fixed equal weights)
-    blended = np.clip(original_arr * 0.5 + ela_arr * 0.5, 0, 255).astype(np.uint8)
-    buf = io.BytesIO()
-    Image.fromarray(blended).save(buf, format="PNG")
-    b64 = base64.b64encode(buf.getvalue()).decode("ascii")
-    logger.info(f"Blended ELA generated ({blended.shape[1]}x{blended.shape[0]})")
-    return f"data:image/png;base64,{b64}"

exif_service.py DELETED Viewed

@@ -1,129 +0,0 @@
-"""EXIF Metadata Extraction — Phase 12.2
-Extracts camera metadata from uploaded images and computes a trust adjustment
-score: presence of authentic camera metadata lowers fake probability, while
-evidence of editing software raises it.
-"""
-from __future__ import annotations
-from typing import Optional
-from loguru import logger
-from PIL import Image
-from PIL.ExifTags import TAGS, GPSTAGS
-from schemas.common import ExifSummary
-# Software strings that suggest post-processing / generation
-_SUSPICIOUS_SOFTWARE = {
-    "adobe photoshop", "photoshop", "gimp", "affinity photo",
-    "stable diffusion", "midjourney", "dall-e", "comfyui",
-    "automatic1111", "invokeai",
-}
-# Software strings that are normal camera firmware
-_CAMERA_SOFTWARE = {
-    "ver.", "firmware", "camera", "dji", "gopro",
-}
-def _decode_gps(gps_info: dict) -> Optional[str]:
-    """Decode EXIF GPSInfo dict into a human-readable lat/lon string."""
-    try:
-        def _to_decimal(values, ref):
-            d, m, s = [float(v) for v in values]
-            decimal = d + m / 60.0 + s / 3600.0
-            if ref in ("S", "W"):
-                decimal = -decimal
-            return decimal
-        lat = _to_decimal(gps_info.get(2, (0, 0, 0)), gps_info.get(1, "N"))
-        lon = _to_decimal(gps_info.get(4, (0, 0, 0)), gps_info.get(3, "E"))
-        return f"{lat:.6f}, {lon:.6f}"
-    except Exception:
-        return None
-def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
-    """Extract EXIF metadata and compute a trust adjustment score.
-    Trust adjustment logic:
-    - Valid Make + Model + DateTimeOriginal → -15 (more likely real camera photo)
-    - GPS info present → -5 additional (real photos often have GPS)
-    - Suspicious editing software detected → +10 (more likely manipulated)
-    - No EXIF at all → 0 (inconclusive — many platforms strip EXIF)
-    """
-    summary = ExifSummary()
-    try:
-        exif_data = pil_img._getexif()
-    except Exception:
-        exif_data = None
-    if not exif_data:
-        # Try exifread as fallback for formats Pillow doesn't handle well
-        try:
-            import exifread
-            from io import BytesIO
-            tags = exifread.process_file(BytesIO(raw_bytes), details=False)
-            if tags:
-                summary.make = str(tags.get("Image Make", "")).strip() or None
-                summary.model = str(tags.get("Image Model", "")).strip() or None
-                summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
-                summary.software = str(tags.get("Image Software", "")).strip() or None
-                summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
-        except ImportError:
-            logger.debug("exifread not installed, skipping fallback EXIF extraction")
-        except Exception as e:
-            logger.debug(f"exifread fallback failed: {e}")
-    else:
-        # Decode Pillow EXIF
-        decoded = {}
-        for tag_id, value in exif_data.items():
-            tag_name = TAGS.get(tag_id, tag_id)
-            decoded[tag_name] = value
-        summary.make = str(decoded.get("Make", "")).strip() or None
-        summary.model = str(decoded.get("Model", "")).strip() or None
-        summary.datetime_original = str(decoded.get("DateTimeOriginal", "")).strip() or None
-        summary.software = str(decoded.get("Software", "")).strip() or None
-        summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
-        # GPS
-        gps_raw = decoded.get("GPSInfo")
-        if gps_raw and isinstance(gps_raw, dict):
-            gps_decoded = {}
-            for k, v in gps_raw.items():
-                gps_decoded[GPSTAGS.get(k, k)] = v
-            summary.gps_info = _decode_gps(gps_decoded)
-    # ── Trust adjustment scoring ──
-    adjustment = 0
-    reasons = []
-    has_camera_meta = summary.make and summary.model and summary.datetime_original
-    if has_camera_meta:
-        adjustment -= 15
-        reasons.append("valid camera metadata (Make/Model/DateTime)")
-    if summary.gps_info:
-        adjustment -= 5
-        reasons.append("GPS coordinates present")
-    if summary.software:
-        sw_lower = summary.software.lower()
-        if any(s in sw_lower for s in _SUSPICIOUS_SOFTWARE):
-            adjustment += 10
-            reasons.append(f"editing software detected: {summary.software}")
-        elif any(s in sw_lower for s in _CAMERA_SOFTWARE):
-            adjustment -= 2
-            reasons.append("camera firmware in Software field")
-    summary.trust_adjustment = adjustment
-    summary.trust_reason = "; ".join(reasons) if reasons else "no EXIF metadata found"
-    logger.info(f"EXIF extracted: make={summary.make}, model={summary.model}, "
-                f"adjustment={adjustment} ({summary.trust_reason})")
-    return summary

file_handler.py DELETED Viewed

@@ -1,96 +0,0 @@
-from __future__ import annotations
-import io
-import os
-import tempfile
-from typing import Iterable
-from fastapi import HTTPException, UploadFile, status
-from config import settings
-IMAGE_MAGIC_BYTES: dict[bytes, str] = {
-    b"\xff\xd8\xff": "image/jpeg",
-    b"\x89PNG\r\n\x1a\n": "image/png",
-    b"RIFF": "image/webp",  # partial; WEBP has 'RIFF....WEBP'
-}
-def _detect_mime_by_magic(head: bytes) -> str | None:
-    for sig, mime in IMAGE_MAGIC_BYTES.items():
-        if head.startswith(sig):
-            if mime == "image/webp" and b"WEBP" not in head[:16]:
-                continue
-            return mime
-    return None
-async def read_upload_bytes(
-    file: UploadFile,
-    allowed_mimes: Iterable[str],
-    max_size_mb: int,
-) -> tuple[bytes, str]:
-    """Read an UploadFile into memory after validating type and size.
-    Returns (raw_bytes, detected_mime). Raises HTTPException on failure.
-    """
-    data = await file.read()
-    size_mb = len(data) / (1024 * 1024)
-    if size_mb > max_size_mb:
-        raise HTTPException(
-            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
-            detail=f"File too large ({size_mb:.1f} MB > {max_size_mb} MB)",
-        )
-    mime = _detect_mime_by_magic(data[:16]) or (file.content_type or "")
-    if mime not in allowed_mimes:
-        raise HTTPException(
-            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
-            detail=f"Unsupported type '{mime}'. Allowed: {list(allowed_mimes)}",
-        )
-    return data, mime
-def bytes_to_buffer(data: bytes) -> io.BytesIO:
-    return io.BytesIO(data)
-async def save_upload_to_tempfile(
-    file: UploadFile,
-    allowed_mimes: Iterable[str],
-    max_size_mb: int,
-    suffix: str = ".mp4",
-) -> tuple[str, str]:
-    """Stream an UploadFile to a temp file on disk. Returns (path, mime).
-    MIME is taken from the client's content_type (no magic-byte check for videos).
-    Caller is responsible for deleting the temp file.
-    """
-    mime = (file.content_type or "").lower()
-    if mime not in allowed_mimes:
-        raise HTTPException(
-            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
-            detail=f"Unsupported type '{mime}'. Allowed: {list(allowed_mimes)}",
-        )
-    max_bytes = max_size_mb * 1024 * 1024
-    fd, path = tempfile.mkstemp(suffix=suffix, prefix="ds_vid_")
-    written = 0
-    try:
-        with os.fdopen(fd, "wb") as out:
-            while True:
-                chunk = await file.read(1024 * 1024)
-                if not chunk:
-                    break
-                written += len(chunk)
-                if written > max_bytes:
-                    raise HTTPException(
-                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
-                        detail=f"File too large (> {max_size_mb} MB)",
-                    )
-                out.write(chunk)
-    except Exception:
-        try:
-            os.unlink(path)
-        except OSError:
-            pass
-        raise
-    return path, mime

generate_colab_nb.py DELETED Viewed

@@ -1,213 +0,0 @@
-import nbformat as nbf
-import os
-nb = nbf.v4.new_notebook()
-text = """\
-# DeepShield: FaceForensics++ ViT Training
-Run this entirely in Google Colab.
-**Before running**:
-1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.
-2. Run the cells below sequentially.
-"""
-code_install = """\
-!pip install timm transformers datasets accelerate evaluate opencv-python
-"""
-code_ffpp = """\
-# We create the download script inside the Colab environment
-download_script = '''#!/usr/bin/env python
-import argparse
-import os
-import urllib.request
-import tempfile
-import time
-import sys
-import json
-from tqdm import tqdm
-from os.path import join
-FILELIST_URL = 'misc/filelist.json'
-DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
-DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]
-DATASETS = {
-    'original': 'original_sequences/youtube',
-    'Deepfakes': 'manipulated_sequences/Deepfakes',
-    'Face2Face': 'manipulated_sequences/Face2Face',
-    'FaceShifter': 'manipulated_sequences/FaceShifter',
-    'FaceSwap': 'manipulated_sequences/FaceSwap',
-    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
-}
-ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']
-COMPRESSION = ['raw', 'c23', 'c40']
-TYPE = ['videos']
-def download_file(url, out_file):
-    os.makedirs(os.path.dirname(out_file), exist_ok=True)
-    if not os.path.isfile(out_file):
-        urllib.request.urlretrieve(url, out_file)
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument('output_path', type=str)
-    parser.add_argument('-d', '--dataset', type=str, default='all')
-    parser.add_argument('-c', '--compression', type=str, default='c40')
-    parser.add_argument('-t', '--type', type=str, default='videos')
-    parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial
-    args = parser.parse_args()
-    base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
-    datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
-    for dataset in datasets:
-        dataset_path = DATASETS[dataset]
-        print(f'Downloading {args.compression} of {dataset}')
-        file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode("utf-8"))
-        filelist = []
-        if 'original' in dataset_path:
-            for pair in file_pairs:
-                filelist += pair
-        else:
-            for pair in file_pairs:
-                filelist.append('_'.join(pair))
-                filelist.append('_'.join(pair[::-1]))
-        filelist = filelist[:args.num_videos]
-        dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'
-        dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)
-        for filename in tqdm(filelist):
-            download_file(dataset_videos_url + filename + ".mp4", join(dataset_output_path, filename + ".mp4"))
-if __name__ == "__main__":
-    main()
-'''
-with open("download_ffpp.py", "w") as f:
-    f.write(download_script)
-!python download_ffpp.py ./data -d all -c c40 -t videos -n 50
-"""
-code_extract = """\
-import cv2
-import os
-import glob
-from tqdm import tqdm
-def extract_frames(video_folder, output_folder, label, max_frames=4):
-    os.makedirs(output_folder, exist_ok=True)
-    videos = glob.glob(os.path.join(video_folder, "*.mp4"))
-    for vid_path in tqdm(videos, desc=f"Extracting {label}"):
-        vid_name = os.path.basename(vid_path).replace('.mp4','')
-        cap = cv2.VideoCapture(vid_path)
-        count = 0
-        while cap.isOpened() and count < max_frames:
-            ret, frame = cap.read()
-            if not ret: break
-            frame = cv2.resize(frame, (224, 224))
-            out_path = os.path.join(output_folder, f"{vid_name}_f{count}.jpg")
-            cv2.imwrite(out_path, frame)
-            count += 1
-        cap.release()
-# Extract Real
-extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')
-# Extract Fakes
-fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']
-for f in fakes:
-    extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake')
-"""
-code_train = """\
-import numpy as np
-from datasets import load_dataset
-from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
-import torch
-# 1. Load Dataset
-dataset = load_dataset('imagefolder', data_dir='./dataset')
-# Split into train/validation
-dataset = dataset['train'].train_test_split(test_size=0.1)
-# 2. Preprocessor
-model_name_or_path = 'google/vit-base-patch16-224-in21k'
-processor = ViTImageProcessor.from_pretrained(model_name_or_path)
-def transform(example_batch):
-    # Take a list of PIL images and turn them to pixel values
-    inputs = processor([x.convert("RGB") for x in example_batch['image']], return_tensors='pt')
-    inputs['labels'] = example_batch['label']
-    return inputs
-prepared_ds = dataset.with_transform(transform)
-def collate_fn(batch):
-    return {
-        'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
-        'labels': torch.tensor([x['labels'] for x in batch])
-    }
-# 3. Load Model
-labels = dataset['train'].features['label'].names
-model = ViTForImageClassification.from_pretrained(
-    model_name_or_path,
-    num_labels=len(labels),
-    id2label={str(i): c for i, c in enumerate(labels)},
-    label2id={c: str(i) for i, c in enumerate(labels)}
-)
-training_args = TrainingArguments(
-    output_dir="./vit-deepshield",
-    per_device_train_batch_size=16,
-    eval_strategy="steps",
-    num_train_epochs=3,
-    fp16=True, # Mixed precision for speed
-    save_steps=100,
-    eval_steps=100,
-    logging_steps=10,
-    learning_rate=2e-4,
-    save_total_limit=2,
-    remove_unused_columns=False,
-    push_to_hub=False,
-    load_best_model_at_end=True,
-)
-import evaluate
-metric = evaluate.load("accuracy")
-def compute_metrics(p):
-    return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)
-trainer = Trainer(
-    model=model,
-    args=training_args,
-    data_collator=collate_fn,
-    compute_metrics=compute_metrics,
-    train_dataset=prepared_ds["train"],
-    eval_dataset=prepared_ds["test"],
-)
-# 4. Train
-train_results = trainer.train()
-trainer.save_model("deepshield_vit_model")
-processor.save_pretrained("deepshield_vit_model")
-trainer.log_metrics("train", train_results.metrics)
-trainer.save_metrics("train", train_results.metrics)
-trainer.save_state()
-print("Training Complete! The model is saved to ./deepshield_vit_model")
-"""
-nb['cells'] = [
-    nbf.v4.new_markdown_cell(text),
-    nbf.v4.new_code_cell(code_install),
-    nbf.v4.new_code_cell(code_ffpp),
-    nbf.v4.new_code_cell(code_extract),
-    nbf.v4.new_code_cell(code_train)
-]
-with open(r'c:\Users\athar\Desktop\minor2\backend\training\Colab_ViT_Training.ipynb', 'w', encoding='utf-8') as f:
-    nbf.write(nb, f)

heatmap_generator.py DELETED Viewed

@@ -1,164 +0,0 @@
-from __future__ import annotations
-import base64
-import io
-from typing import Optional
-import cv2
-import numpy as np
-import torch
-from loguru import logger
-from PIL import Image
-from pytorch_grad_cam import GradCAMPlusPlus
-from pytorch_grad_cam.utils.image import show_cam_on_image
-from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
-from config import settings
-from models.model_loader import get_model_loader
-class _HFLogitsWrapper(torch.nn.Module):
-    """Wrap a HuggingFace image classification model so forward() returns logits
-    as a plain tensor (pytorch_grad_cam expects tensor outputs, not dicts/dataclasses).
-    """
-    def __init__(self, model: torch.nn.Module) -> None:
-        super().__init__()
-        self.model = model
-    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
-        return self.model(pixel_values=pixel_values).logits
-def _vit_reshape_transform(tensor: torch.Tensor, height: int = 14, width: int = 14) -> torch.Tensor:
-    """Grad-CAM expects (B, C, H, W); ViT hidden states are (B, 1+H*W, C).
-    Drop the CLS token and reshape tokens into a spatial grid.
-    """
-    result = tensor[:, 1:, :]
-    b, n, c = result.shape
-    result = result.reshape(b, height, width, c)
-    result = result.permute(0, 3, 1, 2)  # (B, C, H, W)
-    return result
-def _preprocess_for_cam(pil_img: Image.Image, processor) -> tuple[torch.Tensor, np.ndarray]:
-    """Return (input_tensor, rgb_float_224) where rgb_float_224 is a (H,W,3) float
-    array in [0,1] matching the model input geometry — needed for overlaying.
-    """
-    inputs = processor(images=pil_img, return_tensors="pt")
-    input_tensor = inputs["pixel_values"].to(settings.DEVICE)
-    size = getattr(processor, "size", {"height": 224, "width": 224})
-    h = size.get("height", 224) if isinstance(size, dict) else 224
-    w = size.get("width", 224) if isinstance(size, dict) else 224
-    resized = pil_img.resize((w, h), Image.BILINEAR)
-    rgb = np.array(resized).astype(np.float32) / 255.0  # (H,W,3) in [0,1]
-    return input_tensor, rgb
-def _encode_overlay_to_base64(overlay: np.ndarray) -> str:
-    """Encode a uint8 (H,W,3) RGB overlay to a base64 data-URL PNG."""
-    buf = io.BytesIO()
-    Image.fromarray(overlay).save(buf, format="PNG")
-    b64 = base64.b64encode(buf.getvalue()).decode("ascii")
-    return f"data:image/png;base64,{b64}"
-def _compute_gradcam_pp(
-    pil_img: Image.Image,
-    target_class_idx: Optional[int] = None,
-) -> tuple[np.ndarray, np.ndarray]:
-    """Compute Grad-CAM++ averaged across the last 3 ViT encoder layers.
-    Returns (grayscale_cam, rgb_float) where grayscale_cam is (H,W) in [0,1].
-    """
-    loader = get_model_loader()
-    model, processor = loader.load_image_model()
-    model.eval()
-    for p in model.parameters():
-        p.requires_grad_(True)
-    input_tensor, rgb_float = _preprocess_for_cam(pil_img, processor)
-    grid = int(model.config.image_size / model.config.patch_size)
-    # Average across last 3 ViT encoder layers for smoother heatmaps
-    num_layers = len(model.vit.encoder.layer)
-    last_n = min(3, num_layers)
-    target_layers = [
-        model.vit.encoder.layer[-(i + 1)].layernorm_before
-        for i in range(last_n)
-    ]
-    wrapped = _HFLogitsWrapper(model)
-    targets = None
-    if target_class_idx is not None:
-        targets = [ClassifierOutputTarget(int(target_class_idx))]
-    with GradCAMPlusPlus(
-        model=wrapped,
-        target_layers=target_layers,
-        reshape_transform=lambda t: _vit_reshape_transform(t, grid, grid),
-    ) as cam:
-        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0]  # (H,W) in [0,1]
-    return grayscale_cam, rgb_float
-def generate_heatmap_base64(
-    pil_img: Image.Image,
-    target_class_idx: Optional[int] = None,
-) -> str:
-    """Produce a base64 data-URL PNG of the Grad-CAM++ overlay for the given image."""
-    grayscale_cam, rgb_float = _compute_gradcam_pp(pil_img, target_class_idx)
-    overlay = show_cam_on_image(rgb_float, grayscale_cam, use_rgb=True)
-    logger.info(f"Heatmap generated ({overlay.shape[0]}x{overlay.shape[1]})")
-    return _encode_overlay_to_base64(overlay)
-def generate_boxes_base64(
-    pil_img: Image.Image,
-    target_class_idx: Optional[int] = None,
-    top_k: int = 5,
-    threshold: float = 0.4,
-) -> str:
-    """Produce bounding boxes around top-K connected components from Grad-CAM++ activation.
-    Renders colored boxes (red/yellow/orange by intensity) on the original image.
-    """
-    grayscale_cam, rgb_float = _compute_gradcam_pp(pil_img, target_class_idx)
-    h, w = rgb_float.shape[:2]
-    base_img = (rgb_float * 255).astype(np.uint8).copy()
-    # Threshold the heatmap to find activated regions
-    binary = (grayscale_cam >= threshold).astype(np.uint8) * 255
-    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    if not contours:
-        logger.info("No significant activation regions found for bounding boxes")
-        return _encode_overlay_to_base64(base_img)
-    # Sort by area descending, take top_k
-    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:top_k]
-    # Color by mean activation intensity within each box
-    for cnt in contours:
-        x, y, bw, bh = cv2.boundingRect(cnt)
-        region_activation = grayscale_cam[y:y + bh, x:x + bw].mean()
-        if region_activation >= 0.7:
-            color = (220, 40, 40)    # red — high suspicion
-        elif region_activation >= 0.5:
-            color = (240, 140, 20)   # orange — medium
-        else:
-            color = (230, 200, 40)   # yellow — lower
-        cv2.rectangle(base_img, (x, y), (x + bw, y + bh), color, 2)
-        label = f"{region_activation * 100:.0f}%"
-        cv2.putText(base_img, label, (x, max(y - 6, 12)),
-                     cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1, cv2.LINE_AA)
-    logger.info(f"Bounding boxes generated: {len(contours)} regions")
-    return _encode_overlay_to_base64(base_img)

image_service.py DELETED Viewed

@@ -1,58 +0,0 @@
-from __future__ import annotations
-import io
-from dataclasses import dataclass
-from typing import Tuple
-import torch
-from loguru import logger
-from PIL import Image
-from config import settings
-from models.model_loader import get_model_loader
-@dataclass
-class ImageClassification:
-    label: str
-    confidence: float
-    all_scores: dict[str, float]
-def load_image_from_bytes(data: bytes) -> Image.Image:
-    img = Image.open(io.BytesIO(data))
-    if img.mode != "RGB":
-        img = img.convert("RGB")
-    return img
-def classify_image(pil_img: Image.Image) -> ImageClassification:
-    """Run the ViT deepfake classifier on a PIL image."""
-    loader = get_model_loader()
-    model, processor = loader.load_image_model()
-    inputs = processor(images=pil_img, return_tensors="pt")
-    inputs = {k: v.to(settings.DEVICE) for k, v in inputs.items()}
-    with torch.no_grad():
-        outputs = model(**inputs)
-        logits = outputs.logits  # (1, num_labels)
-        probs = torch.softmax(logits, dim=-1)[0]
-    id2label: dict[int, str] = getattr(model.config, "id2label", {})
-    all_scores = {id2label.get(i, str(i)): float(p.item()) for i, p in enumerate(probs)}
-    top_idx = int(torch.argmax(probs).item())
-    top_label = id2label.get(top_idx, str(top_idx))
-    top_conf = float(probs[top_idx].item())
-    logger.info(f"Image classify → {top_label} @ {top_conf:.3f}")
-    return ImageClassification(label=top_label, confidence=top_conf, all_scores=all_scores)
-def preprocess_and_classify(raw_bytes: bytes) -> Tuple[Image.Image, ImageClassification]:
-    """Convenience: decode bytes → PIL → classify. Returns the PIL image too so
-    downstream steps (heatmap, artifact scan) can reuse it.
-    """
-    pil = load_image_from_bytes(raw_bytes)
-    result = classify_image(pil)
-    return pil, result

llm_explainer.py DELETED Viewed

@@ -1,191 +0,0 @@
-"""LLM Explainability Card — Phase 12.3
-Generates a plain-English summary paragraph + 3 key-signal bullets from the
-full analysis payload.  Supports Gemini (default) and OpenAI providers.
-Results are cached per record_id to avoid re-spending tokens.
-"""
-from __future__ import annotations
-import json
-from abc import ABC, abstractmethod
-from functools import lru_cache
-from typing import Any
-from loguru import logger
-from config import settings
-from schemas.common import LLMExplainabilitySummary
-# ── In-memory cache keyed by record_id ──
-_cache: dict[str, LLMExplainabilitySummary] = {}
-_PROMPT_TEMPLATE = """\
-You are DeepShield's explainability engine. Given the JSON analysis payload below,
-write a concise, accessible summary for a non-technical user.
-**Output format (strict JSON only — no markdown fences):**
-{{
-  "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
-  "bullets": [
-    "<key signal 1>",
-    "<key signal 2>",
-    "<key signal 3>"
-  ]
-}}
-Rules:
-- Be factual. State what the analysis found, not what you speculate.
-- Reference specific indicators (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
-- If the verdict is "Likely Authentic", reassure the user and explain why.
-- If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence.
-- Keep the paragraph under 60 words. Each bullet under 20 words.
-**Analysis payload:**
-{payload_json}
-"""
-class _LLMProvider(ABC):
-    @abstractmethod
-    def generate(self, prompt: str) -> str:
-        """Send prompt to LLM and return raw text response."""
-class _GeminiProvider(_LLMProvider):
-    def __init__(self) -> None:
-        import google.generativeai as genai
-        genai.configure(api_key=settings.LLM_API_KEY)
-        self._model = genai.GenerativeModel(settings.LLM_MODEL)
-    def generate(self, prompt: str) -> str:
-        response = self._model.generate_content(prompt)
-        return response.text
-class _OpenAIProvider(_LLMProvider):
-    def __init__(self) -> None:
-        from openai import OpenAI
-        self._client = OpenAI(api_key=settings.LLM_API_KEY)
-    def generate(self, prompt: str) -> str:
-        response = self._client.chat.completions.create(
-            model=settings.LLM_MODEL,
-            messages=[{"role": "user", "content": prompt}],
-            temperature=0.3,
-            max_tokens=300,
-        )
-        return response.choices[0].message.content
-@lru_cache(maxsize=1)
-def _get_provider() -> _LLMProvider:
-    """Lazy-init the configured LLM provider (singleton)."""
-    provider_name = settings.LLM_PROVIDER.lower()
-    if provider_name == "openai":
-        return _OpenAIProvider()
-    return _GeminiProvider()
-def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
-    """Parse the LLM's JSON response into (paragraph, bullets).
-    Handles cases where the LLM wraps output in markdown fences.
-    """
-    text = raw.strip()
-    # Strip markdown code fences if present
-    if text.startswith("```"):
-        lines = text.split("\n")
-        # Remove first and last fence lines
-        lines = [l for l in lines if not l.strip().startswith("```")]
-        text = "\n".join(lines).strip()
-    parsed = json.loads(text)
-    paragraph = parsed.get("paragraph", "")
-    bullets = parsed.get("bullets", [])
-    if not isinstance(bullets, list):
-        bullets = [str(bullets)]
-    return paragraph, bullets[:3]
-def generate_llm_summary(
-    payload: dict[str, Any],
-    record_id: str | None = None,
-) -> LLMExplainabilitySummary:
-    """Generate an LLM-powered plain-English explanation for an analysis result.
-    Args:
-        payload: The full analysis response dict (verdict, scores, indicators, etc.).
-        record_id: Optional cache key. If provided and cached, returns cached result.
-    Returns:
-        LLMExplainabilitySummary with paragraph, bullets, and model info.
-    """
-    # Check cache
-    if record_id and record_id in _cache:
-        logger.debug(f"LLM summary cache hit for record_id={record_id}")
-        cached = _cache[record_id]
-        cached.cached = True
-        return cached
-    # Guard: no API key configured
-    if not settings.LLM_API_KEY:
-        logger.warning("LLM_API_KEY not set — using deterministic fallback summary")
-        verdict_data = payload.get("verdict", {})
-        label = verdict_data.get("label", "Unknown")
-        score = verdict_data.get("authenticity_score", 50)
-        return LLMExplainabilitySummary(
-            paragraph=f"The DeepShield AI engine has analyzed this media and determined it is '{label}' with an authenticity score of {score}/100. We arrived at this conclusion by passing the file through our deepfake detection algorithms, artifact scanners, and metadata analyzers.",
-            bullets=[
-                f"Overall Authenticity Score: {score}/100",
-                f"Primary Verdict: {label}",
-                "Note: Configure an LLM API key for deeper contextual analysis."
-            ],
-            model_used="static-fallback",
-        )
-    # Strip heavy base64 fields to reduce token usage
-    slim_payload = {k: v for k, v in payload.items()
-                    if k not in ("explainability",)}
-    # Include explainability but strip base64 images
-    if "explainability" in payload and isinstance(payload["explainability"], dict):
-        expl = {k: v for k, v in payload["explainability"].items()
-                if not k.endswith("_base64")}
-        slim_payload["explainability"] = expl
-    prompt = _PROMPT_TEMPLATE.format(payload_json=json.dumps(slim_payload, indent=2, default=str))
-    try:
-        provider = _get_provider()
-        raw_response = provider.generate(prompt)
-        paragraph, bullets = _parse_llm_response(raw_response)
-        summary = LLMExplainabilitySummary(
-            paragraph=paragraph,
-            bullets=bullets,
-            model_used=f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
-        )
-        # Cache result
-        if record_id:
-            _cache[record_id] = summary
-        logger.info(f"LLM summary generated via {settings.LLM_PROVIDER}/{settings.LLM_MODEL}")
-        return summary
-    except json.JSONDecodeError as e:
-        logger.error(f"LLM returned unparseable JSON: {e}")
-        return LLMExplainabilitySummary(
-            paragraph="Analysis complete. See the detailed indicators below for specifics.",
-            bullets=["LLM explanation could not be parsed"],
-            model_used=f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
-        )
-    except Exception as e:
-        logger.error(f"LLM explainer failed: {e}")
-        return LLMExplainabilitySummary(
-            paragraph="Analysis complete. See the detailed indicators below for specifics.",
-            bullets=["LLM explanation temporarily unavailable"],
-            model_used="error",
-        )

logs/deepshield.log ADDED Viewed

	@@ -0,0 +1,949 @@

+2026-04-22 18:24:59.601 | INFO     | main:lifespan:83 - Starting DeepShield backend
+2026-04-22 18:24:59.655 | INFO     | main:lifespan:85 - Database initialized
+2026-04-22 18:24:59.656 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-22 18:25:06.201 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-22 18:25:06.206 | INFO     | services.report_service:cleanup_expired:151 - Cleaned up 1 expired reports
+2026-04-22 18:26:20.263 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-22 18:26:22.700 | INFO     | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
+2026-04-22 18:26:23.034 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.078 ffpp=n/a eff=0.18335410952568054 → 0.131
+2026-04-22 18:26:28.349 | INFO     | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
+2026-04-22 18:26:28.390 | INFO     | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
+2026-04-22 18:26:29.238 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-22 18:26:29.277 | INFO     | services.ela_service:generate_ela_base64:60 - ELA map generated (256x256)
+2026-04-22 18:26:30.141 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-22 18:26:30.327 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-22 18:26:30.347 | INFO     | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=19 score=13 verdict=Very Likely Fake
+2026-04-22 18:26:30.349 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
+2026-04-22 18:26:30.349 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
+2026-04-22 18:27:58.805 | INFO     | main:lifespan:93 - Shutting down DeepShield backend
+2026-04-22 18:28:09.692 | INFO     | main:lifespan:83 - Starting DeepShield backend
+2026-04-22 18:28:09.698 | INFO     | main:lifespan:85 - Database initialized
+2026-04-22 18:28:09.698 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-22 18:28:11.556 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-24 01:50:58.220 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 01:51:03.592 | INFO     | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
+2026-04-24 01:51:03.887 | INFO     | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.597 ffpp=n/a eff=n/a → 0.597
+2026-04-24 01:51:12.975 | INFO     | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
+2026-04-24 01:51:13.089 | INFO     | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
+2026-04-24 01:51:13.255 | INFO     | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
+2026-04-24 01:51:13.320 | INFO     | services.ela_service:generate_ela_base64:60 - ELA map generated (640x427)
+2026-04-24 01:51:14.648 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
+2026-04-24 01:51:14.933 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-24 01:51:14.979 | INFO     | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=20 score=40 verdict=Likely Fake
+2026-04-24 01:51:14.982 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
+2026-04-24 01:51:14.984 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
+2026-04-24 07:35:53.458 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 07:36:02.194 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 07:36:03.057 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
+2026-04-24 07:36:03.058 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 68 (High) excl=4 caps=3 cb=1 emo=1
+2026-04-24 07:36:03.061 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
+2026-04-24 07:36:05.585 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 07:36:06.959 | INFO     | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=21 text score=15 verdict=Very Likely Fake
+2026-04-24 07:36:08.561 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 51.884484839s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 51
+}
+]
+2026-04-24 07:36:41.979 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-24 07:36:47.524 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-24 07:36:48.484 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 07:36:49.759 | INFO     | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
+2026-04-24 07:36:49.848 | INFO     | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.521 ffpp=n/a eff=n/a → 0.521
+2026-04-24 07:36:51.638 | INFO     | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
+2026-04-24 07:36:51.638 | WARNING  | services.artifact_detector:detect_face_based_artifacts:213 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
+2026-04-24 07:36:51.649 | INFO     | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
+2026-04-24 07:36:51.696 | INFO     | services.ela_service:generate_ela_base64:60 - ELA map generated (512x512)
+2026-04-24 07:36:52.470 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-24 07:36:52.519 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-24 07:36:52.542 | INFO     | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=22 score=48 verdict=Possibly Manipulated
+2026-04-24 07:36:53.674 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 6.748563195s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 6
+}
+]
+2026-04-24 07:36:54.760 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 5.653927512s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 5
+}
+]
+2026-04-24 15:16:36.138 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:16:43.946 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:16:44.719 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
+2026-04-24 15:16:44.721 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
+2026-04-24 15:16:44.723 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
+2026-04-24 15:16:45.864 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:16:47.113 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=23 text score=15 verdict=Very Likely Fake
+2026-04-24 15:16:48.348 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 12.294521515s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 12
+}
+]
+2026-04-24 15:16:48.553 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-24 15:16:50.111 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-24 15:16:51.265 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 15:16:52.685 | INFO     | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
+2026-04-24 15:16:52.723 | INFO     | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.517 ffpp=n/a eff=n/a → 0.517
+2026-04-24 15:16:52.735 | INFO     | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
+2026-04-24 15:16:54.934 | WARNING  | services.artifact_detector:detect_face_based_artifacts:211 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
+2026-04-24 15:16:54.949 | INFO     | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
+2026-04-24 15:16:54.965 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (256x256)
+2026-04-24 15:16:55.916 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-24 15:16:55.975 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-24 15:16:55.989 | INFO     | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=24 score=48 verdict=Possibly Manipulated
+2026-04-24 15:16:56.236 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 4.477916448s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 4
+}
+]
+2026-04-24 15:16:57.419 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 3.282459328s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 3
+}
+]
+2026-04-24 15:16:57.445 | INFO     | models.model_loader:load_ocr_engine:130 - Loading EasyOCR reader (langs: ['en', 'hi'])
+2026-04-24 15:17:27.399 | INFO     | models.model_loader:load_ocr_engine:136 - EasyOCR loaded
+2026-04-24 15:17:27.870 | INFO     | services.screenshot_service:run_ocr:48 - OCR extracted 0 text regions
+2026-04-24 15:17:27.881 | INFO     | api.v1.analyze:analyze_screenshot_endpoint:726 - Saved AnalysisRecord id=25 screenshot score=50 verdict=Possibly Manipulated
+2026-04-24 15:17:28.066 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+Please retry in 32.593323033s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 32
+}
+]
+2026-04-24 15:17:54.819 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:18:00.795 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:18:00.888 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
+2026-04-24 15:18:00.889 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
+2026-04-24 15:18:00.891 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
+2026-04-24 15:18:01.659 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:18:02.878 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=26 text score=15 verdict=Very Likely Fake
+2026-04-24 15:18:03.994 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
+Please retry in 56.638939454s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
+  quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-pro"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+}
+, retry_delay {
+  seconds: 56
+}
+]
+2026-04-24 15:20:38.285 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:20:43.929 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:20:44.034 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
+2026-04-24 15:20:44.035 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
+2026-04-24 15:20:44.037 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
+2026-04-24 15:20:44.806 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:20:46.001 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=27 text score=15 verdict=Very Likely Fake
+2026-04-24 15:20:56.376 | INFO     | services.llm_explainer:generate_llm_summary:175 - LLM summary generated via gemini/gemini-2.5-flash
+2026-04-24 15:33:56.592 | INFO     | api.v1.auth:register:33 - Registered user id=3 email=***@example.com
+2026-04-24 15:33:57.227 | INFO     | api.v1.auth:login:42 - Login user id=3 email=***@example.com
+2026-04-24 15:33:57.553 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:34:06.986 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:34:07.731 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
+2026-04-24 15:34:07.733 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:34:07.736 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:34:09.017 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:34:10.285 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=28 text score=30 verdict=Likely Fake
+2026-04-24 15:34:41.718 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
+Please retry in 19.188761533s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-flash"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+  quota_value: 5
+}
+, retry_delay {
+  seconds: 19
+}
+]
+2026-04-24 15:34:41.788 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
+2026-04-24 15:34:41.788 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 76 (High) excl=3 caps=2 cb=1 emo=3
+2026-04-24 15:34:41.789 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:34:41.791 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:34:43.147 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=29 text score=15 verdict=Very Likely Fake
+2026-04-24 15:34:43.555 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
+Please retry in 17.333464233s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-flash"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+  quota_value: 5
+}
+, retry_delay {
+  seconds: 17
+}
+]
+2026-04-24 15:34:43.615 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.996 fake_p=0.996
+2026-04-24 15:34:43.616 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:34:43.616 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:34:43.618 | WARNING  | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
+2026-04-24 15:34:44.924 | INFO     | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=30 text score=30 verdict=Likely Fake
+2026-04-24 15:34:45.353 | ERROR    | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
+* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
+Please retry in 15.553103918s. [links {
+  description: "Learn more about Gemini API quotas"
+  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
+}
+, violations {
+  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
+  quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
+  quota_dimensions {
+    key: "model"
+    value: "gemini-2.5-flash"
+  }
+  quota_dimensions {
+    key: "location"
+    value: "global"
+  }
+  quota_value: 5
+}
+, retry_delay {
+  seconds: 15
+}
+]
+2026-04-24 15:43:27.438 | INFO     | api.v1.auth:register:33 - Registered user id=4 email=***@example.com
+2026-04-24 15:43:27.463 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:43:33.684 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:43:33.796 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
+2026-04-24 15:43:33.797 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:43:33.799 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:43:35.106 | INFO     | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
+2026-04-24 15:43:35.120 | INFO     | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
+2026-04-24 15:43:36.284 | INFO     | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=31 text score=31 verdict=Likely Fake
+2026-04-24 15:43:36.352 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
+2026-04-24 15:43:36.352 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:43:36.353 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:43:36.370 | INFO     | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
+2026-04-24 15:43:37.567 | INFO     | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=32 text score=31 verdict=Likely Fake
+2026-04-24 15:43:47.549 | INFO     | services.llm_explainer:generate_llm_summary:207 - LLM summary generated via gemini/gemini-2.5-flash
+2026-04-24 15:43:47.614 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
+2026-04-24 15:43:47.614 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:43:47.615 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:43:47.630 | INFO     | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
+2026-04-24 15:43:49.134 | INFO     | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=33 text score=31 verdict=Likely Fake
+2026-04-24 15:44:11.346 | WARNING  | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 300s
+2026-04-24 15:44:11.346 | WARNING  | services.llm_explainer:generate_llm_summary:220 - LLM quota hit (ResourceExhausted) — circuit open for 300s
+2026-04-24 15:44:11.352 | WARNING  | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 5s
+2026-04-24 15:44:11.404 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
+2026-04-24 15:44:11.404 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:44:11.405 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:44:12.724 | INFO     | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=34 text score=30 verdict=Likely Fake
+2026-04-24 15:57:39.916 | INFO     | api.v1.auth:register:33 - Registered user id=5 email=***@example.com
+2026-04-24 15:57:39.958 | INFO     | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
+2026-04-24 15:57:46.475 | INFO     | models.model_loader:load_text_model:65 - Text model loaded
+2026-04-24 15:57:46.582 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
+2026-04-24 15:57:46.584 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:57:46.586 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:57:47.954 | INFO     | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
+2026-04-24 15:57:49.166 | INFO     | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=35 text score=30 verdict=Likely Fake
+2026-04-24 15:57:58.130 | INFO     | services.llm_explainer:generate_llm_summary:271 - LLM summary generated via gemini/gemini-2.5-flash
+2026-04-24 15:57:58.196 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
+2026-04-24 15:57:58.197 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:57:58.197 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:57:59.705 | INFO     | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=36 text score=30 verdict=Likely Fake
+2026-04-24 15:58:02.948 | ERROR    | services.llm_explainer:generate_llm_summary:287 - LLM explainer failed: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'This model is currently experiencing high demand. Spikes in demand are usually temporary. Please try again later.', 'status': 'UNAVAILABLE'}}
+2026-04-24 15:58:03.008 | INFO     | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
+2026-04-24 15:58:03.008 | INFO     | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
+2026-04-24 15:58:03.009 | INFO     | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
+2026-04-24 15:58:04.488 | INFO     | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=37 text score=30 verdict=Likely Fake
+2026-04-24 15:59:52.694 | INFO     | services.llm_explainer:_get_provider:176 - LLM chain initialized: gemini/gemini-2.5-flash → groq/llama-3.3-70b-versatile
+2026-04-24 15:59:52.695 | INFO     | services.llm_explainer:generate:161 - gemini/gemini-2.5-flash quota hit — failing over to groq/llama-3.3-70b-versatile
+2026-04-24 23:15:36.409 | INFO     | main:lifespan:108 - Starting DeepShield backend
+2026-04-24 23:15:36.470 | INFO     | main:lifespan:110 - Database initialized
+2026-04-24 23:15:36.470 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-24 23:15:46.404 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-24 23:15:57.188 | INFO     | api.v1.analyze:analyze_image:118 - cache hit image sha=6de55b9fc5bd record=19
+2026-04-24 23:16:59.860 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 23:17:03.920 | INFO     | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
+2026-04-24 23:17:04.519 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.868 ffpp=n/a eff=0.03269108012318611 → 0.450
+2026-04-24 23:17:04.569 | INFO     | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
+2026-04-24 23:17:13.315 | INFO     | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
+2026-04-24 23:17:16.988 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-24 23:17:17.131 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (800x450)
+2026-04-24 23:17:18.394 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-24 23:17:18.714 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-24 23:17:18.757 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=38 score=45 verdict=Possibly Manipulated
+2026-04-24 23:29:04.622 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 23:29:05.312 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Fake | vit=0.767 ffpp=n/a eff=0.36121347546577454 → 0.564
+2026-04-24 23:29:06.604 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-24 23:29:10.091 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (2393x4096)
+2026-04-24 23:29:11.326 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-24 23:29:11.344 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-24 23:29:11.436 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=39 score=44 verdict=Possibly Manipulated
+2026-04-24 23:30:58.303 | ERROR    | api.v1.report:generate:51 - Report generation failed: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
+Traceback (most recent call last):
+  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1002, in _bootstrap
+    self._bootstrap_inner()
+    │    └ <function Thread._bootstrap_inner at 0x000001A73BF11A80>
+    └ <WorkerThread(AnyIO worker thread, started 18584)>
+  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1045, in _bootstrap_inner
+    self.run()
+    │    └ <function WorkerThread.run at 0x000001A7030349A0>
+    └ <WorkerThread(AnyIO worker thread, started 18584)>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\anyio\_backends\_asyncio.py", line 1002, in run
+    result = context.run(func, *args)
+             │       │   │      └ ()
+             │       │   └ functools.partial(<function generate at 0x000001A7011BA0C0>, db=<sqlalchemy.orm.session.Session object at 0x000001A70D16E390>...
+             │       └ <method 'run' of '_contextvars.Context' objects>
+             └ <_contextvars.Context object at 0x000001A70D16CD40>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
+    response = func(*args, **kwargs)
+               │     │       └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
+               │     └ ()
+               └ <function generate at 0x000001A7011BA160>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
+    response = func(*args, **kwargs)
+               │     │       └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
+               │     └ ()
+               └ <function generate at 0x000001A7011BA020>
+> File "C:\Users\athar\Desktop\minor2\backend\api\v1\report.py", line 49, in generate
+    path = generate_report(record)
+           │               └ <db.models.AnalysisRecord object at 0x000001A70D17A2D0>
+           └ <function generate_report at 0x000001A7011B9D00>
+  File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 119, in generate_report
+    html_to_pdf(html, out_path)
+    │           │     └ WindowsPath('temp_reports/deepshield_39_c2b71295.pdf')
+    │           └ '<!DOCTYPE html>\n<html>\n<head>\n  <meta charset="utf-8" />\n  <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
+    └ <function html_to_pdf at 0x000001A7011B9C60>
+  File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 107, in html_to_pdf
+    result = pisa.CreatePDF(html, dest=f)
+             │    │         │          └ <_io.BufferedWriter name='temp_reports\\deepshield_39_c2b71295.pdf'>
+             │    │         └ '<!DOCTYPE html>\n<html>\n<head>\n  <meta charset="utf-8" />\n  <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
+             │    └ <function pisaDocument at 0x000001A7011B9440>
+             └ <module 'xhtml2pdf.pisa' from 'C:\\Users\\athar\\Desktop\\minor2\\backend\\.venv\\Lib\\site-packages\\xhtml2pdf\\pisa.py'>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\document.py", line 196, in pisaDocument
+    doc.build(context.story)
+    │   │     │       └ [PmlParagraph(
+    │   │     │           'dir'
+    │   │     │               'dir'
+    │   │     │           'caseSensitive'
+    │   │     │               'caseSensitive'
+    │   │     │           'encoding'
+    │   │     │               'encoding'
+    │   │     │           'text'
+    │   │     │               'text...
+    │   │     └ <xhtml2pdf.context.pisaContext object at 0x000001A703A22990>
+    │   └ <function BaseDocTemplate.build at 0x000001A77EFA8E00>
+    └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 1083, in build
+    self.handle_flowable(flowables)
+    │    │               └ [PmlParagraph(
+    │    │                   'dir'
+    │    │                       'dir'
+    │    │                   'caseSensitive'
+    │    │                       'caseSensitive'
+    │    │                   'encoding'
+    │    │                       'encoding'
+    │    │                   'text'
+    │    │                       'text...
+    │    └ <function BaseDocTemplate.handle_flowable at 0x000001A77EFA8B80>
+    └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 932, in handle_flowable
+    if frame.add(f, canv, trySplit=self.allowSplitting):
+       │     │   │  │              │    └ 1
+       │     │   │  │              └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
+       │     │   │  └ <reportlab.pdfgen.canvas.Canvas object at 0x000001A70D1DED50>
+       │     │   └ PmlTable(
+       │     │      rowHeights=[None],
+       │     │      colWidths=[4.93228346456693, 488.29606299212605],
+       │     │     [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+       │     └ <function Frame._add at 0x000001A77EECDF80>
+       └ <reportlab.platypus.frames.Frame object at 0x000001A70344D6D0>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\frames.py", line 158, in _add
+    w, h = flowable.wrap(aW, h)
+           │        │    │   └ 751.1811023622049
+           │        │    └ 493.228346456693
+           │        └ <function PmlTable.wrap at 0x000001A7011719E0>
+           └ PmlTable(
+              rowHeights=[None],
+              colWidths=[4.93228346456693, 488.29606299212605],
+             [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 858, in wrap
+    return Table.wrap(self, availWidth, availHeight)
+           │     │    │     │           └ 751.1811023622049
+           │     │    │     └ 493.228346456693
+           │     │    └ PmlTable(
+           │     │       rowHeights=[None],
+           │     │       colWidths=[4.93228346456693, 488.29606299212605],
+           │     │      [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+           │     └ <function Table.wrap at 0x000001A77EFAC400>
+           └ <class 'reportlab.platypus.tables.Table'>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 1354, in wrap
+    self._calc(availWidth, availHeight)
+    │    │     │           └ 751.1811023622049
+    │    │     └ 493.228346456693
+    │    └ <function Table._calc at 0x000001A77EFAB600>
+    └ PmlTable(
+       rowHeights=[None],
+       colWidths=[4.93228346456693, 488.29606299212605],
+      [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 740, in _calc
+    self._calc_height(availHeight,availWidth,W=W)
+    │    │            │           │            └ None
+    │    │            │           └ 493.228346456693
+    │    │            └ 751.1811023622049
+    │    └ <function Table._calc_height at 0x000001A77EFAB560>
+    └ PmlTable(
+       rowHeights=[None],
+       colWidths=[4.93228346456693, 488.29606299212605],
+      [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 664, in _calc_height
+    dW,t = self._listCellGeom(v,w or self._listValueWidth(v),s)
+           │    │             │ │    │    │               │  └ <CellStyle '(0, 0)'>
+           │    │             │ │    │    │               └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
+           │    │             │ │    │    └ <function Table._listValueWidth at 0x000001A77EFAB380>
+           │    │             │ │    └ PmlTable(
+           │    │             │ │       rowHeights=[None],
+           │    │             │ │       colWidths=[4.93228346456693, 488.29606299212605],
+           │    │             │ │      [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+           │    │             │ └ 4.93228346456693
+           │    │             └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
+           │    └ <function PmlTable._listCellGeom at 0x000001A701171940>
+           └ PmlTable(
+              rowHeights=[None],
+              colWidths=[4.93228346456693, 488.29606299212605],
+             [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 810, in _listCellGeom
+    return Table._listCellGeom(self, V, w, s, W=W, H=H, aH=aH)
+           │     │             │     │  │  │    │    │     └ 751.1811023622049
+           │     │             │     │  │  │    │    └ None
+           │     │             │     │  │  │    └ None
+           │     │             │     │  │  └ <CellStyle '(0, 0)'>
+           │     │             │     │  └ 4.93228346456693
+           │     │             │     └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
+           │     │             └ PmlTable(
+           │     │                rowHeights=[None],
+           │     │                colWidths=[4.93228346456693, 488.29606299212605],
+           │     │               [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
+           │     └ <function Table._listCellGeom at 0x000001A77EFAB2E0>
+           └ <class 'reportlab.platypus.tables.Table'>
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 490, in _listCellGeom
+    raise ValueError(f'{self.identity()}: flowable given negative availWidth={aW} == width={w} - leftPadding={s.leftPadding} - rightPadding={s.rightPadding}')
+  File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 440, in identity
+    tallest = '(tallest row %d)' % int(max(rh))
+                                           └ [None]
+TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
+2026-04-24 23:44:20.465 | INFO     | api.v1.auth:register:33 - Registered user id=6 email=***@gmail.com
+2026-04-24 23:45:54.152 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-24 23:45:54.595 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.668 ffpp=n/a eff=0.00913542602211237 → 0.339
+2026-04-24 23:45:55.772 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-24 23:45:58.926 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
+2026-04-24 23:46:00.276 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
+2026-04-24 23:46:00.291 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=Google, model=Pixel 7 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
+2026-04-24 23:46:00.379 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=40 score=14 verdict=Very Likely Fake
+2026-04-24 23:46:00.382 | ERROR    | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-24 23:46:00.386 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-24 23:47:37.291 | INFO     | services.report_service:generate_report:120 - Report generated id=40 path=temp_reports\deepshield_40_3f0f8ff7.pdf size=14978B
+2026-04-24 23:50:59.570 | INFO     | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
+2026-04-25 02:48:29.295 | INFO     | services.report_service:cleanup_expired:149 - Cleaned up 2 expired reports
+2026-04-25 02:48:29.419 | WARNING  | services.report_service:cleanup_expired:149 - Cleanup failed for temp_reports\deepshield_40_3f0f8ff7.pdf: [WinError 2] The system cannot find the file specified: 'temp_reports\\deepshield_40_3f0f8ff7.pdf'
+2026-04-25 21:48:15.075 | INFO     | main:lifespan:108 - Starting DeepShield backend
+2026-04-25 21:48:15.082 | INFO     | main:lifespan:110 - Database initialized
+2026-04-25 21:48:15.082 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-25 21:48:18.709 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-25 21:48:18.712 | INFO     | main:lifespan:118 - Shutting down DeepShield backend
+2026-04-25 21:52:02.663 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-25 21:52:03.239 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.870 ffpp=n/a eff=0.0529196597635746 → 0.462
+2026-04-25 21:52:04.390 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-25 21:52:04.682 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (1223x640)
+2026-04-25 21:52:05.863 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
+2026-04-25 21:52:05.883 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-25 21:52:05.927 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=41 score=46 verdict=Possibly Manipulated
+2026-04-25 22:02:22.021 | INFO     | main:lifespan:108 - Starting DeepShield backend
+2026-04-25 22:02:22.057 | INFO     | main:lifespan:110 - Database initialized
+2026-04-25 22:02:22.057 | INFO     | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-25 22:02:30.014 | INFO     | models.model_loader:load_image_model:51 - Image model loaded
+2026-04-25 22:13:05.431 | INFO     | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
+2026-04-25 22:13:28.224 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-25 22:13:28.471 | INFO     | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.694 ffpp=n/a eff=n/a → 0.694
+2026-04-25 22:13:28.859 | INFO     | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
+2026-04-25 22:13:31.674 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
+2026-04-25 22:13:33.044 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
+2026-04-25 22:13:33.062 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=Apple, model=iPhone 16 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
+2026-04-25 22:13:33.166 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=42 score=11 verdict=Very Likely Fake
+2026-04-25 22:13:33.169 | ERROR    | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-25 22:13:33.171 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-26 22:05:50.626 | INFO     | main:lifespan:108 - Starting DeepShield backend
+2026-04-26 22:05:50.640 | INFO     | main:lifespan:110 - Database initialized
+2026-04-26 22:05:50.641 | INFO     | models.model_loader:load_image_model:44 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
+2026-04-26 22:05:58.170 | INFO     | models.model_loader:load_image_model:52 - Image model loaded
+2026-04-26 22:07:47.526 | WARNING  | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
+2026-04-26 22:07:48.484 | INFO     | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.834 ffpp=n/a eff=0.02755815163254738 → 0.431
+2026-04-26 22:07:50.164 | INFO     | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
+2026-04-26 22:07:50.584 | INFO     | services.ela_service:generate_ela_base64:59 - ELA map generated (1290x1290)
+2026-04-26 22:07:52.661 | INFO     | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
+2026-04-26 22:07:52.670 | INFO     | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
+2026-04-26 22:07:52.747 | INFO     | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=43 score=43 verdict=Possibly Manipulated
+2026-04-26 22:07:52.752 | ERROR    | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-26 22:07:52.756 | ERROR    | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
+2026-04-26 22:09:45.469 | INFO     | services.report_service:generate_report:120 - Report generated id=43 path=temp_reports\deepshield_43_262befa5.pdf size=15602B

main.py CHANGED Viewed

@@ -1,17 +1,98 @@
 import asyncio
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from loguru import logger
 from api.router import api_router
 from config import settings
 from db.database import init_db
 from models.model_loader import get_model_loader
 from services.report_service import cleanup_expired
 async def _report_cleanup_loop():
     while True:
         try:
@@ -23,6 +104,7 @@ async def _report_cleanup_loop():
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     logger.info("Starting DeepShield backend")
     init_db()
     logger.info("Database initialized")
@@ -43,16 +125,32 @@ app = FastAPI(
     lifespan=lifespan,
 )
 app.add_middleware(
     CORSMiddleware,
     allow_origins=settings.CORS_ORIGINS,
     allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
 )
 app.include_router(api_router)
 @app.get("/")
 def root():

 import asyncio
+import secrets
+import sys
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
 from loguru import logger
+from slowapi import _rate_limit_exceeded_handler
+from slowapi.errors import RateLimitExceeded
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import JSONResponse
 from api.router import api_router
 from config import settings
 from db.database import init_db
 from models.model_loader import get_model_loader
+from services.rate_limit import RateLimitContextMiddleware, limiter
 from services.report_service import cleanup_expired
+class ContentLengthLimitMiddleware(BaseHTTPMiddleware):
+    """Reject oversized uploads via the Content-Length header before reading the body.
+    Saves bandwidth + memory vs letting read_upload_bytes reject post-read.
+    Only the declared length is inspected: a request with a missing or
+    non-numeric Content-Length header is passed through unchanged, so this
+    check cannot be the only size limit.
+    Args:
+        app: the wrapped ASGI application.
+        max_bytes: largest declared body size, in bytes, that is let through.
+    """
+    def __init__(self, app, max_bytes: int) -> None:
+        super().__init__(app)
+        self._max = max_bytes
+    async def dispatch(self, request, call_next):
+        """Answer 413 when the declared length exceeds the limit, else forward the request."""
+        declared = request.headers.get("content-length", "")
+        # str.isdigit() alone also accepts characters such as "²" that int()
+        # rejects with ValueError; requiring ASCII keeps the int() call safe.
+        if declared.isascii() and declared.isdigit() and int(declared) > self._max:
+            return JSONResponse(
+                status_code=413,
+                content={"detail": f"Upload exceeds {self._max // (1024 * 1024)} MB limit"},
+            )
+        return await call_next(request)
+# === Phase 15.3 — JWT / CORS / logging hardening ===
+_DEFAULT_JWT_SECRET = "change-me-in-production"
+def _enforce_production_hardening() -> None:
+    """Refuse to start in production with unsafe defaults (Phase 15.3).
+    Checks performed:
+      * JWT_SECRET_KEY must be set and differ from the built-in default.
+        With settings.DEBUG on this only logs a warning; otherwise it logs
+        an error and exits the process.
+      * CORS_ORIGINS must not contain "*" unless settings.DEBUG is on.
+    Raises:
+        SystemExit: with status 1 when a check fails outside DEBUG mode.
+    """
+    # Log how to generate a key instead of a freshly generated value: a
+    # ready-to-paste secret written to the log sinks would end up stored in
+    # the log files if an operator adopted it as the real key.
+    keygen_hint = 'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(48))"'
+    if not settings.JWT_SECRET_KEY or settings.JWT_SECRET_KEY == _DEFAULT_JWT_SECRET:
+        if settings.DEBUG:
+            logger.warning(
+                "JWT_SECRET_KEY is unset or default — safe in dev only. "
+                f"Set it before deploying. {keygen_hint}"
+            )
+        else:
+            logger.error(
+                "Refusing to start: JWT_SECRET_KEY is unset or default. "
+                f"Set JWT_SECRET_KEY in your environment. {keygen_hint}"
+            )
+            sys.exit(1)
+    if "*" in settings.CORS_ORIGINS and not settings.DEBUG:
+        logger.error(
+            "Refusing to start: CORS_ORIGINS contains '*' while allow_credentials=True. "
+            "Set an explicit origin list."
+        )
+        sys.exit(1)
+def _configure_logging() -> None:
+    """Replace loguru's existing handlers with two email-scrubbing INFO sinks.
+    One sink writes to stderr; the other writes to logs/deepshield.log with
+    10 MB rotation, 7-day retention and enqueue=True. Both share a filter
+    that masks the local part of any email address in the message text.
+    """
+    import re
+    address = re.compile(r"([A-Za-z0-9._%+-]+)@([A-Za-z0-9.-]+\.[A-Za-z]{2,})")
+    def _mask_emails(record):
+        # Rewrite the message in place, keeping only the domain; always accept the record.
+        record["message"] = address.sub(r"***@\2", record["message"])
+        return True
+    # Drop whatever handlers are currently registered before adding ours.
+    logger.remove()
+    logger.add(sys.stderr, level="INFO", filter=_mask_emails)
+    file_sink_options = {
+        "rotation": "10 MB",
+        "retention": "7 days",
+        "filter": _mask_emails,
+        "level": "INFO",
+        "enqueue": True,
+    }
+    logger.add("logs/deepshield.log", **file_sink_options)
+_configure_logging()
 async def _report_cleanup_loop():
     while True:
         try:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    _enforce_production_hardening()
     logger.info("Starting DeepShield backend")
     init_db()
     logger.info("Database initialized")
     lifespan=lifespan,
 )
+# Phase 15.2 — slowapi rate limiter: expose the limiter on app.state and map
+# RateLimitExceeded to slowapi's stock handler.
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+app.add_middleware(RateLimitContextMiddleware)
+# Phase 15.3 — reject oversized uploads before reading body
+# (MAX_UPLOAD_SIZE_MB is converted to bytes here).
+app.add_middleware(ContentLengthLimitMiddleware, max_bytes=settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024)
+# Phase 15.3 — explicit CORS methods/headers (no wildcards with credentials)
+# NOTE(review): Starlette is understood to run the most recently added
+# middleware first, which would make CORSMiddleware the outermost layer so
+# early 413 responses still get CORS headers — confirm before reordering.
 app.add_middleware(
     CORSMiddleware,
     allow_origins=settings.CORS_ORIGINS,
     allow_credentials=True,
+    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+    allow_headers=["Authorization", "Content-Type", "Accept", "Origin", "X-Requested-With"],
 )
 app.include_router(api_router)
+# Phase 19.2 — serve stored thumbnails / media under /media/*
+# The root directory comes from the MEDIA_ROOT environment variable and
+# falls back to ./media. Creating <root>/thumbs (and any missing parents)
+# up front guarantees the directory exists before it is mounted.
+# NOTE(review): the mount covers the whole media root, not only thumbs/ —
+# confirm every file stored there is meant to be publicly readable.
+import os as _os
+_media_root = _os.environ.get("MEDIA_ROOT", "./media")
+_os.makedirs(_os.path.join(_media_root, "thumbs"), exist_ok=True)
+app.mount("/media", StaticFiles(directory=_media_root), name="media")
 @app.get("/")
 def root():

media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp ADDED Viewed

models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg RENAMED Viewed

File without changes

media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg ADDED Viewed

models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg RENAMED Viewed

File without changes

media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg ADDED Viewed

media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg ADDED Viewed

Git LFS Details

SHA256: 7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd
Pointer size: 132 Bytes
Size of remote file: 4.01 MB

media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp ADDED Viewed

media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg ADDED Viewed

Git LFS Details

SHA256: c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028
Pointer size: 132 Bytes
Size of remote file: 3.13 MB

media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg ADDED Viewed

media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg ADDED Viewed

media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg ADDED Viewed

media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg ADDED Viewed