ar07xd committed on
Commit
fba30db
·
verified ·
1 Parent(s): cce3df7

Sync from GitHub via hub-sync

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +44 -0
  2. .gitattributes +4 -0
  3. Colab_ViT_Training.ipynb +0 -233
  4. README.md +0 -13
  5. analyze.py +0 -177
  6. api/router.py +1 -0
  7. api/v1/health.py +72 -1
  8. api/v1/history.py +7 -1
  9. api/v1/report.py +41 -6
  10. artifact_detector.py +0 -229
  11. auth.py +0 -30
  12. auth_service.py +0 -67
  13. common.py +0 -88
  14. config.py +34 -1
  15. database.py +0 -30
  16. datasets/__init__.py +0 -0
  17. datasets/build_manifest.py +0 -93
  18. datasets/download_dfdc_sample.py +0 -44
  19. datasets/download_ffhq.py +0 -49
  20. datasets/extract_frames.py +0 -90
  21. datasets/procure_all.ps1 +0 -40
  22. datasets/procure_all.sh +0 -37
  23. db/database.py +41 -17
  24. db/models.py +21 -7
  25. deepshield.db-shm +0 -0
  26. deepshield.db-wal +0 -0
  27. deepshield_13_5bcf1328.pdf +0 -148
  28. deps.py +0 -46
  29. download_ffpp.py +0 -261
  30. ela_service.py +0 -88
  31. exif_service.py +0 -129
  32. file_handler.py +0 -96
  33. generate_colab_nb.py +0 -213
  34. heatmap_generator.py +0 -164
  35. image_service.py +0 -58
  36. llm_explainer.py +0 -191
  37. logs/deepshield.log +949 -0
  38. main.py +100 -2
  39. media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp +0 -0
  40. models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg +2 -2
  41. media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg +0 -0
  42. models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg +2 -2
  43. media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg +0 -0
  44. media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg +3 -0
  45. media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp +0 -0
  46. media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg +3 -0
  47. media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg +0 -0
  48. media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg +0 -0
  49. media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg +0 -0
  50. media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg +0 -0
.env.example ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # === DeepShield backend config example ===
2
+
3
+ # Server
4
+ APP_HOST=0.0.0.0
5
+ APP_PORT=8000
6
+ DEBUG=false
7
+ CORS_ORIGINS=["http://localhost:5173"]
8
+
9
+ # === Database ===
10
+ # SQLite (default — zero-config, great for dev / college demo):
11
+ DATABASE_URL=sqlite:///./deepshield.db
12
+ # Postgres (production path — migrations are applied automatically
13
+ # by init_db via ALTER TABLE when new columns are missing):
14
+ # DATABASE_URL=postgresql+psycopg2://deepshield:CHANGEME@localhost:5432/deepshield
15
+
16
+ # Phase 19.1 — SHA-256 dedup cache TTL (days)
17
+ CACHE_TTL_DAYS=30
18
+
19
+ # Phase 19.2 — object storage root (content-addressed media + thumbnails)
20
+ MEDIA_ROOT=./media
21
+
22
+ # File upload
23
+ MAX_UPLOAD_SIZE_MB=100
24
+ UPLOAD_DIR=./temp_uploads
25
+
26
+ # AI models
27
+ PRELOAD_MODELS=true
28
+ DEVICE=cpu
29
+
30
+ # LLM explainability (Phase 12)
31
+ LLM_PROVIDER=gemini
32
+ LLM_API_KEY=
33
+ LLM_MODEL=gemini-1.5-flash
34
+
35
+ # News lookup (Phase 13)
36
+ NEWS_API_KEY=
37
+
38
+ # Auth (REQUIRED in production — generate with python -c "import secrets; print(secrets.token_urlsafe(48))")
39
+ JWT_SECRET_KEY=change-me-in-production
40
+ JWT_ALGORITHM=HS256
41
+ JWT_EXPIRATION_MINUTES=1440
42
+
43
+ # Optional metadata writer
44
+ EXIFTOOL_PATH=
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg filter=lfs diff=lfs merge=lfs -text
37
+ media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg filter=lfs diff=lfs merge=lfs -text
38
+ media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg filter=lfs diff=lfs merge=lfs -text
39
+ media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg filter=lfs diff=lfs merge=lfs -text
Colab_ViT_Training.ipynb DELETED
@@ -1,233 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "id": "1e0e7b4a",
6
- "metadata": {},
7
- "source": [
8
- "# DeepShield: FaceForensics++ ViT Training \n",
9
- "Run this entirely in Google Colab.\n",
10
- "**Before running**:\n",
11
- "1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.\n",
12
- "2. Run the cells below sequentially.\n"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": null,
18
- "id": "4fe293e7",
19
- "metadata": {},
20
- "outputs": [],
21
- "source": [
22
- "!pip install timm transformers datasets accelerate evaluate opencv-python\n"
23
- ]
24
- },
25
- {
26
- "cell_type": "code",
27
- "execution_count": null,
28
- "id": "c9387c0f",
29
- "metadata": {},
30
- "outputs": [],
31
- "source": [
32
- "# We create the download script inside the Colab environment\n",
33
- "download_script = '''#!/usr/bin/env python\n",
34
- "import argparse\n",
35
- "import os\n",
36
- "import urllib.request\n",
37
- "import tempfile\n",
38
- "import time\n",
39
- "import sys\n",
40
- "import json\n",
41
- "from tqdm import tqdm\n",
42
- "from os.path import join\n",
43
- "\n",
44
- "FILELIST_URL = 'misc/filelist.json'\n",
45
- "DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'\n",
46
- "DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]\n",
47
- "DATASETS = {\n",
48
- " 'original': 'original_sequences/youtube',\n",
49
- " 'Deepfakes': 'manipulated_sequences/Deepfakes',\n",
50
- " 'Face2Face': 'manipulated_sequences/Face2Face',\n",
51
- " 'FaceShifter': 'manipulated_sequences/FaceShifter',\n",
52
- " 'FaceSwap': 'manipulated_sequences/FaceSwap',\n",
53
- " 'NeuralTextures': 'manipulated_sequences/NeuralTextures'\n",
54
- "}\n",
55
- "ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']\n",
56
- "COMPRESSION = ['raw', 'c23', 'c40']\n",
57
- "TYPE = ['videos']\n",
58
- "\n",
59
- "def download_file(url, out_file):\n",
60
- " os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
61
- " if not os.path.isfile(out_file):\n",
62
- " urllib.request.urlretrieve(url, out_file)\n",
63
- "\n",
64
- "def main():\n",
65
- " parser = argparse.ArgumentParser()\n",
66
- " parser.add_argument('output_path', type=str)\n",
67
- " parser.add_argument('-d', '--dataset', type=str, default='all')\n",
68
- " parser.add_argument('-c', '--compression', type=str, default='c40')\n",
69
- " parser.add_argument('-t', '--type', type=str, default='videos')\n",
70
- " parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial\n",
71
- " args = parser.parse_args()\n",
72
- " \n",
73
- " base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'\n",
74
- " \n",
75
- " datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS\n",
76
- " for dataset in datasets:\n",
77
- " dataset_path = DATASETS[dataset]\n",
78
- " print(f'Downloading {args.compression} of {dataset}')\n",
79
- " \n",
80
- " file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode(\"utf-8\"))\n",
81
- " filelist = []\n",
82
- " if 'original' in dataset_path:\n",
83
- " for pair in file_pairs:\n",
84
- " filelist += pair\n",
85
- " else:\n",
86
- " for pair in file_pairs:\n",
87
- " filelist.append('_'.join(pair))\n",
88
- " filelist.append('_'.join(pair[::-1]))\n",
89
- " \n",
90
- " filelist = filelist[:args.num_videos]\n",
91
- " dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'\n",
92
- " dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)\n",
93
- " \n",
94
- " for filename in tqdm(filelist):\n",
95
- " download_file(dataset_videos_url + filename + \".mp4\", join(dataset_output_path, filename + \".mp4\"))\n",
96
- "\n",
97
- "if __name__ == \"__main__\":\n",
98
- " main()\n",
99
- "'''\n",
100
- "\n",
101
- "with open(\"download_ffpp.py\", \"w\") as f:\n",
102
- " f.write(download_script)\n",
103
- "\n",
104
- "!python download_ffpp.py ./data -d all -c c40 -t videos -n 50\n"
105
- ]
106
- },
107
- {
108
- "cell_type": "code",
109
- "execution_count": null,
110
- "id": "f33716f6",
111
- "metadata": {},
112
- "outputs": [],
113
- "source": [
114
- "import cv2\n",
115
- "import os\n",
116
- "import glob\n",
117
- "from tqdm import tqdm\n",
118
- "\n",
119
- "def extract_frames(video_folder, output_folder, label, max_frames=4):\n",
120
- " os.makedirs(output_folder, exist_ok=True)\n",
121
- " videos = glob.glob(os.path.join(video_folder, \"*.mp4\"))\n",
122
- " \n",
123
- " for vid_path in tqdm(videos, desc=f\"Extracting {label}\"):\n",
124
- " vid_name = os.path.basename(vid_path).replace('.mp4','')\n",
125
- " cap = cv2.VideoCapture(vid_path)\n",
126
- " count = 0\n",
127
- " while cap.isOpened() and count < max_frames:\n",
128
- " ret, frame = cap.read()\n",
129
- " if not ret: break\n",
130
- " frame = cv2.resize(frame, (224, 224))\n",
131
- " out_path = os.path.join(output_folder, f\"{vid_name}_f{count}.jpg\")\n",
132
- " cv2.imwrite(out_path, frame)\n",
133
- " count += 1\n",
134
- " cap.release()\n",
135
- "\n",
136
- "# Extract Real\n",
137
- "extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')\n",
138
- "\n",
139
- "# Extract Fakes\n",
140
- "fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']\n",
141
- "for f in fakes:\n",
142
- " extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake')\n"
143
- ]
144
- },
145
- {
146
- "cell_type": "code",
147
- "execution_count": null,
148
- "id": "b79cdd85",
149
- "metadata": {},
150
- "outputs": [],
151
- "source": [
152
- "import numpy as np\n",
153
- "from datasets import load_dataset\n",
154
- "from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer\n",
155
- "import torch\n",
156
- "\n",
157
- "# 1. Load Dataset\n",
158
- "dataset = load_dataset('imagefolder', data_dir='./dataset')\n",
159
- "# Split into train/validation\n",
160
- "dataset = dataset['train'].train_test_split(test_size=0.1)\n",
161
- "\n",
162
- "# 2. Preprocessor\n",
163
- "model_name_or_path = 'google/vit-base-patch16-224-in21k'\n",
164
- "processor = ViTImageProcessor.from_pretrained(model_name_or_path)\n",
165
- "\n",
166
- "def transform(example_batch):\n",
167
- " # Take a list of PIL images and turn them to pixel values\n",
168
- " inputs = processor([x.convert(\"RGB\") for x in example_batch['image']], return_tensors='pt')\n",
169
- " inputs['labels'] = example_batch['label']\n",
170
- " return inputs\n",
171
- "\n",
172
- "prepared_ds = dataset.with_transform(transform)\n",
173
- "\n",
174
- "def collate_fn(batch):\n",
175
- " return {\n",
176
- " 'pixel_values': torch.stack([x['pixel_values'] for x in batch]),\n",
177
- " 'labels': torch.tensor([x['labels'] for x in batch])\n",
178
- " }\n",
179
- "\n",
180
- "# 3. Load Model\n",
181
- "labels = dataset['train'].features['label'].names\n",
182
- "model = ViTForImageClassification.from_pretrained(\n",
183
- " model_name_or_path,\n",
184
- " num_labels=len(labels),\n",
185
- " id2label={str(i): c for i, c in enumerate(labels)},\n",
186
- " label2id={c: str(i) for i, c in enumerate(labels)}\n",
187
- ")\n",
188
- "\n",
189
- "training_args = TrainingArguments(\n",
190
- " output_dir=\"./vit-deepshield\",\n",
191
- " per_device_train_batch_size=16,\n",
192
- " eval_strategy=\"steps\",\n",
193
- " num_train_epochs=3,\n",
194
- " fp16=True, # Mixed precision for speed\n",
195
- " save_steps=100,\n",
196
- " eval_steps=100,\n",
197
- " logging_steps=10,\n",
198
- " learning_rate=2e-4,\n",
199
- " save_total_limit=2,\n",
200
- " remove_unused_columns=False,\n",
201
- " push_to_hub=False,\n",
202
- " load_best_model_at_end=True,\n",
203
- ")\n",
204
- "\n",
205
- "import evaluate\n",
206
- "metric = evaluate.load(\"accuracy\")\n",
207
- "def compute_metrics(p):\n",
208
- " return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)\n",
209
- "\n",
210
- "trainer = Trainer(\n",
211
- " model=model,\n",
212
- " args=training_args,\n",
213
- " data_collator=collate_fn,\n",
214
- " compute_metrics=compute_metrics,\n",
215
- " train_dataset=prepared_ds[\"train\"],\n",
216
- " eval_dataset=prepared_ds[\"test\"],\n",
217
- ")\n",
218
- "\n",
219
- "# 4. Train\n",
220
- "train_results = trainer.train()\n",
221
- "trainer.save_model(\"deepshield_vit_model\")\n",
222
- "processor.save_pretrained(\"deepshield_vit_model\")\n",
223
- "trainer.log_metrics(\"train\", train_results.metrics)\n",
224
- "trainer.save_metrics(\"train\", train_results.metrics)\n",
225
- "trainer.save_state()\n",
226
- "print(\"Training Complete! The model is saved to ./deepshield_vit_model\")\n"
227
- ]
228
- }
229
- ],
230
- "metadata": {},
231
- "nbformat": 4,
232
- "nbformat_minor": 5
233
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Deepshield
3
- emoji: 🛡️
4
- colorFrom: blue
5
- colorTo: indigo
6
- sdk: docker
7
- app_port: 7860
8
- pinned: true
9
- ---
10
-
11
- # DeepShield Backend
12
-
13
- This space hosts the FastAPI backend for DeepShield.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
analyze.py DELETED
@@ -1,177 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import List
4
-
5
- from pydantic import BaseModel
6
-
7
- from schemas.common import (
8
- ArtifactIndicator,
9
- ContradictingEvidence,
10
- ExifSummary,
11
- LLMExplainabilitySummary,
12
- ProcessingSummary,
13
- TrustedSource,
14
- TruthOverride,
15
- Verdict,
16
- VLMBreakdown,
17
- )
18
-
19
-
20
- class SensationalismBreakdown(BaseModel):
21
- score: int = 0
22
- level: str = "Low"
23
- exclamation_count: int = 0
24
- caps_word_count: int = 0
25
- clickbait_matches: int = 0
26
- emotional_word_count: int = 0
27
- superlative_count: int = 0
28
-
29
-
30
- class ManipulationIndicatorOut(BaseModel):
31
- pattern_type: str
32
- matched_text: str
33
- start_pos: int
34
- end_pos: int
35
- severity: str
36
- description: str
37
-
38
-
39
- class TextExplainability(BaseModel):
40
- fake_probability: float
41
- top_label: str
42
- all_scores: dict = {}
43
- keywords: List[str] = []
44
- sensationalism: SensationalismBreakdown = SensationalismBreakdown()
45
- manipulation_indicators: List[ManipulationIndicatorOut] = []
46
- detected_language: str = "en" # ISO 639-1 code, e.g. "en", "hi"
47
- truth_override: TruthOverride | None = None
48
-
49
-
50
- class TextAnalysisResponse(BaseModel):
51
- analysis_id: str
52
- record_id: int = 0
53
- media_type: str = "text"
54
- timestamp: str
55
- verdict: Verdict
56
- explainability: TextExplainability
57
- llm_summary: LLMExplainabilitySummary | None = None
58
- trusted_sources: List[TrustedSource] = []
59
- contradicting_evidence: List[ContradictingEvidence] = []
60
- processing_summary: ProcessingSummary
61
- responsible_ai_notice: str = (
62
- "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
63
- )
64
-
65
-
66
- class OCRBoxOut(BaseModel):
67
- text: str
68
- bbox: List[List[int]]
69
- confidence: float
70
-
71
-
72
- class SuspiciousPhraseOut(BaseModel):
73
- text: str
74
- bbox: List[List[int]]
75
- pattern_type: str
76
- severity: str
77
- description: str
78
-
79
-
80
- class LayoutAnomalyOut(BaseModel):
81
- type: str
82
- severity: str
83
- description: str
84
- confidence: float
85
-
86
-
87
- class ScreenshotExplainability(BaseModel):
88
- extracted_text: str = ""
89
- ocr_boxes: List[OCRBoxOut] = []
90
- fake_probability: float = 0.0
91
- sensationalism: SensationalismBreakdown = SensationalismBreakdown()
92
- suspicious_phrases: List[SuspiciousPhraseOut] = []
93
- layout_anomalies: List[LayoutAnomalyOut] = []
94
- keywords: List[str] = []
95
- detected_language: str = "en"
96
- truth_override: TruthOverride | None = None
97
-
98
-
99
- class ScreenshotAnalysisResponse(BaseModel):
100
- analysis_id: str
101
- record_id: int = 0
102
- media_type: str = "screenshot"
103
- timestamp: str
104
- verdict: Verdict
105
- explainability: ScreenshotExplainability
106
- llm_summary: LLMExplainabilitySummary | None = None
107
- trusted_sources: List[TrustedSource] = []
108
- contradicting_evidence: List[ContradictingEvidence] = []
109
- processing_summary: ProcessingSummary
110
- responsible_ai_notice: str = (
111
- "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
112
- )
113
-
114
-
115
- class ImageExplainability(BaseModel):
116
- heatmap_base64: str = ""
117
- ela_base64: str = ""
118
- boxes_base64: str = ""
119
- heatmap_status: str = "success" # success | failed | degraded
120
- artifact_indicators: List[ArtifactIndicator] = []
121
- exif: ExifSummary | None = None
122
- llm_summary: LLMExplainabilitySummary | None = None
123
- vlm_breakdown: VLMBreakdown | None = None
124
-
125
-
126
- class FrameAnalysisOut(BaseModel):
127
- index: int
128
- timestamp_s: float
129
- label: str
130
- confidence: float
131
- suspicious_prob: float
132
- is_suspicious: bool
133
- has_face: bool = False
134
- scored: bool = False
135
-
136
-
137
- class VideoExplainability(BaseModel):
138
- num_frames_sampled: int
139
- num_face_frames: int = 0
140
- num_suspicious_frames: int
141
- mean_suspicious_prob: float
142
- max_suspicious_prob: float
143
- suspicious_ratio: float
144
- insufficient_faces: bool = False
145
- suspicious_timestamps: List[float] = []
146
- frames: List[FrameAnalysisOut] = []
147
-
148
-
149
- class VideoAnalysisResponse(BaseModel):
150
- analysis_id: str
151
- record_id: int = 0
152
- media_type: str = "video"
153
- timestamp: str
154
- verdict: Verdict
155
- explainability: VideoExplainability
156
- llm_summary: LLMExplainabilitySummary | None = None
157
- trusted_sources: List[TrustedSource] = []
158
- contradicting_evidence: List[ContradictingEvidence] = []
159
- processing_summary: ProcessingSummary
160
- responsible_ai_notice: str = (
161
- "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
162
- )
163
-
164
-
165
- class ImageAnalysisResponse(BaseModel):
166
- analysis_id: str
167
- record_id: int = 0
168
- media_type: str = "image"
169
- timestamp: str
170
- verdict: Verdict
171
- explainability: ImageExplainability
172
- trusted_sources: List[TrustedSource] = []
173
- contradicting_evidence: List[ContradictingEvidence] = []
174
- processing_summary: ProcessingSummary
175
- responsible_ai_notice: str = (
176
- "AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
177
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
api/router.py CHANGED
@@ -5,6 +5,7 @@ from api.v1 import analyze, auth, health, history, report
5
  api_router = APIRouter(prefix="/api/v1")
6
  api_router.include_router(health.router)
7
  api_router.include_router(analyze.router)
 
8
  api_router.include_router(report.router)
9
  api_router.include_router(auth.router)
10
  api_router.include_router(history.router)
 
5
  api_router = APIRouter(prefix="/api/v1")
6
  api_router.include_router(health.router)
7
  api_router.include_router(analyze.router)
8
+ api_router.include_router(analyze.jobs_router) # Phase 19.3
9
  api_router.include_router(report.router)
10
  api_router.include_router(auth.router)
11
  api_router.include_router(history.router)
api/v1/health.py CHANGED
@@ -1,8 +1,79 @@
1
- from fastapi import APIRouter
 
 
 
 
 
 
2
 
3
  router = APIRouter(tags=["health"])
4
 
5
 
6
  @router.get("/health")
7
  def health():
 
8
  return {"status": "ok", "service": "deepshield-backend"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, Response, status
2
+ from loguru import logger
3
+ from sqlalchemy import text
4
+
5
+ from config import settings
6
+ from db.database import engine
7
+ from services.llm_explainer import is_rate_limited
8
 
9
  router = APIRouter(tags=["health"])
10
 
11
 
12
  @router.get("/health")
13
  def health():
14
+ """Legacy combined healthcheck — kept for backwards compatibility."""
15
  return {"status": "ok", "service": "deepshield-backend"}
16
+
17
+
18
+ @router.get("/health/live")
19
+ def health_live():
20
+ """Liveness probe — returns 200 as long as the process is up."""
21
+ return {"status": "alive"}
22
+
23
+
24
+ @router.get("/health/ready")
25
+ def health_ready(response: Response):
26
+ """Readiness probe — 200 only when DB is reachable and models are loaded.
27
+
28
+ Phase 19.5: the frontend disables the Analyze button while this returns 503.
29
+ """
30
+ checks: dict[str, bool] = {}
31
+
32
+ try:
33
+ with engine.connect() as conn:
34
+ conn.execute(text("SELECT 1"))
35
+ checks["db"] = True
36
+ except Exception as e: # noqa: BLE001
37
+ logger.warning(f"readiness db check failed: {e}")
38
+ checks["db"] = False
39
+
40
+ try:
41
+ from models.model_loader import get_model_loader
42
+ checks["models"] = bool(get_model_loader().is_ready())
43
+ except AttributeError:
44
+ # No is_ready() — fall back to "ready if loader constructs"
45
+ try:
46
+ from models.model_loader import get_model_loader
47
+ get_model_loader()
48
+ checks["models"] = True
49
+ except Exception: # noqa: BLE001
50
+ checks["models"] = False
51
+ except Exception as e: # noqa: BLE001
52
+ logger.warning(f"readiness model check failed: {e}")
53
+ checks["models"] = False
54
+
55
+ ok = all(checks.values())
56
+ if not ok:
57
+ response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
58
+ return {"status": "ready" if ok else "not_ready", "checks": checks}
59
+
60
+
61
+ @router.get("/health/llm")
62
+ def health_llm(response: Response):
63
+ """LLM availability probe — lets the frontend decide whether to request/show
64
+ the AI summary card. Doesn't spend tokens; only checks config + breaker state.
65
+ """
66
+ has_primary = bool(settings.LLM_API_KEY)
67
+ has_fallback = bool(settings.GROQ_API_KEY)
68
+ cooldown = is_rate_limited()
69
+
70
+ # Available if (any provider configured) AND (not rate-limited OR fallback exists)
71
+ available = (has_primary or has_fallback) and (not cooldown or has_fallback)
72
+ if not available:
73
+ response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
74
+ return {
75
+ "available": available,
76
+ "primary": f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}" if has_primary else None,
77
+ "fallback": f"groq/{settings.GROQ_MODEL}" if has_fallback else None,
78
+ "rate_limited": cooldown,
79
+ }
api/v1/history.py CHANGED
@@ -60,7 +60,13 @@ def get_history_detail(
60
  if not r or r.user_id != user.id:
61
  raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
62
  try:
63
- return json.loads(r.result_json)
 
 
 
 
 
 
64
  except Exception:
65
  raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")
66
 
 
60
  if not r or r.user_id != user.id:
61
  raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
62
  try:
63
+ payload = json.loads(r.result_json)
64
+ # Inject storage fields from DB columns so the frontend can display full-size media
65
+ if r.media_path and not payload.get("media_path"):
66
+ payload["media_path"] = r.media_path
67
+ if r.thumbnail_url and not payload.get("thumbnail_url"):
68
+ payload["thumbnail_url"] = r.thumbnail_url
69
+ return payload
70
  except Exception:
71
  raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")
72
 
api/v1/report.py CHANGED
@@ -2,24 +2,45 @@ from __future__ import annotations
2
 
3
  from pathlib import Path
4
 
5
- from fastapi import APIRouter, Depends, HTTPException
6
  from fastapi.responses import FileResponse
7
  from loguru import logger
8
  from sqlalchemy.orm import Session
9
 
 
10
  from db.database import get_db
11
- from db.models import AnalysisRecord, Report
 
12
  from services.report_service import cleanup_expired, create_report_row, generate_report
13
 
14
  router = APIRouter(prefix="/report", tags=["report"])
15
 
16
 
 
 
 
 
 
 
 
 
 
 
17
  @router.post("/{analysis_id}")
18
- def generate(analysis_id: int, db: Session = Depends(get_db)):
 
 
 
 
 
 
 
19
  record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
20
  if not record:
21
  raise HTTPException(status_code=404, detail="analysis not found")
22
 
 
 
23
  existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
24
  if existing and Path(existing.file_path).exists():
25
  return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
@@ -44,7 +65,19 @@ def generate(analysis_id: int, db: Session = Depends(get_db)):
44
 
45
 
46
  @router.get("/{analysis_id}/download")
47
- def download(analysis_id: int, db: Session = Depends(get_db)):
 
 
 
 
 
 
 
 
 
 
 
 
48
  row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
49
  if not row:
50
  raise HTTPException(status_code=404, detail="report not found — generate first")
@@ -58,7 +91,9 @@ def download(analysis_id: int, db: Session = Depends(get_db)):
58
  )
59
 
60
 
61
- @router.post("/cleanup")
62
- def cleanup():
 
 
63
  n = cleanup_expired()
64
  return {"deleted": n}
 
2
 
3
  from pathlib import Path
4
 
5
+ from fastapi import APIRouter, Depends, HTTPException, Request, status
6
  from fastapi.responses import FileResponse
7
  from loguru import logger
8
  from sqlalchemy.orm import Session
9
 
10
+ from api.deps import get_current_user, optional_current_user
11
  from db.database import get_db
12
+ from db.models import AnalysisRecord, Report, User
13
+ from services.rate_limit import ANON_REPORT, AUTH_REPORT, is_anon, is_authed, limiter
14
  from services.report_service import cleanup_expired, create_report_row, generate_report
15
 
16
  router = APIRouter(prefix="/report", tags=["report"])
17
 
18
 
19
+ def _assert_record_access(record: AnalysisRecord, user: User | None) -> None:
20
+ """Phase 15.1 — allow access if the requester owns the record, or if the record
21
+ is anonymous (user_id is None). Everything else is 403."""
22
+ if record.user_id is None:
23
+ return
24
+ if user is not None and record.user_id == user.id:
25
+ return
26
+ raise HTTPException(status.HTTP_403_FORBIDDEN, "You do not own this analysis")
27
+
28
+
29
  @router.post("/{analysis_id}")
30
+ @limiter.limit(ANON_REPORT, exempt_when=is_authed)
31
+ @limiter.limit(AUTH_REPORT, exempt_when=is_anon)
32
+ def generate(
33
+ request: Request,
34
+ analysis_id: int,
35
+ db: Session = Depends(get_db),
36
+ user: User | None = Depends(optional_current_user),
37
+ ):
38
  record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
39
  if not record:
40
  raise HTTPException(status_code=404, detail="analysis not found")
41
 
42
+ _assert_record_access(record, user)
43
+
44
  existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
45
  if existing and Path(existing.file_path).exists():
46
  return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
 
65
 
66
 
67
  @router.get("/{analysis_id}/download")
68
+ @limiter.limit(ANON_REPORT, exempt_when=is_authed)
69
+ @limiter.limit(AUTH_REPORT, exempt_when=is_anon)
70
+ def download(
71
+ request: Request,
72
+ analysis_id: int,
73
+ db: Session = Depends(get_db),
74
+ user: User | None = Depends(optional_current_user),
75
+ ):
76
+ record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
77
+ if not record:
78
+ raise HTTPException(status_code=404, detail="analysis not found")
79
+ _assert_record_access(record, user)
80
+
81
  row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
82
  if not row:
83
  raise HTTPException(status_code=404, detail="report not found — generate first")
 
91
  )
92
 
93
 
94
+ @router.post("/cleanup", include_in_schema=False)
95
+ def cleanup(user: User = Depends(get_current_user)):
96
+ # Phase 15.1 — auth-guarded. Exposed only to authenticated users; an internal
97
+ # scheduler loop in main.py handles periodic cleanup automatically.
98
  n = cleanup_expired()
99
  return {"deleted": n}
artifact_detector.py DELETED
@@ -1,229 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import io
4
- from typing import List
5
-
6
- import numpy as np
7
- from loguru import logger
8
- from PIL import Image
9
-
10
- from schemas.common import ArtifactIndicator
11
-
12
-
13
- def _severity_from_score(score: float) -> str:
14
- if score >= 0.7:
15
- return "high"
16
- if score >= 0.4:
17
- return "medium"
18
- return "low"
19
-
20
-
21
- # ---------- 1. GAN high-frequency signature (FFT) ----------
22
- def detect_gan_hf_artifact(pil_img: Image.Image) -> ArtifactIndicator | None:
23
- """Compute high-frequency energy ratio on the luminance channel.
24
- Real photos typically follow a ~1/f spectrum; many GAN outputs show
25
- elevated HF energy or spectral peaks.
26
- """
27
- try:
28
- gray = np.asarray(pil_img.convert("L"), dtype=np.float32)
29
- # downsample for speed
30
- if max(gray.shape) > 512:
31
- import cv2
32
-
33
- scale = 512 / max(gray.shape)
34
- gray = cv2.resize(gray, (int(gray.shape[1] * scale), int(gray.shape[0] * scale)))
35
-
36
- fft = np.fft.fftshift(np.fft.fft2(gray))
37
- mag = np.abs(fft)
38
- h, w = mag.shape
39
- cy, cx = h // 2, w // 2
40
- y, x = np.ogrid[:h, :w]
41
- r = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
42
- r_max = np.sqrt(cx * cx + cy * cy)
43
- hf_mask = r > (0.5 * r_max)
44
-
45
- total = float(mag.sum() + 1e-9)
46
- hf = float(mag[hf_mask].sum())
47
- ratio = hf / total # typically 0.05–0.20 for natural photos
48
-
49
- # normalize to [0,1] suspiciousness
50
- score = max(0.0, min(1.0, (ratio - 0.10) / 0.20))
51
- sev = _severity_from_score(score)
52
- return ArtifactIndicator(
53
- type="gan_artifact",
54
- severity=sev,
55
- description=(
56
- f"High-frequency energy ratio {ratio:.3f} — "
57
- + ("elevated HF energy consistent with GAN/diffusion outputs" if score > 0.4
58
- else "natural frequency falloff")
59
- ),
60
- confidence=float(score),
61
- )
62
- except Exception as e: # noqa: BLE001
63
- logger.warning(f"GAN HF detection failed: {e}")
64
- return None
65
-
66
-
67
- # ---------- 2. JPEG quantization table anomaly ----------
68
- _STANDARD_Q_SUMS = { # rough heuristic: camera JPEGs fall in these ranges
69
- 50: (1500, 4500),
70
- 75: (600, 2500),
71
- 90: (200, 1000),
72
- 95: (100, 600),
73
- }
74
-
75
-
76
- def detect_compression_anomaly(raw_bytes: bytes) -> ArtifactIndicator | None:
77
- """Inspect JPEG quantization tables. Missing tables, non-standard layouts,
78
- or re-saved tables often indicate manipulation or re-encoding.
79
- """
80
- try:
81
- img = Image.open(io.BytesIO(raw_bytes))
82
- if img.format != "JPEG":
83
- return ArtifactIndicator(
84
- type="compression",
85
- severity="low",
86
- description=f"Non-JPEG format ({img.format}); compression signature not available",
87
- confidence=0.1,
88
- )
89
-
90
- q = getattr(img, "quantization", None)
91
- if not q:
92
- return ArtifactIndicator(
93
- type="compression",
94
- severity="low",
95
- description="No JPEG quantization tables readable",
96
- confidence=0.2,
97
- )
98
-
99
- tables = list(q.values())
100
- sums = [int(sum(t)) for t in tables]
101
- num_tables = len(tables)
102
-
103
- # Heuristics: very low sum → very high quality (possibly re-saved);
104
- # non-standard number of tables; extreme values.
105
- suspicious = 0.0
106
- reasons: list[str] = []
107
- if num_tables not in (1, 2):
108
- suspicious += 0.4
109
- reasons.append(f"unusual table count ({num_tables})")
110
- if any(s < 60 for s in sums):
111
- suspicious += 0.3
112
- reasons.append("very low quantization sums (possible re-encoding)")
113
- if any(s > 8000 for s in sums):
114
- suspicious += 0.2
115
- reasons.append("very high quantization sums")
116
-
117
- score = max(0.0, min(1.0, suspicious))
118
- sev = _severity_from_score(score)
119
- desc = (
120
- f"JPEG Q-table sums {sums}"
121
- + (f"; {', '.join(reasons)}" if reasons else "; within typical camera range")
122
- )
123
- return ArtifactIndicator(
124
- type="compression",
125
- severity=sev,
126
- description=desc,
127
- confidence=float(score),
128
- )
129
- except Exception as e: # noqa: BLE001
130
- logger.warning(f"Compression anomaly detection failed: {e}")
131
- return None
132
-
133
-
134
- # ---------- 3. Facial boundary + 4. Lighting (MediaPipe) ----------
135
- def detect_face_based_artifacts(pil_img: Image.Image) -> List[ArtifactIndicator]:
136
- """If a face is detected, analyze jaw boundary variance and per-quadrant
137
- luminance balance. Returns 0, 1, or 2 indicators.
138
- """
139
- results: List[ArtifactIndicator] = []
140
- try:
141
- import mediapipe as mp # type: ignore
142
-
143
- from models.model_loader import get_model_loader
144
-
145
- detector = get_model_loader().load_face_detector()
146
- rgb = np.asarray(pil_img.convert("RGB"))
147
- h, w = rgb.shape[:2]
148
- mp_result = detector.process(rgb)
149
-
150
- if not mp_result.multi_face_landmarks:
151
- return results
152
-
153
- landmarks = mp_result.multi_face_landmarks[0].landmark
154
-
155
- # ----- Jaw boundary jitter -----
156
- # FaceMesh jaw/oval landmark indices (approximate face contour)
157
- JAW_IDX = [
158
- 10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361,
159
- 288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149,
160
- 150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
161
- ]
162
- pts = np.array([(landmarks[i].x * w, landmarks[i].y * h) for i in JAW_IDX])
163
- # Second-difference magnitude = local curvature jitter
164
- diffs = np.diff(pts, axis=0)
165
- seconds = np.diff(diffs, axis=0)
166
- jitter = float(np.linalg.norm(seconds, axis=1).mean()) / max(w, h)
167
- jitter_score = max(0.0, min(1.0, (jitter - 0.003) / 0.010))
168
- results.append(
169
- ArtifactIndicator(
170
- type="facial_boundary",
171
- severity=_severity_from_score(jitter_score),
172
- description=(
173
- f"Jaw-contour jitter {jitter:.4f} (normalized) — "
174
- + ("inconsistent boundary blending detected" if jitter_score > 0.4
175
- else "face boundary appears smooth")
176
- ),
177
- confidence=float(jitter_score),
178
- )
179
- )
180
-
181
- # ----- Lighting inconsistency (per-quadrant luminance) -----
182
- xs = np.array([lm.x * w for lm in landmarks])
183
- ys = np.array([lm.y * h for lm in landmarks])
184
- x0, x1 = int(max(0, xs.min())), int(min(w, xs.max()))
185
- y0, y1 = int(max(0, ys.min())), int(min(h, ys.max()))
186
- if x1 > x0 + 4 and y1 > y0 + 4:
187
- face_crop = rgb[y0:y1, x0:x1]
188
- gray = 0.299 * face_crop[..., 0] + 0.587 * face_crop[..., 1] + 0.114 * face_crop[..., 2]
189
- hh, ww = gray.shape
190
- quads = [
191
- gray[: hh // 2, : ww // 2],
192
- gray[: hh // 2, ww // 2 :],
193
- gray[hh // 2 :, : ww // 2],
194
- gray[hh // 2 :, ww // 2 :],
195
- ]
196
- means = np.array([q.mean() for q in quads if q.size > 0])
197
- if means.size == 4 and means.mean() > 1e-3:
198
- imbalance = float(means.std() / means.mean())
199
- lighting_score = max(0.0, min(1.0, (imbalance - 0.08) / 0.20))
200
- results.append(
201
- ArtifactIndicator(
202
- type="lighting",
203
- severity=_severity_from_score(lighting_score),
204
- description=(
205
- f"Luminance imbalance across face quadrants {imbalance:.3f} — "
206
- + ("inconsistent lighting direction" if lighting_score > 0.4
207
- else "lighting appears uniform")
208
- ),
209
- confidence=float(lighting_score),
210
- )
211
- )
212
- except Exception as e: # noqa: BLE001
213
- logger.warning(f"Face-based artifact detection failed: {e}")
214
-
215
- return results
216
-
217
-
218
- # ---------- Orchestrator ----------
219
- def scan_artifacts(pil_img: Image.Image, raw_bytes: bytes) -> List[ArtifactIndicator]:
220
- indicators: List[ArtifactIndicator] = []
221
- for fn in (
222
- lambda: detect_gan_hf_artifact(pil_img),
223
- lambda: detect_compression_anomaly(raw_bytes),
224
- ):
225
- ind = fn()
226
- if ind is not None:
227
- indicators.append(ind)
228
- indicators.extend(detect_face_based_artifacts(pil_img))
229
- return indicators
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
auth.py DELETED
@@ -1,30 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from datetime import datetime
4
-
5
- from pydantic import BaseModel, EmailStr, Field
6
-
7
-
8
- class RegisterBody(BaseModel):
9
- email: EmailStr
10
- password: str = Field(min_length=6, max_length=128)
11
- name: str | None = Field(default=None, max_length=255)
12
-
13
-
14
- class LoginBody(BaseModel):
15
- email: EmailStr
16
- password: str
17
-
18
-
19
- class UserOut(BaseModel):
20
- id: int
21
- email: str
22
- name: str | None = None
23
- created_at: datetime
24
-
25
-
26
- class TokenResponse(BaseModel):
27
- access_token: str
28
- token_type: str = "bearer"
29
- expires_in_minutes: int
30
- user: UserOut
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
auth_service.py DELETED
@@ -1,67 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from datetime import datetime, timedelta, timezone
4
- from typing import Any
5
-
6
- import bcrypt
7
- from jose import JWTError, jwt
8
- from sqlalchemy.orm import Session
9
-
10
- from config import settings
11
- from db.models import User
12
-
13
-
14
- def _encode_pw(plain: str) -> bytes:
15
- # bcrypt truncates to 72 bytes silently in some builds and hard-errors in others.
16
- # Truncate explicitly so behavior is deterministic across versions.
17
- return plain.encode("utf-8")[:72]
18
-
19
-
20
- def hash_password(plain: str) -> str:
21
- return bcrypt.hashpw(_encode_pw(plain), bcrypt.gensalt()).decode("utf-8")
22
-
23
-
24
- def verify_password(plain: str, hashed: str) -> bool:
25
- try:
26
- return bcrypt.checkpw(_encode_pw(plain), hashed.encode("utf-8"))
27
- except Exception:
28
- return False
29
-
30
-
31
- def create_access_token(user_id: int, email: str) -> str:
32
- now = datetime.now(timezone.utc)
33
- payload = {
34
- "sub": str(user_id),
35
- "email": email,
36
- "iat": int(now.timestamp()),
37
- "exp": int((now + timedelta(minutes=settings.JWT_EXPIRATION_MINUTES)).timestamp()),
38
- }
39
- return jwt.encode(payload, settings.JWT_SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
40
-
41
-
42
- def decode_token(token: str) -> dict[str, Any] | None:
43
- try:
44
- return jwt.decode(token, settings.JWT_SECRET_KEY, algorithms=[settings.JWT_ALGORITHM])
45
- except JWTError:
46
- return None
47
-
48
-
49
- def register_user(db: Session, email: str, password: str, name: str | None) -> User:
50
- email = email.strip().lower()
51
- user = User(email=email, password_hash=hash_password(password), name=(name or None))
52
- db.add(user)
53
- db.commit()
54
- db.refresh(user)
55
- return user
56
-
57
-
58
- def authenticate(db: Session, email: str, password: str) -> User | None:
59
- email = email.strip().lower()
60
- user = db.query(User).filter(User.email == email).first()
61
- if not user or not verify_password(password, user.password_hash):
62
- return None
63
- return user
64
-
65
-
66
- def get_user(db: Session, user_id: int) -> User | None:
67
- return db.query(User).filter(User.id == user_id).first()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
common.py DELETED
@@ -1,88 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import List, Optional
4
-
5
- from pydantic import BaseModel, ConfigDict, Field
6
-
7
-
8
- class Verdict(BaseModel):
9
- model_config = ConfigDict(protected_namespaces=())
10
-
11
- label: str
12
- severity: str
13
- authenticity_score: int = Field(ge=0, le=100)
14
- model_confidence: float = Field(ge=0.0, le=1.0)
15
- model_label: str
16
-
17
-
18
- class ArtifactIndicator(BaseModel):
19
- type: str
20
- severity: str # low | medium | high
21
- description: str
22
- confidence: float = Field(ge=0.0, le=1.0)
23
-
24
-
25
- class TrustedSource(BaseModel):
26
- source_name: str
27
- title: str
28
- url: str
29
- published_at: Optional[str] = None
30
- relevance_score: float = Field(ge=0.0, le=1.0)
31
-
32
-
33
- class ContradictingEvidence(BaseModel):
34
- source_name: str
35
- title: str
36
- url: str
37
- type: str = "fact_check"
38
-
39
-
40
- class TruthOverride(BaseModel):
41
- applied: bool = False
42
- source_url: str = ""
43
- source_name: str = ""
44
- similarity: float = 0.0
45
- fake_prob_before: float = 0.0
46
- fake_prob_after: float = 0.0
47
-
48
-
49
- class ExifSummary(BaseModel):
50
- make: Optional[str] = None
51
- model: Optional[str] = None
52
- datetime_original: Optional[str] = None
53
- gps_info: Optional[str] = None
54
- software: Optional[str] = None
55
- lens_model: Optional[str] = None
56
- trust_adjustment: int = 0 # negative = more real, positive = more fake
57
- trust_reason: str = ""
58
-
59
-
60
- class LLMExplainabilitySummary(BaseModel):
61
- paragraph: str = ""
62
- bullets: List[str] = []
63
- model_used: str = ""
64
- cached: bool = False
65
-
66
-
67
- class VLMComponentScore(BaseModel):
68
- score: int = Field(ge=0, le=100, default=75)
69
- notes: str = ""
70
-
71
-
72
- class VLMBreakdown(BaseModel):
73
- facial_symmetry: VLMComponentScore = VLMComponentScore()
74
- skin_texture: VLMComponentScore = VLMComponentScore()
75
- lighting_consistency: VLMComponentScore = VLMComponentScore()
76
- background_coherence: VLMComponentScore = VLMComponentScore()
77
- anatomy_hands_eyes: VLMComponentScore = VLMComponentScore()
78
- context_objects: VLMComponentScore = VLMComponentScore()
79
- model_used: str = ""
80
- cached: bool = False
81
-
82
-
83
- class ProcessingSummary(BaseModel):
84
- model_config = ConfigDict(protected_namespaces=())
85
-
86
- stages_completed: List[str]
87
- total_duration_ms: int
88
- model_used: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py CHANGED
@@ -20,6 +20,7 @@ class Settings(BaseSettings):
20
 
21
  # AI Models
22
  IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
 
23
  TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
24
  # Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
25
  TEXT_MULTILANG_MODEL_ID: str = ""
@@ -37,15 +38,47 @@ class Settings(BaseSettings):
37
  REPORT_DIR: str = "./temp_reports"
38
  REPORT_TTL_SECONDS: int = 3600 # 1h expiry
39
 
 
 
 
 
40
  # LLM Explainability (Phase 12)
41
  LLM_PROVIDER: str = "gemini" # "gemini" | "openai"
42
  LLM_API_KEY: str = ""
43
- LLM_MODEL: str = "gemini-2.5-pro" # or "gpt-4o"
 
 
 
 
 
44
 
45
  # EfficientNet (ICPR2020 / DeepShield1 merge)
46
  EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
47
  EFFICIENTNET_TRAIN_DB: str = "DFDC"
48
  ENSEMBLE_MODE: bool = True # run both ViT + EfficientNet and average scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  VIDEO_SAMPLE_FRAMES: int = 16 # frames to sample per video for inference
50
  EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
51
 
 
20
 
21
  # AI Models
22
  IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
23
+ GENERAL_IMAGE_MODEL_ID: str = "umm-maybe/AI-image-detector"
24
  TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
25
  # Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
26
  TEXT_MULTILANG_MODEL_ID: str = ""
 
38
  REPORT_DIR: str = "./temp_reports"
39
  REPORT_TTL_SECONDS: int = 3600 # 1h expiry
40
 
41
+ # Phase 19 — dedup cache + object storage
42
+ CACHE_TTL_DAYS: int = 30
43
+ MEDIA_ROOT: str = "./media"
44
+
45
  # LLM Explainability (Phase 12)
46
  LLM_PROVIDER: str = "gemini" # "gemini" | "openai"
47
  LLM_API_KEY: str = ""
48
+ LLM_MODEL: str = "gemini-2.5-flash" # flash is ~12x cheaper + larger free-tier quota than pro. Use "gemini-2.5-pro" for harder reasoning.
49
+
50
+ # LLM fallback — Groq (Llama 3.3 70B by default). Used automatically when the
51
+ # primary provider returns 429/quota exceeded. Leave empty to disable fallback.
52
+ GROQ_API_KEY: str = ""
53
+ GROQ_MODEL: str = "llama-3.3-70b-versatile"
54
 
55
  # EfficientNet (ICPR2020 / DeepShield1 merge)
56
  EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
57
  EFFICIENTNET_TRAIN_DB: str = "DFDC"
58
  ENSEMBLE_MODE: bool = True # run both ViT + EfficientNet and average scores
59
+
60
+ # Phase 11.3: FFPP-fine-tuned ViT. Path is resolved relative to the repo root.
61
+ # The checkpoint lives at <repo_root>/trained_models/ (the `trained_models/` dir
62
+ # at the project root, alongside `backend/` and `frontend/`).
63
+ FFPP_MODEL_PATH: str = "trained_models"
64
+ # Optional: pull FFPP checkpoint from Hugging Face Hub when local checkpoint
65
+ # is missing (keeps large model files out of GitHub source repo).
66
+ FFPP_MODEL_REPO_ID: str = ""
67
+ FFPP_MODEL_REVISION: str = "main"
68
+ FFPP_BASE_PROCESSOR_ID: str = "google/vit-base-patch16-224-in21k"
69
+ FFPP_ENABLED: bool = True
70
+ # Ensemble weights — FFPP is trained on a better (face-specific FFPP c40) dataset
71
+ # and is weighted more heavily when a face is present. When no face is detected,
72
+ # we still blend it but lean on the generic ViT since FFPP only saw face crops.
73
+ FFPP_WEIGHT_FACE: float = 0.55 # face-present ensemble weight
74
+ VIT_WEIGHT_FACE: float = 0.20
75
+ EFFNET_WEIGHT_FACE: float = 0.25
76
+ FFPP_WEIGHT_NOFACE: float = 0.35 # no-face ensemble weight
77
+ VIT_WEIGHT_NOFACE: float = 0.65
78
+ NOFACE_GENERAL_WEIGHT: float = 0.60
79
+ NOFACE_FORENSICS_WEIGHT: float = 0.20
80
+ NOFACE_EXIF_WEIGHT: float = 0.10
81
+ NOFACE_VLM_WEIGHT: float = 0.10
82
  VIDEO_SAMPLE_FRAMES: int = 16 # frames to sample per video for inference
83
  EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
84
 
database.py DELETED
@@ -1,30 +0,0 @@
1
- from sqlalchemy import create_engine
2
- from sqlalchemy.orm import DeclarativeBase, sessionmaker
3
-
4
- from config import settings
5
-
6
- engine = create_engine(
7
- settings.DATABASE_URL,
8
- connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
9
- pool_pre_ping=True,
10
- pool_recycle=300,
11
- )
12
-
13
- SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
14
-
15
-
16
- class Base(DeclarativeBase):
17
- pass
18
-
19
-
20
- def get_db():
21
- db = SessionLocal()
22
- try:
23
- yield db
24
- finally:
25
- db.close()
26
-
27
-
28
- def init_db():
29
- from db import models # noqa: F401
30
- Base.metadata.create_all(bind=engine)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/__init__.py DELETED
File without changes
datasets/build_manifest.py DELETED
@@ -1,93 +0,0 @@
1
- """Build a unified train/val/test manifest (70/15/15) across all dataset buckets.
2
-
3
- Expected input layout (produced by the other scripts in this package):
4
-
5
- data_root/
6
- real/
7
- ffpp_youtube/*.jpg # frames from FFPP original_sequences
8
- ffhq/*.jpg # FFHQ thumbnails
9
-
10
- fake/
11
- ffpp_deepfakes/*.jpg
12
- ffpp_face2face/*.jpg
13
- ffpp_faceswap/*.jpg
14
- ffpp_neuraltextures/*.jpg
15
- ffpp_faceshifter/*.jpg
16
- dfdc/*.jpg
17
-
18
- The manifest is stratified by (label, source) so FFHQ stays represented
19
- in val/test.
20
-
21
- Usage:
22
- python -m backend.training.datasets.build_manifest \
23
- --data ./data --out ./data/manifest.csv --seed 42
24
- """
25
- from __future__ import annotations
26
-
27
- import argparse
28
- import csv
29
- import random
30
- from collections import defaultdict
31
- from pathlib import Path
32
-
33
- IMG_EXTS = {".jpg", ".jpeg", ".png"}
34
-
35
-
36
- def collect(data_root: Path) -> list[tuple[str, str, str]]:
37
- rows: list[tuple[str, str, str]] = []
38
- for label in ("real", "fake"):
39
- label_root = data_root / label
40
- if not label_root.exists():
41
- continue
42
- for source_dir in sorted(p for p in label_root.iterdir() if p.is_dir()):
43
- for img in source_dir.rglob("*"):
44
- if img.suffix.lower() in IMG_EXTS and img.is_file():
45
- rows.append((str(img.resolve()), label, source_dir.name))
46
- return rows
47
-
48
-
49
- def split(rows: list[tuple[str, str, str]], seed: int) -> dict[str, list[tuple[str, str, str]]]:
50
- buckets: dict[tuple[str, str], list[tuple[str, str, str]]] = defaultdict(list)
51
- for r in rows:
52
- buckets[(r[1], r[2])].append(r)
53
-
54
- rng = random.Random(seed)
55
- out = {"train": [], "val": [], "test": []}
56
- for key, items in buckets.items():
57
- rng.shuffle(items)
58
- n = len(items)
59
- n_train = int(0.70 * n)
60
- n_val = int(0.15 * n)
61
- out["train"].extend(items[:n_train])
62
- out["val"].extend(items[n_train : n_train + n_val])
63
- out["test"].extend(items[n_train + n_val :])
64
- return out
65
-
66
-
67
- def main() -> None:
68
- ap = argparse.ArgumentParser()
69
- ap.add_argument("--data", required=True, type=Path)
70
- ap.add_argument("--out", required=True, type=Path)
71
- ap.add_argument("--seed", type=int, default=42)
72
- args = ap.parse_args()
73
-
74
- rows = collect(args.data)
75
- if not rows:
76
- raise SystemExit(f"No images found under {args.data}")
77
-
78
- splits = split(rows, args.seed)
79
- args.out.parent.mkdir(parents=True, exist_ok=True)
80
- with args.out.open("w", newline="", encoding="utf-8") as f:
81
- w = csv.writer(f)
82
- w.writerow(["path", "label", "source", "split"])
83
- for name, items in splits.items():
84
- for path, label, source in items:
85
- w.writerow([path, label, source, name])
86
-
87
- summary = {k: len(v) for k, v in splits.items()}
88
- print(f"Manifest: {args.out}")
89
- print(f"Totals: {summary} (overall {sum(summary.values())})")
90
-
91
-
92
- if __name__ == "__main__":
93
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/download_dfdc_sample.py DELETED
@@ -1,44 +0,0 @@
1
- """Download a sample of the DFDC (Deepfake Detection Challenge) Preview dataset.
2
-
3
- The full DFDC is ~470GB; the *preview* release (~5GB, Kaggle) is enough for
4
- diversity augmentation alongside FFPP.
5
-
6
- Requires the Kaggle CLI (`pip install kaggle`) and ~/.kaggle/kaggle.json.
7
-
8
- Usage:
9
- python -m backend.training.datasets.download_dfdc_sample --output ./data/dfdc_preview
10
- """
11
- from __future__ import annotations
12
-
13
- import argparse
14
- import shutil
15
- import subprocess
16
- import sys
17
- from pathlib import Path
18
-
19
-
20
- def main() -> None:
21
- ap = argparse.ArgumentParser()
22
- ap.add_argument("--output", required=True, type=Path)
23
- ap.add_argument(
24
- "--competition",
25
- default="deepfake-detection-challenge",
26
- help="Kaggle competition slug (default: deepfake-detection-challenge preview).",
27
- )
28
- args = ap.parse_args()
29
-
30
- kaggle = shutil.which("kaggle")
31
- if kaggle is None:
32
- print("Kaggle CLI not found. Install with: pip install kaggle", file=sys.stderr)
33
- print("Then place kaggle.json in ~/.kaggle/ (chmod 600).", file=sys.stderr)
34
- sys.exit(2)
35
-
36
- args.output.mkdir(parents=True, exist_ok=True)
37
- cmd = [kaggle, "competitions", "download", "-c", args.competition, "-p", str(args.output)]
38
- print("Running:", " ".join(cmd))
39
- subprocess.run(cmd, check=True)
40
- print(f"Downloaded to {args.output}. Unzip with: unzip *.zip")
41
-
42
-
43
- if __name__ == "__main__":
44
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/download_ffhq.py DELETED
@@ -1,49 +0,0 @@
1
- """Download the FFHQ 128x128 thumbnail subset from the official Google Drive mirror.
2
-
3
- Pulls up to N images (default 10k) into the `real` bucket of the training set.
4
- Falls back to the NVlabs 'ffhq-dataset' helper if available; otherwise expects
5
- user to run the manual download once.
6
-
7
- Usage:
8
- python -m backend.training.datasets.download_ffhq --output ./data/real/ffhq -n 10000
9
- """
10
- from __future__ import annotations
11
-
12
- import argparse
13
- import shutil
14
- import subprocess
15
- import sys
16
- from pathlib import Path
17
-
18
-
19
- def try_nvlabs_helper(output: Path, num: int) -> bool:
20
- """Prefer the official ffhq-dataset downloader if installed."""
21
- helper = shutil.which("ffhq-dataset")
22
- if helper is None:
23
- return False
24
- cmd = [helper, "--json", "ffhq-dataset-v2.json", "--thumbs", "--num_threads", "4"]
25
- print("Running:", " ".join(cmd))
26
- subprocess.run(cmd, cwd=output, check=False)
27
- return True
28
-
29
-
30
- def main() -> None:
31
- ap = argparse.ArgumentParser()
32
- ap.add_argument("--output", required=True, type=Path)
33
- ap.add_argument("-n", "--num", type=int, default=10000)
34
- args = ap.parse_args()
35
- args.output.mkdir(parents=True, exist_ok=True)
36
-
37
- if try_nvlabs_helper(args.output, args.num):
38
- return
39
-
40
- print("[!] `ffhq-dataset` helper not installed.")
41
- print(" Install via: pip install ffhq-dataset (requires gdown)")
42
- print(" Or download thumbnails128x128.zip manually from:")
43
- print(" https://github.com/NVlabs/ffhq-dataset")
44
- print(f" Extract into: {args.output}")
45
- sys.exit(1)
46
-
47
-
48
- if __name__ == "__main__":
49
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/extract_frames.py DELETED
@@ -1,90 +0,0 @@
1
- """Convert FFPP / DFDC videos -> 16 sampled frames at 224x224 RGB.
2
-
3
- Usage:
4
- python -m backend.training.datasets.extract_frames \
5
- --input ./ffpp_data/original_sequences/youtube/raw/videos \
6
- --output ./ffpp_data/frames/real \
7
- --label real --frames 16 --size 224
8
- """
9
- from __future__ import annotations
10
-
11
- import argparse
12
- import csv
13
- from pathlib import Path
14
-
15
- import cv2
16
- import numpy as np
17
- from tqdm import tqdm
18
-
19
-
20
- def sample_frame_indices(total: int, n: int) -> list[int]:
21
- if total <= 0:
22
- return []
23
- if total <= n:
24
- return list(range(total))
25
- step = total / float(n)
26
- return [min(total - 1, int(step * i + step / 2)) for i in range(n)]
27
-
28
-
29
- def extract_from_video(path: Path, out_dir: Path, n: int, size: int) -> int:
30
- cap = cv2.VideoCapture(str(path))
31
- if not cap.isOpened():
32
- return 0
33
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
34
- indices = set(sample_frame_indices(total, n))
35
- out_dir.mkdir(parents=True, exist_ok=True)
36
-
37
- saved = 0
38
- i = 0
39
- while True:
40
- ok, frame = cap.read()
41
- if not ok:
42
- break
43
- if i in indices:
44
- frame = cv2.resize(frame, (size, size), interpolation=cv2.INTER_AREA)
45
- cv2.imwrite(str(out_dir / f"{path.stem}_f{i:06d}.jpg"), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
46
- saved += 1
47
- i += 1
48
- cap.release()
49
- return saved
50
-
51
-
52
- def main() -> None:
53
- ap = argparse.ArgumentParser(description="Sample N frames per video and resize.")
54
- ap.add_argument("--input", required=True, type=Path, help="Directory of .mp4 videos (recursive).")
55
- ap.add_argument("--output", required=True, type=Path, help="Directory to write .jpg frames.")
56
- ap.add_argument("--label", required=True, choices=["real", "fake"], help="Label tag for manifest.")
57
- ap.add_argument("--frames", type=int, default=16)
58
- ap.add_argument("--size", type=int, default=224)
59
- ap.add_argument("--manifest", type=Path, default=None, help="Optional CSV manifest append path.")
60
- args = ap.parse_args()
61
-
62
- videos = [p for p in args.input.rglob("*.mp4")]
63
- if not videos:
64
- print(f"No .mp4 found under {args.input}")
65
- return
66
-
67
- rows: list[tuple[str, str, str]] = []
68
- total_frames = 0
69
- for vid in tqdm(videos, desc=f"extract[{args.label}]"):
70
- rel_out = args.output / vid.stem
71
- saved = extract_from_video(vid, rel_out, args.frames, args.size)
72
- total_frames += saved
73
- if args.manifest is not None:
74
- for jpg in rel_out.glob("*.jpg"):
75
- rows.append((str(jpg), args.label, vid.stem))
76
-
77
- if args.manifest is not None and rows:
78
- args.manifest.parent.mkdir(parents=True, exist_ok=True)
79
- new_file = not args.manifest.exists()
80
- with args.manifest.open("a", newline="", encoding="utf-8") as f:
81
- w = csv.writer(f)
82
- if new_file:
83
- w.writerow(["path", "label", "source_video"])
84
- w.writerows(rows)
85
-
86
- print(f"Done. Videos: {len(videos)}, frames written: {total_frames}")
87
-
88
-
89
- if __name__ == "__main__":
90
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/procure_all.ps1 DELETED
@@ -1,40 +0,0 @@
1
- # Phase 11.1 orchestrator for Windows (PowerShell)
2
- $ErrorActionPreference = "Stop"
3
-
4
- $ROOT = if ($env:ROOT) { $env:ROOT } else { ".\data" }
5
- $FFPP = if ($env:FFPP) { $env:FFPP } else { ".\ffpp_data" }
6
-
7
- New-Item -ItemType Directory -Force -Path "$ROOT\real" | Out-Null
8
- New-Item -ItemType Directory -Force -Path "$ROOT\fake" | Out-Null
9
- New-Item -ItemType Directory -Force -Path $FFPP | Out-Null
10
-
11
- Write-Host "1. FaceForensics++ (highly compressed c40, 10 videos only) -- requires TOS keypress"
12
- python backend\scripts\download_ffpp.py $FFPP -d all -c c40 -t videos -n 10
13
-
14
- Write-Host "2. Frame extraction: real (original youtube)"
15
- python -m backend.training.datasets.extract_frames `
16
- --input "$FFPP\original_sequences\youtube\c40\videos" `
17
- --output "$ROOT\real\ffpp_youtube" --label real --frames 4 --size 224
18
-
19
- Write-Host "3. Frame extraction: fakes (each manipulation family)"
20
- $Families = @("Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures", "FaceShifter")
21
- foreach ($fam in $Families) {
22
- $famLower = $fam.ToLower()
23
- python -m backend.training.datasets.extract_frames `
24
- --input "$FFPP\manipulated_sequences\$fam\c40\videos" `
25
- --output "$ROOT\fake\ffpp_$famLower" --label fake --frames 4 --size 224
26
- }
27
-
28
- Write-Host "4. FFHQ thumbnails (real - limited to 100 items)"
29
- python -m backend.training.datasets.download_ffhq --output "$ROOT\real\ffhq" -n 100
30
-
31
-
32
- Write-Host "6. DFDC preview sample (fake+real)"
33
- python -m backend.training.datasets.download_dfdc_sample --output "$ROOT\_dfdc_raw"
34
- Write-Host "NOTE: You will need to manually unzip + sort DFDC into $ROOT\fake\dfdc and $ROOT\real\dfdc"
35
-
36
- Write-Host "7. Build manifest"
37
- python -m backend.training.datasets.build_manifest `
38
- --data $ROOT --out "$ROOT\manifest.csv" --seed 42
39
-
40
- Write-Host "Phase 11.1 complete. See $ROOT\manifest.csv"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
datasets/procure_all.sh DELETED
@@ -1,37 +0,0 @@
1
- #!/usr/bin/env bash
2
- # Phase 11.1 orchestrator: download + frame-extract + manifest.
3
- # Total disk target: ~120k labeled images. Expect 60-80GB intermediate, ~30GB frames.
4
-
5
- set -euo pipefail
6
-
7
- ROOT="${ROOT:-./data}"
8
- FFPP="${FFPP:-./ffpp_data}"
9
- mkdir -p "$ROOT/real" "$ROOT/fake" "$FFPP"
10
-
11
- # 1. FaceForensics++ (raw, videos) -- requires TOS keypress
12
- python backend/scripts/download_ffpp.py "$FFPP" -d all -c raw -t videos
13
-
14
- # 2. Frame extraction: real (original youtube)
15
- python -m backend.training.datasets.extract_frames \
16
- --input "$FFPP/original_sequences/youtube/raw/videos" \
17
- --output "$ROOT/real/ffpp_youtube" --label real --frames 16 --size 224
18
-
19
- # 3. Frame extraction: fakes (each manipulation family)
20
- for fam in Deepfakes Face2Face FaceSwap NeuralTextures FaceShifter; do
21
- python -m backend.training.datasets.extract_frames \
22
- --input "$FFPP/manipulated_sequences/$fam/raw/videos" \
23
- --output "$ROOT/fake/ffpp_${fam,,}" --label fake --frames 16 --size 224
24
- done
25
-
26
- # 4. FFHQ thumbnails (real)
27
- python -m backend.training.datasets.download_ffhq --output "$ROOT/real/ffhq" -n 10000
28
-
29
- # 6. DFDC preview sample (fake+real) -- needs Kaggle creds
30
- python -m backend.training.datasets.download_dfdc_sample --output "$ROOT/_dfdc_raw"
31
- # NOTE: unzip + sort into $ROOT/fake/dfdc and $ROOT/real/dfdc per DFDC metadata.json
32
-
33
- # 7. Build manifest
34
- python -m backend.training.datasets.build_manifest \
35
- --data "$ROOT" --out "$ROOT/manifest.csv" --seed 42
36
-
37
- echo "Phase 11.1 complete. See $ROOT/manifest.csv"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db/database.py CHANGED
@@ -1,28 +1,26 @@
1
- from sqlalchemy import create_engine
2
  from sqlalchemy.orm import DeclarativeBase, sessionmaker
3
 
4
  from config import settings
5
 
6
- _is_postgres = not settings.DATABASE_URL.startswith("sqlite")
7
-
8
  engine = create_engine(
9
  settings.DATABASE_URL,
10
- # SQLite needs check_same_thread=False; Postgres doesn't support it
11
- connect_args={"check_same_thread": False} if not _is_postgres else {},
12
- # Neon (and other serverless Postgres) silently drops idle SSL connections.
13
- # pool_pre_ping=True: test each connection before use and transparently
14
- # reconnect if the server closed it — eliminates "SSL connection has been
15
- # closed unexpectedly" 500s.
16
- pool_pre_ping=_is_postgres,
17
- # Recycle connections every 5 min so we never hold a connection past Neon's
18
- # idle timeout (~5–10 min depending on plan).
19
- pool_recycle=300 if _is_postgres else -1,
20
- # Keep pool small — HF free tier is single-process; Neon free tier has a
21
- # max-connection limit.
22
- pool_size=5 if _is_postgres else 5,
23
- max_overflow=2 if _is_postgres else 10,
24
  )
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
27
 
28
 
@@ -40,5 +38,31 @@ def get_db():
40
 
41
  def init_db():
42
  from db import models # noqa: F401
 
 
43
  Base.metadata.create_all(bind=engine)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine, event
2
  from sqlalchemy.orm import DeclarativeBase, sessionmaker
3
 
4
  from config import settings
5
 
 
 
6
  engine = create_engine(
7
  settings.DATABASE_URL,
8
+ connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
9
+ pool_pre_ping=True,
10
+ pool_recycle=300,
 
 
 
 
 
 
 
 
 
 
 
11
  )
12
 
13
+
14
+ if settings.DATABASE_URL.startswith("sqlite"):
15
+ @event.listens_for(engine, "connect")
16
+ def _sqlite_on_connect(dbapi_conn, _):
17
+ # Enforce FK constraints (needed for ON DELETE SET NULL) + WAL for better
18
+ # concurrent reads while a writer is active.
19
+ cur = dbapi_conn.cursor()
20
+ cur.execute("PRAGMA foreign_keys=ON")
21
+ cur.execute("PRAGMA journal_mode=WAL")
22
+ cur.close()
23
+
24
  SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
25
 
26
 
 
38
 
39
  def init_db():
40
  from db import models # noqa: F401
41
+ from sqlalchemy import inspect, text
42
+
43
  Base.metadata.create_all(bind=engine)
44
 
45
+ # Phase 19.4 — lightweight in-place migration for new columns.
46
+ # Alembic is overkill here; just ALTER TABLE when a new column is missing.
47
+ insp = inspect(engine)
48
+ if "analyses" in insp.get_table_names():
49
+ existing = {c["name"] for c in insp.get_columns("analyses")}
50
+ additions = {
51
+ "media_hash": "VARCHAR(64)",
52
+ "media_path": "VARCHAR(512)",
53
+ "thumbnail_url": "VARCHAR(512)",
54
+ }
55
+ with engine.begin() as conn:
56
+ for col, ddl in additions.items():
57
+ if col not in existing:
58
+ conn.execute(text(f"ALTER TABLE analyses ADD COLUMN {col} {ddl}"))
59
+ # Indices (CREATE INDEX IF NOT EXISTS is SQLite+Postgres safe)
60
+ for ddl in (
61
+ "CREATE INDEX IF NOT EXISTS ix_analyses_media_hash ON analyses (media_hash)",
62
+ "CREATE INDEX IF NOT EXISTS ix_record_user_created ON analyses (user_id, created_at)",
63
+ "CREATE INDEX IF NOT EXISTS ix_report_analysis ON reports (analysis_id)",
64
+ ):
65
+ try:
66
+ conn.execute(text(ddl))
67
+ except Exception: # noqa: BLE001
68
+ pass
db/models.py CHANGED
@@ -1,6 +1,6 @@
1
- from datetime import datetime
2
 
3
- from sqlalchemy import DateTime, ForeignKey, Integer, String, Text
4
  from sqlalchemy.orm import Mapped, mapped_column, relationship
5
 
6
  from db.database import Base
@@ -13,7 +13,7 @@ class User(Base):
13
  email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
14
  password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
15
  name: Mapped[str | None] = mapped_column(String(255), nullable=True)
16
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
17
 
18
  analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
19
 
@@ -22,16 +22,26 @@ class AnalysisRecord(Base):
22
  __tablename__ = "analyses"
23
 
24
  id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
25
- user_id: Mapped[int | None] = mapped_column(ForeignKey("users.id"), nullable=True)
 
 
26
  media_type: Mapped[str] = mapped_column(String(32), nullable=False) # image|video|text|screenshot
27
  verdict: Mapped[str] = mapped_column(String(32), nullable=False)
28
  authenticity_score: Mapped[float] = mapped_column(nullable=False)
29
  result_json: Mapped[str] = mapped_column(Text, nullable=False)
30
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
 
 
 
 
31
 
32
  user: Mapped["User | None"] = relationship(back_populates="analyses")
33
  report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
34
 
 
 
 
 
35
 
36
  class Report(Base):
37
  __tablename__ = "reports"
@@ -39,7 +49,11 @@ class Report(Base):
39
  id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
40
  analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
41
  file_path: Mapped[str] = mapped_column(String(512), nullable=False)
42
- created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
43
- expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
44
 
45
  analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")
 
 
 
 
 
1
+ from datetime import datetime, timezone
2
 
3
+ from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text
4
  from sqlalchemy.orm import Mapped, mapped_column, relationship
5
 
6
  from db.database import Base
 
13
  email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
14
  password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
15
  name: Mapped[str | None] = mapped_column(String(255), nullable=True)
16
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
17
 
18
  analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
19
 
 
22
  __tablename__ = "analyses"
23
 
24
  id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
25
+ user_id: Mapped[int | None] = mapped_column(
26
+ ForeignKey("users.id", ondelete="SET NULL"), nullable=True,
27
+ )
28
  media_type: Mapped[str] = mapped_column(String(32), nullable=False) # image|video|text|screenshot
29
  verdict: Mapped[str] = mapped_column(String(32), nullable=False)
30
  authenticity_score: Mapped[float] = mapped_column(nullable=False)
31
  result_json: Mapped[str] = mapped_column(Text, nullable=False)
32
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
33
+ # Phase 19.1 / 19.2 — SHA-256 dedup + object storage
34
+ media_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
35
+ media_path: Mapped[str | None] = mapped_column(String(512), nullable=True)
36
+ thumbnail_url: Mapped[str | None] = mapped_column(String(512), nullable=True)
37
 
38
  user: Mapped["User | None"] = relationship(back_populates="analyses")
39
  report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
40
 
41
+ __table_args__ = (
42
+ Index("ix_record_user_created", "user_id", "created_at"),
43
+ )
44
+
45
 
46
  class Report(Base):
47
  __tablename__ = "reports"
 
49
  id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
50
  analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
51
  file_path: Mapped[str] = mapped_column(String(512), nullable=False)
52
+ created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
53
+ expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
54
 
55
  analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")
56
+
57
+ __table_args__ = (
58
+ Index("ix_report_analysis", "analysis_id"),
59
+ )
deepshield.db-shm ADDED
Binary file (32.8 kB). View file
 
deepshield.db-wal ADDED
Binary file (86.6 kB). View file
 
deepshield_13_5bcf1328.pdf DELETED
@@ -1,148 +0,0 @@
1
- %PDF-1.4
2
- %���� ReportLab Generated PDF document (opensource)
3
- 1 0 obj
4
- <<
5
- /F1 2 0 R /F2 3 0 R /F3 5 0 R
6
- >>
7
- endobj
8
- 2 0 obj
9
- <<
10
- /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
11
- >>
12
- endobj
13
- 3 0 obj
14
- <<
15
- /BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
16
- >>
17
- endobj
18
- 4 0 obj
19
- <<
20
- /Contents 18 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
21
- /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
22
- >> /Rotate 0 /Trans <<
23
-
24
- >>
25
- /Type /Page
26
- >>
27
- endobj
28
- 5 0 obj
29
- <<
30
- /BaseFont /Symbol /Name /F3 /Subtype /Type1 /Type /Font
31
- >>
32
- endobj
33
- 6 0 obj
34
- <<
35
- /Contents 19 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
36
- /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
37
- >> /Rotate 0 /Trans <<
38
-
39
- >>
40
- /Type /Page
41
- >>
42
- endobj
43
- 7 0 obj
44
- <<
45
- /Outlines 9 0 R /PageMode /UseNone /Pages 17 0 R /Type /Catalog
46
- >>
47
- endobj
48
- 8 0 obj
49
- <<
50
- /Author () /CreationDate (D:20260415181653+05'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260415181653+05'00') /Producer (xhtml2pdf <https://github.com/xhtml2pdf/xhtml2pdf/>)
51
- /Subject () /Title (DeepShield Analysis Report \204 7771f496-45b1-4c97-8a1a-d9d2492ca67d) /Trapped /False
52
- >>
53
- endobj
54
- 9 0 obj
55
- <<
56
- /Count 3 /First 10 0 R /Last 10 0 R /Type /Outlines
57
- >>
58
- endobj
59
- 10 0 obj
60
- <<
61
- /Count -4 /Dest [ 4 0 R /Fit ] /First 11 0 R /Last 16 0 R /Parent 9 0 R /Title (DeepShield Analysis Report)
62
- >>
63
- endobj
64
- 11 0 obj
65
- <<
66
- /Dest [ 4 0 R /Fit ] /Next 12 0 R /Parent 10 0 R /Title (Verdict)
67
- >>
68
- endobj
69
- 12 0 obj
70
- <<
71
- /Count -2 /Dest [ 4 0 R /Fit ] /First 13 0 R /Last 14 0 R /Next 15 0 R /Parent 10 0 R
72
- /Prev 11 0 R /Title (Text Classification)
73
- >>
74
- endobj
75
- 13 0 obj
76
- <<
77
- /Dest [ 4 0 R /Fit ] /Next 14 0 R /Parent 12 0 R /Title (Sensationalism Signals)
78
- >>
79
- endobj
80
- 14 0 obj
81
- <<
82
- /Dest [ 4 0 R /Fit ] /Parent 12 0 R /Prev 13 0 R /Title (Extracted Keywords)
83
- >>
84
- endobj
85
- 15 0 obj
86
- <<
87
- /Dest [ 4 0 R /Fit ] /Next 16 0 R /Parent 10 0 R /Prev 12 0 R /Title (Trusted Source Cross-Reference \(1\))
88
- >>
89
- endobj
90
- 16 0 obj
91
- <<
92
- /Dest [ 6 0 R /Fit ] /Parent 10 0 R /Prev 15 0 R /Title (Processing Summary)
93
- >>
94
- endobj
95
- 17 0 obj
96
- <<
97
- /Count 2 /Kids [ 4 0 R 6 0 R ] /Type /Pages
98
- >>
99
- endobj
100
- 18 0 obj
101
- <<
102
- /Filter [ /ASCII85Decode /FlateDecode ] /Length 1750
103
- >>
104
- stream
105
- Gb"/(9lo&I&A@sBlm4G[Acr2Y4p^$ca2t\gAsuiHo\c,I9gURE8lSA3M>qu?,XkR;()9nE&%0G$"Ts\%gUFdJ0E[3iXSb#I!k]Slq-+&^_fu5V&-:f'>`[5155TjpXI_!]U"iQd1qrcX0jNK021sk.K_S`f[kfkaR[pr2$LLU)UX&`3>7R17rJ3t':B_<4Kk*Grr8\a:5/Z<<[I]mbfHq28c@Y+3O)t)0k@mu0K^fiq^N*(u.%T.'jl<s/Nh4He2l7^V7l^6+r/e]g]la.!>S?L^o+>>SgBV8H:sX>5A0-l`)&\h4Lk6L5I=)ArV#_bh%^>M_c,"jSErfH[2A&CfKtLn_&K3h)!u;:i'6.H*(apE@/QWkIgF*OaTZ"ZT=me'_?iN-hL[(uHeb"'/B!\/7d068ieW>Y3P8NcsU#;"%eOe_!^-"Xsc?9a'H,u4"nMEm$3F[>c1S8J!`Sh;Ye8pG>de>ac3KpI*&j-(`*[@OB&i#OgJSl=(I-'<c@@S(D;k%W_$;Jl?$^4Y-G*rH-Rk_h_*=&9o`q/eu[3o$--Zc#XoX(sA&CI7RqS'cWBhG2:+ODa!):O6`^NT((K7(:%BVJ3=F%emKe-WmK3EIie5ZAbGXt^Hf,[uurZtImn"m<3AaU$p)@,./&T/aMg@_t-oU(Al5HTNb;0J4E-fqZg*4Y/o@,5%"0ObY@,kKsQdk#2'pZOD8tZrghVcMH[#FI&3f.,FmGKKKNo9?B[@`=FkP`:=oo>;4Vs.^rc%L+kt99^Gd]mfUsWoLD02jLH*WUl.Pb(oF^j?7RUN!m&Us22M!@A<RB<?,"#orPd]<&>ld**8+J._-f-FEVm$t<`HO6GNqd_[bhJ&8qK0d-ZKt;EB60u<VCgOQ;8F:jeCp]E2HpO&5==e.Z2c5.#%nBkfCHsrt>d0-2Z<CdP%-(PZ=R(ET3u6<D1@I(u[6LMn;M%:K3fl4ls;SX'd>:*Z]IT(dG)'7QU\#<V$$AmO6;HncG;?UO[<qf,QJem^o.f$D3^V'_h3dF.f82/[@>u^ecY/FgdnO#RWf_=Js*t;iiO?'fQ:g&@nC/Xhu.;&o1b+?_6-Z%i4;1H5GAUag0*4LfL'2;Sl`["O/H6p>jU\SO4%Ffq^-']m<b(Mo1Vg;h"E$f8Z?_AL@bH31kAKY%KEP\PmsdK2MJ^Dfb%0.sgc_9*[9&'t*;+>uUp/PKbuj>J71&Mh5t,WF_k&]O@P+do^;.WV"r6Kkb#5`,aF$-adPdc+'072](pse[q;.^?I#Q#kci1Qr9Z_U:Q_lQ53n!nIBHrchNfMeP-HF*=<22XdSrZ8j>sP4CR1SEP\Ge.aCh(VEW.)F'<]`"gVnaq<<]K,.uCIMlUqSgV3U</GlN`:3?Ft9S-uHH\_0/'rV&dUBe&=8^c)"F#b/Te`H6Yn1DnZc?T$IiaKe%'S][\*'W-]E<4.cnD8?.XB5)khib.oe$NkDa0D^I+$2a=[rbp"D3eQQqq@TO]aNHTMcGM3B3cn9,9'giRF__Y[<^:+bB3]sACEq,A$s%=n\8Vk/OM\c,W"mZ11,MaZ61]7"M`X1/qmcr-hH,#8+udNN9@p:IAM="9:b-RnD&FAVj^G'kW4tPgO+M25'hLH])Ped#fB*fOs>Te;V8("S^2/7e`3>4E]],alEY#@T-dG.(=/^7(s[bh3%omN/'WKl<"q_K`T7$VrMt.GfckX6]1EfAB]1F6o6g>\:2Etf)rD.XNrRc2pgl"Hr<(1MCd%~>endstream
106
- endobj
107
- 19 0 obj
108
- <<
109
- /Filter [ /ASCII85Decode /FlateDecode ] /Length 1251
110
- >>
111
- stream
112
- Gau`R;01GN&:Vs/fU'm&SZsB\Z>@pd[^l$Ne'"!6Hco+&(^1n<bt7%'s8H%#$m^MQApR0<`)taLn([eaAHiiuRK&mT!C!?!I`[+[8FM*9+s?gk^Sb`ESFuBheu'`^-k@VZQnjgqaj:g4M2J-c)%`([:iWt%O9mV9ZO6(4"\bX`WWWGJ,s27(iVrdq]@Q&`bX7t`KV@dkk1#U3_]/$nF6>.H%;Q95P;kU[/"Vgs.N%@'=M6kAJN1afF&?E_+rA+1KE+S:4],1QpOr^qg01e<#d,;@\e=!\1-*,1T[41J&^DSg86dC5.#&+tMiZhie$%p]f=sWJ!9ni#^ZR?Gp5lVJY,M<YHnZf[nt2A3ZtRV6dLh4C-*^gI%O$[,o&o;u7[Nu/XEmkj&m4-UHNFF#I0VCUiaS-$S2Gs[@(=.(Fg-V>W+]dGA*V*5[2WS\gs>9t%t32b/^W)[_+r7&3kOLD>8WTI508QU_ZkVRb*l"j_,ie@Wk/$,J'=rjAsRr^aIAp,g4N\@rcW@_7fV)G7.f:C\2aDCnK2"(-Yh-fNKV4ogPJ_Bbno/AG^W)=l`02mHESBSd,2MW2Q,8S^O,7f_^Pj+'$c\[n!'TZ'8A[[6$M/6Vlo9egXU318J0Zl;rXSYgM=-\-3TecfRc]m]FKNI.=E4amT3\PSaWQi;TtrPVN"#t`E;<R<T0FHF)>bkNM&M.:/OC)MK2$$?Jp$`SY/%t"jbj6*+.%6.71qjEsp)j@\0#RIF/1!&^q"O7Ou;8DL^2(?$>18.AWa`<qQ;FS*8d605U,LRjPYl%CQZ"EZ)d6ggmR/\emf.%.#K=ZXlPbU\40kfi-URgEX``iXe1pOV?N=StFNQ>H$Fi,Ak&SQPl+Y^;rG>nArp/_q%9B[r]_;\_^p'[__7OH7)iuf]c[rld?RB/M<r(<QsU%pNedj)1NmPM-_fL1VD1tNQL&@c-=<:"`[Vpojg6J[HJ4:,T\L_]InN3jJke4J(kV<hYN(d]b#E=":iOW#=k#-U%PKO/p'+,)f951AW&jRK9')Q>rP3T8Xk7<ZOVAq$3lpK6YL6tc'D2V%1G(jM8"TncWs=[!hW2(D30g$5(Q/MN1htIgRt\ADhN@$l202Af7(c#1P6?P("GPEU+>VY%=qG1""FA,mioCp,lF3^-AZtKRg/NFX>&kA^rZpnFA<r!,IA42rZQ6YFrrrLL)tME=&"E=g6gSrChSiOfRe!l*<?[tTYGRI@6&N"%Fn3=3;X6Dm0TH~>endstream
113
- endobj
114
- xref
115
- 0 20
116
- 0000000000 65535 f
117
- 0000000061 00000 n
118
- 0000000112 00000 n
119
- 0000000219 00000 n
120
- 0000000331 00000 n
121
- 0000000536 00000 n
122
- 0000000613 00000 n
123
- 0000000818 00000 n
124
- 0000000903 00000 n
125
- 0000001223 00000 n
126
- 0000001296 00000 n
127
- 0000001426 00000 n
128
- 0000001514 00000 n
129
- 0000001667 00000 n
130
- 0000001770 00000 n
131
- 0000001869 00000 n
132
- 0000001999 00000 n
133
- 0000002098 00000 n
134
- 0000002164 00000 n
135
- 0000004006 00000 n
136
- trailer
137
- <<
138
- /ID
139
- [<8e273c2672d813e3cd44109eb1edd604><8e273c2672d813e3cd44109eb1edd604>]
140
- % ReportLab generated PDF document -- digest (opensource)
141
-
142
- /Info 8 0 R
143
- /Root 7 0 R
144
- /Size 20
145
- >>
146
- startxref
147
- 5349
148
- %%EOF
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
deps.py DELETED
@@ -1,46 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from fastapi import Depends, Header, HTTPException, status
4
- from sqlalchemy.orm import Session
5
-
6
- from db.database import get_db
7
- from db.models import User
8
- from services.auth_service import decode_token, get_user
9
-
10
-
11
- def _extract_bearer(authorization: str | None) -> str | None:
12
- if not authorization:
13
- return None
14
- parts = authorization.split()
15
- if len(parts) != 2 or parts[0].lower() != "bearer":
16
- return None
17
- return parts[1]
18
-
19
-
20
- def get_current_user(
21
- authorization: str | None = Header(default=None),
22
- db: Session = Depends(get_db),
23
- ) -> User:
24
- token = _extract_bearer(authorization)
25
- if not token:
26
- raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Missing bearer token")
27
- payload = decode_token(token)
28
- if not payload or "sub" not in payload:
29
- raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired token")
30
- user = get_user(db, int(payload["sub"]))
31
- if not user:
32
- raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")
33
- return user
34
-
35
-
36
- def optional_current_user(
37
- authorization: str | None = Header(default=None),
38
- db: Session = Depends(get_db),
39
- ) -> User | None:
40
- token = _extract_bearer(authorization)
41
- if not token:
42
- return None
43
- payload = decode_token(token)
44
- if not payload or "sub" not in payload:
45
- return None
46
- return get_user(db, int(payload["sub"]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
download_ffpp.py DELETED
@@ -1,261 +0,0 @@
1
- #!/usr/bin/env python
2
- """ Downloads FaceForensics++ and Deep Fake Detection public data release
3
- Example usage:
4
- see -h or https://github.com/ondyari/FaceForensics
5
- """
6
- # -*- coding: utf-8 -*-
7
- import argparse
8
- import os
9
- import urllib
10
- import urllib.request
11
- import tempfile
12
- import time
13
- import sys
14
- import json
15
- import random
16
- from tqdm import tqdm
17
- from os.path import join
18
-
19
-
20
- # URLs and filenames
21
- FILELIST_URL = 'misc/filelist.json'
22
- DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
23
- DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]
24
-
25
- # Parameters
26
- DATASETS = {
27
- 'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
28
- 'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
29
- 'original': 'original_sequences/youtube',
30
- 'DeepFakeDetection_original': 'original_sequences/actors',
31
- 'Deepfakes': 'manipulated_sequences/Deepfakes',
32
- 'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
33
- 'Face2Face': 'manipulated_sequences/Face2Face',
34
- 'FaceShifter': 'manipulated_sequences/FaceShifter',
35
- 'FaceSwap': 'manipulated_sequences/FaceSwap',
36
- 'NeuralTextures': 'manipulated_sequences/NeuralTextures'
37
- }
38
- ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
39
- 'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
40
- 'NeuralTextures']
41
- COMPRESSION = ['raw', 'c23', 'c40']
42
- TYPE = ['videos', 'masks', 'models']
43
- SERVERS = ['EU', 'EU2', 'CA']
44
-
45
-
46
- def parse_args():
47
- parser = argparse.ArgumentParser(
48
- description='Downloads FaceForensics v2 public data release.',
49
- formatter_class=argparse.ArgumentDefaultsHelpFormatter
50
- )
51
- parser.add_argument('output_path', type=str, help='Output directory.')
52
- parser.add_argument('-d', '--dataset', type=str, default='all',
53
- help='Which dataset to download, either pristine or '
54
- 'manipulated data or the downloaded youtube '
55
- 'videos.',
56
- choices=list(DATASETS.keys()) + ['all']
57
- )
58
- parser.add_argument('-c', '--compression', type=str, default='raw',
59
- help='Which compression degree. All videos '
60
- 'have been generated with h264 with a varying '
61
- 'codec. Raw (c0) videos are lossless compressed.',
62
- choices=COMPRESSION
63
- )
64
- parser.add_argument('-t', '--type', type=str, default='videos',
65
- help='Which file type, i.e. videos, masks, for our '
66
- 'manipulation methods, models, for Deepfakes.',
67
- choices=TYPE
68
- )
69
- parser.add_argument('-n', '--num_videos', type=int, default=None,
70
- help='Select a number of videos number to '
71
- "download if you don't want to download the full"
72
- ' dataset.')
73
- parser.add_argument('--server', type=str, default='EU',
74
- help='Server to download the data from. If you '
75
- 'encounter a slow download speed, consider '
76
- 'changing the server.',
77
- choices=SERVERS
78
- )
79
- args = parser.parse_args()
80
-
81
- # URLs
82
- server = args.server
83
- if server == 'EU':
84
- server_url = 'http://canis.vc.in.tum.de:8100/'
85
- elif server == 'EU2':
86
- server_url = 'http://kaldir.vc.in.tum.de/faceforensics/'
87
- elif server == 'CA':
88
- server_url = 'http://falas.cmpt.sfu.ca:8100/'
89
- else:
90
- raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))
91
- args.tos_url = server_url + 'webpage/FaceForensics_TOS.pdf'
92
- args.base_url = server_url + 'v3/'
93
- args.deepfakes_model_url = server_url + 'v3/manipulated_sequences/' + \
94
- 'Deepfakes/models/'
95
-
96
- return args
97
-
98
-
99
- def download_files(filenames, base_url, output_path, report_progress=True):
100
- os.makedirs(output_path, exist_ok=True)
101
- if report_progress:
102
- filenames = tqdm(filenames)
103
- for filename in filenames:
104
- download_file(base_url + filename, join(output_path, filename))
105
-
106
-
107
- def reporthook(count, block_size, total_size):
108
- global start_time
109
- if count == 0:
110
- start_time = time.time()
111
- return
112
- duration = time.time() - start_time
113
- progress_size = int(count * block_size)
114
- speed = int(progress_size / (1024 * duration))
115
- percent = int(count * block_size * 100 / total_size)
116
- sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
117
- (percent, progress_size / (1024 * 1024), speed, duration))
118
- sys.stdout.flush()
119
-
120
-
121
- def download_file(url, out_file, report_progress=False):
122
- out_dir = os.path.dirname(out_file)
123
- if not os.path.isfile(out_file):
124
- fh, out_file_tmp = tempfile.mkstemp(dir=out_dir)
125
- f = os.fdopen(fh, 'w')
126
- f.close()
127
- if report_progress:
128
- urllib.request.urlretrieve(url, out_file_tmp,
129
- reporthook=reporthook)
130
- else:
131
- urllib.request.urlretrieve(url, out_file_tmp)
132
- os.rename(out_file_tmp, out_file)
133
- else:
134
- tqdm.write('WARNING: skipping download of existing file ' + out_file)
135
-
136
-
137
- def main(args):
138
- # TOS
139
- print('By pressing any key to continue you confirm that you have agreed '\
140
- 'to the FaceForensics terms of use as described at:')
141
- print(args.tos_url)
142
- print('***')
143
- print('Press any key to continue, or CTRL-C to exit.')
144
- _ = input('')
145
-
146
- # Extract arguments
147
- c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
148
- c_type = args.type
149
- c_compression = args.compression
150
- num_videos = args.num_videos
151
- output_path = args.output_path
152
- os.makedirs(output_path, exist_ok=True)
153
-
154
- # Check for special dataset cases
155
- for dataset in c_datasets:
156
- dataset_path = DATASETS[dataset]
157
- # Special cases
158
- if 'original_youtube_videos' in dataset:
159
- # Here we download the original youtube videos zip file
160
- print('Downloading original youtube videos.')
161
- if not 'info' in dataset_path:
162
- print('Please be patient, this may take a while (~40gb)')
163
- suffix = ''
164
- else:
165
- suffix = 'info'
166
- download_file(args.base_url + '/' + dataset_path,
167
- out_file=join(output_path,
168
- 'downloaded_videos{}.zip'.format(
169
- suffix)),
170
- report_progress=True)
171
- return
172
-
173
- # Else: regular datasets
174
- print('Downloading {} of dataset "{}"'.format(
175
- c_type, dataset_path
176
- ))
177
-
178
- # Get filelists and video lenghts list from server
179
- if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
180
- filepaths = json.loads(urllib.request.urlopen(args.base_url + '/' +
181
- DEEPFEAKES_DETECTION_URL).read().decode("utf-8"))
182
- if 'actors' in dataset_path:
183
- filelist = filepaths['actors']
184
- else:
185
- filelist = filepaths['DeepFakesDetection']
186
- elif 'original' in dataset_path:
187
- # Load filelist from server
188
- file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
189
- FILELIST_URL).read().decode("utf-8"))
190
- filelist = []
191
- for pair in file_pairs:
192
- filelist += pair
193
- else:
194
- # Load filelist from server
195
- file_pairs = json.loads(urllib.request.urlopen(args.base_url + '/' +
196
- FILELIST_URL).read().decode("utf-8"))
197
- # Get filelist
198
- filelist = []
199
- for pair in file_pairs:
200
- filelist.append('_'.join(pair))
201
- if c_type != 'models':
202
- filelist.append('_'.join(pair[::-1]))
203
- # Maybe limit number of videos for download
204
- if num_videos is not None and num_videos > 0:
205
- print('Downloading the first {} videos'.format(num_videos))
206
- filelist = filelist[:num_videos]
207
-
208
- # Server and local paths
209
- dataset_videos_url = args.base_url + '{}/{}/{}/'.format(
210
- dataset_path, c_compression, c_type)
211
- dataset_mask_url = args.base_url + '{}/{}/videos/'.format(
212
- dataset_path, 'masks', c_type)
213
-
214
- if c_type == 'videos':
215
- dataset_output_path = join(output_path, dataset_path, c_compression,
216
- c_type)
217
- print('Output path: {}'.format(dataset_output_path))
218
- filelist = [filename + '.mp4' for filename in filelist]
219
- download_files(filelist, dataset_videos_url, dataset_output_path)
220
- elif c_type == 'masks':
221
- dataset_output_path = join(output_path, dataset_path, c_type,
222
- 'videos')
223
- print('Output path: {}'.format(dataset_output_path))
224
- if 'original' in dataset:
225
- if args.dataset != 'all':
226
- print('Only videos available for original data. Aborting.')
227
- return
228
- else:
229
- print('Only videos available for original data. '
230
- 'Skipping original.\n')
231
- continue
232
- if 'FaceShifter' in dataset:
233
- print('Masks not available for FaceShifter. Aborting.')
234
- return
235
- filelist = [filename + '.mp4' for filename in filelist]
236
- download_files(filelist, dataset_mask_url, dataset_output_path)
237
-
238
- # Else: models for deepfakes
239
- else:
240
- if dataset != 'Deepfakes' and c_type == 'models':
241
- print('Models only available for Deepfakes. Aborting')
242
- return
243
- dataset_output_path = join(output_path, dataset_path, c_type)
244
- print('Output path: {}'.format(dataset_output_path))
245
-
246
- # Get Deepfakes models
247
- for folder in tqdm(filelist):
248
- folder_filelist = DEEPFAKES_MODEL_NAMES
249
-
250
- # Folder paths
251
- folder_base_url = args.deepfakes_model_url + folder + '/'
252
- folder_dataset_output_path = join(dataset_output_path,
253
- folder)
254
- download_files(folder_filelist, folder_base_url,
255
- folder_dataset_output_path,
256
- report_progress=False) # already done
257
-
258
-
259
- if __name__ == "__main__":
260
- args = parse_args()
261
- main(args)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ela_service.py DELETED
@@ -1,88 +0,0 @@
1
- """Error Level Analysis (ELA) — Phase 12.1
2
-
3
- Re-saves an image at a fixed JPEG quality and diffs against the original to reveal
4
- per-pixel manipulation artifacts. Regions that were recently edited will show
5
- higher error levels than untouched areas.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import base64
11
- import io
12
-
13
- import cv2
14
- import numpy as np
15
- from loguru import logger
16
- from PIL import Image
17
-
18
-
19
- def _compute_ela(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> np.ndarray:
20
- """Return an ELA difference map as a uint8 (H,W,3) RGB array.
21
-
22
- Args:
23
- pil_img: Input image (any format — converted to RGB internally).
24
- quality: JPEG re-save quality level (lower = more aggressive compression).
25
- scale: Amplification factor for the difference (higher = more contrast).
26
-
27
- Returns:
28
- Difference image as uint8 (H,W,3) array.
29
- """
30
- rgb = pil_img.convert("RGB")
31
-
32
- # Re-save at specified JPEG quality into an in-memory buffer
33
- buf = io.BytesIO()
34
- rgb.save(buf, format="JPEG", quality=quality)
35
- buf.seek(0)
36
- resaved = Image.open(buf).convert("RGB")
37
-
38
- original_arr = np.array(rgb, dtype=np.float32)
39
- resaved_arr = np.array(resaved, dtype=np.float32)
40
-
41
- # Per-pixel absolute difference, amplified
42
- diff = np.abs(original_arr - resaved_arr) * scale
43
- diff = np.clip(diff, 0, 255).astype(np.uint8)
44
-
45
- return diff
46
-
47
-
48
- def generate_ela_base64(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> str:
49
- """Produce a base64 data-URL PNG of the ELA difference map.
50
-
51
- Regions with higher error levels (brighter in the output) are more likely
52
- to have been digitally manipulated.
53
- """
54
- diff = _compute_ela(pil_img, quality=quality, scale=scale)
55
-
56
- buf = io.BytesIO()
57
- Image.fromarray(diff).save(buf, format="PNG")
58
- b64 = base64.b64encode(buf.getvalue()).decode("ascii")
59
-
60
- logger.info(f"ELA map generated ({diff.shape[1]}x{diff.shape[0]})")
61
- return f"data:image/png;base64,{b64}"
62
-
63
-
64
- def generate_blended_ela_base64(
65
- pil_img: Image.Image,
66
- gradcam_weight: float = 0.6,
67
- ela_weight: float = 0.4,
68
- quality: int = 90,
69
- scale: float = 15.0,
70
- ) -> str:
71
- """Blend Grad-CAM heatmap overlay with ELA at specified weights.
72
-
73
- This is a utility for the 'blended' mode — it composites the ELA
74
- difference map on top of the original image for visual clarity.
75
- """
76
- rgb = pil_img.convert("RGB")
77
- original_arr = np.array(rgb, dtype=np.float32)
78
- ela_arr = _compute_ela(pil_img, quality=quality, scale=scale).astype(np.float32)
79
-
80
- # Blend: overlay ELA on the original for visual context
81
- blended = np.clip(original_arr * 0.5 + ela_arr * 0.5, 0, 255).astype(np.uint8)
82
-
83
- buf = io.BytesIO()
84
- Image.fromarray(blended).save(buf, format="PNG")
85
- b64 = base64.b64encode(buf.getvalue()).decode("ascii")
86
-
87
- logger.info(f"Blended ELA generated ({blended.shape[1]}x{blended.shape[0]})")
88
- return f"data:image/png;base64,{b64}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
exif_service.py DELETED
@@ -1,129 +0,0 @@
1
- """EXIF Metadata Extraction — Phase 12.2
2
-
3
- Extracts camera metadata from uploaded images and computes a trust adjustment
4
- score: presence of authentic camera metadata lowers fake probability, while
5
- evidence of editing software raises it.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- from typing import Optional
11
-
12
- from loguru import logger
13
- from PIL import Image
14
- from PIL.ExifTags import TAGS, GPSTAGS
15
-
16
- from schemas.common import ExifSummary
17
-
18
-
19
- # Software strings that suggest post-processing / generation
20
- _SUSPICIOUS_SOFTWARE = {
21
- "adobe photoshop", "photoshop", "gimp", "affinity photo",
22
- "stable diffusion", "midjourney", "dall-e", "comfyui",
23
- "automatic1111", "invokeai",
24
- }
25
-
26
- # Software strings that are normal camera firmware
27
- _CAMERA_SOFTWARE = {
28
- "ver.", "firmware", "camera", "dji", "gopro",
29
- }
30
-
31
-
32
- def _decode_gps(gps_info: dict) -> Optional[str]:
33
- """Decode EXIF GPSInfo dict into a human-readable lat/lon string."""
34
- try:
35
- def _to_decimal(values, ref):
36
- d, m, s = [float(v) for v in values]
37
- decimal = d + m / 60.0 + s / 3600.0
38
- if ref in ("S", "W"):
39
- decimal = -decimal
40
- return decimal
41
-
42
- lat = _to_decimal(gps_info.get(2, (0, 0, 0)), gps_info.get(1, "N"))
43
- lon = _to_decimal(gps_info.get(4, (0, 0, 0)), gps_info.get(3, "E"))
44
- return f"{lat:.6f}, {lon:.6f}"
45
- except Exception:
46
- return None
47
-
48
-
49
- def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
50
- """Extract EXIF metadata and compute a trust adjustment score.
51
-
52
- Trust adjustment logic:
53
- - Valid Make + Model + DateTimeOriginal → -15 (more likely real camera photo)
54
- - GPS info present → -5 additional (real photos often have GPS)
55
- - Suspicious editing software detected → +10 (more likely manipulated)
56
- - No EXIF at all → 0 (inconclusive — many platforms strip EXIF)
57
- """
58
- summary = ExifSummary()
59
-
60
- try:
61
- exif_data = pil_img._getexif()
62
- except Exception:
63
- exif_data = None
64
-
65
- if not exif_data:
66
- # Try exifread as fallback for formats Pillow doesn't handle well
67
- try:
68
- import exifread
69
- from io import BytesIO
70
- tags = exifread.process_file(BytesIO(raw_bytes), details=False)
71
- if tags:
72
- summary.make = str(tags.get("Image Make", "")).strip() or None
73
- summary.model = str(tags.get("Image Model", "")).strip() or None
74
- summary.datetime_original = str(tags.get("EXIF DateTimeOriginal", "")).strip() or None
75
- summary.software = str(tags.get("Image Software", "")).strip() or None
76
- summary.lens_model = str(tags.get("EXIF LensModel", "")).strip() or None
77
- except ImportError:
78
- logger.debug("exifread not installed, skipping fallback EXIF extraction")
79
- except Exception as e:
80
- logger.debug(f"exifread fallback failed: {e}")
81
- else:
82
- # Decode Pillow EXIF
83
- decoded = {}
84
- for tag_id, value in exif_data.items():
85
- tag_name = TAGS.get(tag_id, tag_id)
86
- decoded[tag_name] = value
87
-
88
- summary.make = str(decoded.get("Make", "")).strip() or None
89
- summary.model = str(decoded.get("Model", "")).strip() or None
90
- summary.datetime_original = str(decoded.get("DateTimeOriginal", "")).strip() or None
91
- summary.software = str(decoded.get("Software", "")).strip() or None
92
- summary.lens_model = str(decoded.get("LensModel", "")).strip() or None
93
-
94
- # GPS
95
- gps_raw = decoded.get("GPSInfo")
96
- if gps_raw and isinstance(gps_raw, dict):
97
- gps_decoded = {}
98
- for k, v in gps_raw.items():
99
- gps_decoded[GPSTAGS.get(k, k)] = v
100
- summary.gps_info = _decode_gps(gps_decoded)
101
-
102
- # ── Trust adjustment scoring ──
103
- adjustment = 0
104
- reasons = []
105
-
106
- has_camera_meta = summary.make and summary.model and summary.datetime_original
107
- if has_camera_meta:
108
- adjustment -= 15
109
- reasons.append("valid camera metadata (Make/Model/DateTime)")
110
-
111
- if summary.gps_info:
112
- adjustment -= 5
113
- reasons.append("GPS coordinates present")
114
-
115
- if summary.software:
116
- sw_lower = summary.software.lower()
117
- if any(s in sw_lower for s in _SUSPICIOUS_SOFTWARE):
118
- adjustment += 10
119
- reasons.append(f"editing software detected: {summary.software}")
120
- elif any(s in sw_lower for s in _CAMERA_SOFTWARE):
121
- adjustment -= 2
122
- reasons.append("camera firmware in Software field")
123
-
124
- summary.trust_adjustment = adjustment
125
- summary.trust_reason = "; ".join(reasons) if reasons else "no EXIF metadata found"
126
-
127
- logger.info(f"EXIF extracted: make={summary.make}, model={summary.model}, "
128
- f"adjustment={adjustment} ({summary.trust_reason})")
129
- return summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
file_handler.py DELETED
@@ -1,96 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import io
4
- import os
5
- import tempfile
6
- from typing import Iterable
7
-
8
- from fastapi import HTTPException, UploadFile, status
9
-
10
- from config import settings
11
-
12
- IMAGE_MAGIC_BYTES: dict[bytes, str] = {
13
- b"\xff\xd8\xff": "image/jpeg",
14
- b"\x89PNG\r\n\x1a\n": "image/png",
15
- b"RIFF": "image/webp", # partial; WEBP has 'RIFF....WEBP'
16
- }
17
-
18
-
19
- def _detect_mime_by_magic(head: bytes) -> str | None:
20
- for sig, mime in IMAGE_MAGIC_BYTES.items():
21
- if head.startswith(sig):
22
- if mime == "image/webp" and b"WEBP" not in head[:16]:
23
- continue
24
- return mime
25
- return None
26
-
27
-
28
- async def read_upload_bytes(
29
- file: UploadFile,
30
- allowed_mimes: Iterable[str],
31
- max_size_mb: int,
32
- ) -> tuple[bytes, str]:
33
- """Read an UploadFile into memory after validating type and size.
34
- Returns (raw_bytes, detected_mime). Raises HTTPException on failure.
35
- """
36
- data = await file.read()
37
- size_mb = len(data) / (1024 * 1024)
38
- if size_mb > max_size_mb:
39
- raise HTTPException(
40
- status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
41
- detail=f"File too large ({size_mb:.1f} MB > {max_size_mb} MB)",
42
- )
43
-
44
- mime = _detect_mime_by_magic(data[:16]) or (file.content_type or "")
45
- if mime not in allowed_mimes:
46
- raise HTTPException(
47
- status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
48
- detail=f"Unsupported type '{mime}'. Allowed: {list(allowed_mimes)}",
49
- )
50
- return data, mime
51
-
52
-
53
- def bytes_to_buffer(data: bytes) -> io.BytesIO:
54
- return io.BytesIO(data)
55
-
56
-
57
- async def save_upload_to_tempfile(
58
- file: UploadFile,
59
- allowed_mimes: Iterable[str],
60
- max_size_mb: int,
61
- suffix: str = ".mp4",
62
- ) -> tuple[str, str]:
63
- """Stream an UploadFile to a temp file on disk. Returns (path, mime).
64
- MIME is taken from the client's content_type (no magic-byte check for videos).
65
- Caller is responsible for deleting the temp file.
66
- """
67
- mime = (file.content_type or "").lower()
68
- if mime not in allowed_mimes:
69
- raise HTTPException(
70
- status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
71
- detail=f"Unsupported type '{mime}'. Allowed: {list(allowed_mimes)}",
72
- )
73
-
74
- max_bytes = max_size_mb * 1024 * 1024
75
- fd, path = tempfile.mkstemp(suffix=suffix, prefix="ds_vid_")
76
- written = 0
77
- try:
78
- with os.fdopen(fd, "wb") as out:
79
- while True:
80
- chunk = await file.read(1024 * 1024)
81
- if not chunk:
82
- break
83
- written += len(chunk)
84
- if written > max_bytes:
85
- raise HTTPException(
86
- status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
87
- detail=f"File too large (> {max_size_mb} MB)",
88
- )
89
- out.write(chunk)
90
- except Exception:
91
- try:
92
- os.unlink(path)
93
- except OSError:
94
- pass
95
- raise
96
- return path, mime
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generate_colab_nb.py DELETED
@@ -1,213 +0,0 @@
1
- import nbformat as nbf
2
- import os
3
-
4
- nb = nbf.v4.new_notebook()
5
-
6
- text = """\
7
- # DeepShield: FaceForensics++ ViT Training
8
- Run this entirely in Google Colab.
9
- **Before running**:
10
- 1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.
11
- 2. Run the cells below sequentially.
12
- """
13
-
14
- code_install = """\
15
- !pip install timm transformers datasets accelerate evaluate opencv-python
16
- """
17
-
18
- code_ffpp = """\
19
- # We create the download script inside the Colab environment
20
- download_script = '''#!/usr/bin/env python
21
- import argparse
22
- import os
23
- import urllib.request
24
- import tempfile
25
- import time
26
- import sys
27
- import json
28
- from tqdm import tqdm
29
- from os.path import join
30
-
31
- FILELIST_URL = 'misc/filelist.json'
32
- DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
33
- DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]
34
- DATASETS = {
35
- 'original': 'original_sequences/youtube',
36
- 'Deepfakes': 'manipulated_sequences/Deepfakes',
37
- 'Face2Face': 'manipulated_sequences/Face2Face',
38
- 'FaceShifter': 'manipulated_sequences/FaceShifter',
39
- 'FaceSwap': 'manipulated_sequences/FaceSwap',
40
- 'NeuralTextures': 'manipulated_sequences/NeuralTextures'
41
- }
42
- ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']
43
- COMPRESSION = ['raw', 'c23', 'c40']
44
- TYPE = ['videos']
45
-
46
- def download_file(url, out_file):
47
- os.makedirs(os.path.dirname(out_file), exist_ok=True)
48
- if not os.path.isfile(out_file):
49
- urllib.request.urlretrieve(url, out_file)
50
-
51
- def main():
52
- parser = argparse.ArgumentParser()
53
- parser.add_argument('output_path', type=str)
54
- parser.add_argument('-d', '--dataset', type=str, default='all')
55
- parser.add_argument('-c', '--compression', type=str, default='c40')
56
- parser.add_argument('-t', '--type', type=str, default='videos')
57
- parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial
58
- args = parser.parse_args()
59
-
60
- base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
61
-
62
- datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
63
- for dataset in datasets:
64
- dataset_path = DATASETS[dataset]
65
- print(f'Downloading {args.compression} of {dataset}')
66
-
67
- file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode("utf-8"))
68
- filelist = []
69
- if 'original' in dataset_path:
70
- for pair in file_pairs:
71
- filelist += pair
72
- else:
73
- for pair in file_pairs:
74
- filelist.append('_'.join(pair))
75
- filelist.append('_'.join(pair[::-1]))
76
-
77
- filelist = filelist[:args.num_videos]
78
- dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'
79
- dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)
80
-
81
- for filename in tqdm(filelist):
82
- download_file(dataset_videos_url + filename + ".mp4", join(dataset_output_path, filename + ".mp4"))
83
-
84
- if __name__ == "__main__":
85
- main()
86
- '''
87
-
88
- with open("download_ffpp.py", "w") as f:
89
- f.write(download_script)
90
-
91
- !python download_ffpp.py ./data -d all -c c40 -t videos -n 50
92
- """
93
-
94
- code_extract = """\
95
- import cv2
96
- import os
97
- import glob
98
- from tqdm import tqdm
99
-
100
- def extract_frames(video_folder, output_folder, label, max_frames=4):
101
- os.makedirs(output_folder, exist_ok=True)
102
- videos = glob.glob(os.path.join(video_folder, "*.mp4"))
103
-
104
- for vid_path in tqdm(videos, desc=f"Extracting {label}"):
105
- vid_name = os.path.basename(vid_path).replace('.mp4','')
106
- cap = cv2.VideoCapture(vid_path)
107
- count = 0
108
- while cap.isOpened() and count < max_frames:
109
- ret, frame = cap.read()
110
- if not ret: break
111
- frame = cv2.resize(frame, (224, 224))
112
- out_path = os.path.join(output_folder, f"{vid_name}_f{count}.jpg")
113
- cv2.imwrite(out_path, frame)
114
- count += 1
115
- cap.release()
116
-
117
- # Extract Real
118
- extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')
119
-
120
- # Extract Fakes
121
- fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']
122
- for f in fakes:
123
- extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake')
124
- """
125
-
126
- code_train = """\
127
- import numpy as np
128
- from datasets import load_dataset
129
- from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
130
- import torch
131
-
132
- # 1. Load Dataset
133
- dataset = load_dataset('imagefolder', data_dir='./dataset')
134
- # Split into train/validation
135
- dataset = dataset['train'].train_test_split(test_size=0.1)
136
-
137
- # 2. Preprocessor
138
- model_name_or_path = 'google/vit-base-patch16-224-in21k'
139
- processor = ViTImageProcessor.from_pretrained(model_name_or_path)
140
-
141
- def transform(example_batch):
142
- # Take a list of PIL images and turn them to pixel values
143
- inputs = processor([x.convert("RGB") for x in example_batch['image']], return_tensors='pt')
144
- inputs['labels'] = example_batch['label']
145
- return inputs
146
-
147
- prepared_ds = dataset.with_transform(transform)
148
-
149
- def collate_fn(batch):
150
- return {
151
- 'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
152
- 'labels': torch.tensor([x['labels'] for x in batch])
153
- }
154
-
155
- # 3. Load Model
156
- labels = dataset['train'].features['label'].names
157
- model = ViTForImageClassification.from_pretrained(
158
- model_name_or_path,
159
- num_labels=len(labels),
160
- id2label={str(i): c for i, c in enumerate(labels)},
161
- label2id={c: str(i) for i, c in enumerate(labels)}
162
- )
163
-
164
- training_args = TrainingArguments(
165
- output_dir="./vit-deepshield",
166
- per_device_train_batch_size=16,
167
- eval_strategy="steps",
168
- num_train_epochs=3,
169
- fp16=True, # Mixed precision for speed
170
- save_steps=100,
171
- eval_steps=100,
172
- logging_steps=10,
173
- learning_rate=2e-4,
174
- save_total_limit=2,
175
- remove_unused_columns=False,
176
- push_to_hub=False,
177
- load_best_model_at_end=True,
178
- )
179
-
180
- import evaluate
181
- metric = evaluate.load("accuracy")
182
- def compute_metrics(p):
183
- return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)
184
-
185
- trainer = Trainer(
186
- model=model,
187
- args=training_args,
188
- data_collator=collate_fn,
189
- compute_metrics=compute_metrics,
190
- train_dataset=prepared_ds["train"],
191
- eval_dataset=prepared_ds["test"],
192
- )
193
-
194
- # 4. Train
195
- train_results = trainer.train()
196
- trainer.save_model("deepshield_vit_model")
197
- processor.save_pretrained("deepshield_vit_model")
198
- trainer.log_metrics("train", train_results.metrics)
199
- trainer.save_metrics("train", train_results.metrics)
200
- trainer.save_state()
201
- print("Training Complete! The model is saved to ./deepshield_vit_model")
202
- """
203
-
204
- nb['cells'] = [
205
- nbf.v4.new_markdown_cell(text),
206
- nbf.v4.new_code_cell(code_install),
207
- nbf.v4.new_code_cell(code_ffpp),
208
- nbf.v4.new_code_cell(code_extract),
209
- nbf.v4.new_code_cell(code_train)
210
- ]
211
-
212
- with open(r'c:\Users\athar\Desktop\minor2\backend\training\Colab_ViT_Training.ipynb', 'w', encoding='utf-8') as f:
213
- nbf.write(nb, f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
heatmap_generator.py DELETED
@@ -1,164 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import base64
4
- import io
5
- from typing import Optional
6
-
7
- import cv2
8
- import numpy as np
9
- import torch
10
- from loguru import logger
11
- from PIL import Image
12
- from pytorch_grad_cam import GradCAMPlusPlus
13
- from pytorch_grad_cam.utils.image import show_cam_on_image
14
- from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
15
-
16
- from config import settings
17
- from models.model_loader import get_model_loader
18
-
19
-
20
- class _HFLogitsWrapper(torch.nn.Module):
21
- """Wrap a HuggingFace image classification model so forward() returns logits
22
- as a plain tensor (pytorch_grad_cam expects tensor outputs, not dicts/dataclasses).
23
- """
24
-
25
- def __init__(self, model: torch.nn.Module) -> None:
26
- super().__init__()
27
- self.model = model
28
-
29
- def forward(self, pixel_values: torch.Tensor) -> torch.Tensor: # type: ignore[override]
30
- return self.model(pixel_values=pixel_values).logits
31
-
32
-
33
- def _vit_reshape_transform(tensor: torch.Tensor, height: int = 14, width: int = 14) -> torch.Tensor:
34
- """Grad-CAM expects (B, C, H, W); ViT hidden states are (B, 1+H*W, C).
35
- Drop the CLS token and reshape tokens into a spatial grid.
36
- """
37
- result = tensor[:, 1:, :]
38
- b, n, c = result.shape
39
- result = result.reshape(b, height, width, c)
40
- result = result.permute(0, 3, 1, 2) # (B, C, H, W)
41
- return result
42
-
43
-
44
- def _preprocess_for_cam(pil_img: Image.Image, processor) -> tuple[torch.Tensor, np.ndarray]:
45
- """Return (input_tensor, rgb_float_224) where rgb_float_224 is a (H,W,3) float
46
- array in [0,1] matching the model input geometry — needed for overlaying.
47
- """
48
- inputs = processor(images=pil_img, return_tensors="pt")
49
- input_tensor = inputs["pixel_values"].to(settings.DEVICE)
50
-
51
- size = getattr(processor, "size", {"height": 224, "width": 224})
52
- h = size.get("height", 224) if isinstance(size, dict) else 224
53
- w = size.get("width", 224) if isinstance(size, dict) else 224
54
-
55
- resized = pil_img.resize((w, h), Image.BILINEAR)
56
- rgb = np.array(resized).astype(np.float32) / 255.0 # (H,W,3) in [0,1]
57
- return input_tensor, rgb
58
-
59
-
60
- def _encode_overlay_to_base64(overlay: np.ndarray) -> str:
61
- """Encode a uint8 (H,W,3) RGB overlay to a base64 data-URL PNG."""
62
- buf = io.BytesIO()
63
- Image.fromarray(overlay).save(buf, format="PNG")
64
- b64 = base64.b64encode(buf.getvalue()).decode("ascii")
65
- return f"data:image/png;base64,{b64}"
66
-
67
-
68
- def _compute_gradcam_pp(
69
- pil_img: Image.Image,
70
- target_class_idx: Optional[int] = None,
71
- ) -> tuple[np.ndarray, np.ndarray]:
72
- """Compute Grad-CAM++ averaged across the last 3 ViT encoder layers.
73
- Returns (grayscale_cam, rgb_float) where grayscale_cam is (H,W) in [0,1].
74
- """
75
- loader = get_model_loader()
76
- model, processor = loader.load_image_model()
77
-
78
- model.eval()
79
- for p in model.parameters():
80
- p.requires_grad_(True)
81
-
82
- input_tensor, rgb_float = _preprocess_for_cam(pil_img, processor)
83
-
84
- grid = int(model.config.image_size / model.config.patch_size)
85
-
86
- # Average across last 3 ViT encoder layers for smoother heatmaps
87
- num_layers = len(model.vit.encoder.layer)
88
- last_n = min(3, num_layers)
89
- target_layers = [
90
- model.vit.encoder.layer[-(i + 1)].layernorm_before
91
- for i in range(last_n)
92
- ]
93
-
94
- wrapped = _HFLogitsWrapper(model)
95
-
96
- targets = None
97
- if target_class_idx is not None:
98
- targets = [ClassifierOutputTarget(int(target_class_idx))]
99
-
100
- with GradCAMPlusPlus(
101
- model=wrapped,
102
- target_layers=target_layers,
103
- reshape_transform=lambda t: _vit_reshape_transform(t, grid, grid),
104
- ) as cam:
105
- grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0] # (H,W) in [0,1]
106
-
107
- return grayscale_cam, rgb_float
108
-
109
-
110
- def generate_heatmap_base64(
111
- pil_img: Image.Image,
112
- target_class_idx: Optional[int] = None,
113
- ) -> str:
114
- """Produce a base64 data-URL PNG of the Grad-CAM++ overlay for the given image."""
115
- grayscale_cam, rgb_float = _compute_gradcam_pp(pil_img, target_class_idx)
116
- overlay = show_cam_on_image(rgb_float, grayscale_cam, use_rgb=True)
117
- logger.info(f"Heatmap generated ({overlay.shape[0]}x{overlay.shape[1]})")
118
- return _encode_overlay_to_base64(overlay)
119
-
120
-
121
- def generate_boxes_base64(
122
- pil_img: Image.Image,
123
- target_class_idx: Optional[int] = None,
124
- top_k: int = 5,
125
- threshold: float = 0.4,
126
- ) -> str:
127
- """Produce bounding boxes around top-K connected components from Grad-CAM++ activation.
128
- Renders colored boxes (red/yellow/orange by intensity) on the original image.
129
- """
130
- grayscale_cam, rgb_float = _compute_gradcam_pp(pil_img, target_class_idx)
131
-
132
- h, w = rgb_float.shape[:2]
133
- base_img = (rgb_float * 255).astype(np.uint8).copy()
134
-
135
- # Threshold the heatmap to find activated regions
136
- binary = (grayscale_cam >= threshold).astype(np.uint8) * 255
137
- contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
138
-
139
- if not contours:
140
- logger.info("No significant activation regions found for bounding boxes")
141
- return _encode_overlay_to_base64(base_img)
142
-
143
- # Sort by area descending, take top_k
144
- contours = sorted(contours, key=cv2.contourArea, reverse=True)[:top_k]
145
-
146
- # Color by mean activation intensity within each box
147
- for cnt in contours:
148
- x, y, bw, bh = cv2.boundingRect(cnt)
149
- region_activation = grayscale_cam[y:y + bh, x:x + bw].mean()
150
-
151
- if region_activation >= 0.7:
152
- color = (220, 40, 40) # red — high suspicion
153
- elif region_activation >= 0.5:
154
- color = (240, 140, 20) # orange — medium
155
- else:
156
- color = (230, 200, 40) # yellow — lower
157
-
158
- cv2.rectangle(base_img, (x, y), (x + bw, y + bh), color, 2)
159
- label = f"{region_activation * 100:.0f}%"
160
- cv2.putText(base_img, label, (x, max(y - 6, 12)),
161
- cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1, cv2.LINE_AA)
162
-
163
- logger.info(f"Bounding boxes generated: {len(contours)} regions")
164
- return _encode_overlay_to_base64(base_img)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
image_service.py DELETED
@@ -1,58 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import io
4
- from dataclasses import dataclass
5
- from typing import Tuple
6
-
7
- import torch
8
- from loguru import logger
9
- from PIL import Image
10
-
11
- from config import settings
12
- from models.model_loader import get_model_loader
13
-
14
-
15
- @dataclass
16
- class ImageClassification:
17
- label: str
18
- confidence: float
19
- all_scores: dict[str, float]
20
-
21
-
22
- def load_image_from_bytes(data: bytes) -> Image.Image:
23
- img = Image.open(io.BytesIO(data))
24
- if img.mode != "RGB":
25
- img = img.convert("RGB")
26
- return img
27
-
28
-
29
- def classify_image(pil_img: Image.Image) -> ImageClassification:
30
- """Run the ViT deepfake classifier on a PIL image."""
31
- loader = get_model_loader()
32
- model, processor = loader.load_image_model()
33
-
34
- inputs = processor(images=pil_img, return_tensors="pt")
35
- inputs = {k: v.to(settings.DEVICE) for k, v in inputs.items()}
36
-
37
- with torch.no_grad():
38
- outputs = model(**inputs)
39
- logits = outputs.logits # (1, num_labels)
40
- probs = torch.softmax(logits, dim=-1)[0]
41
-
42
- id2label: dict[int, str] = getattr(model.config, "id2label", {})
43
- all_scores = {id2label.get(i, str(i)): float(p.item()) for i, p in enumerate(probs)}
44
- top_idx = int(torch.argmax(probs).item())
45
- top_label = id2label.get(top_idx, str(top_idx))
46
- top_conf = float(probs[top_idx].item())
47
-
48
- logger.info(f"Image classify → {top_label} @ {top_conf:.3f}")
49
- return ImageClassification(label=top_label, confidence=top_conf, all_scores=all_scores)
50
-
51
-
52
- def preprocess_and_classify(raw_bytes: bytes) -> Tuple[Image.Image, ImageClassification]:
53
- """Convenience: decode bytes → PIL → classify. Returns the PIL image too so
54
- downstream steps (heatmap, artifact scan) can reuse it.
55
- """
56
- pil = load_image_from_bytes(raw_bytes)
57
- result = classify_image(pil)
58
- return pil, result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llm_explainer.py DELETED
@@ -1,191 +0,0 @@
1
- """LLM Explainability Card — Phase 12.3
2
-
3
- Generates a plain-English summary paragraph + 3 key-signal bullets from the
4
- full analysis payload. Supports Gemini (default) and OpenAI providers.
5
- Results are cached per record_id to avoid re-spending tokens.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import json
11
- from abc import ABC, abstractmethod
12
- from functools import lru_cache
13
- from typing import Any
14
-
15
- from loguru import logger
16
-
17
- from config import settings
18
- from schemas.common import LLMExplainabilitySummary
19
-
20
- # ── In-memory cache keyed by record_id ──
21
- _cache: dict[str, LLMExplainabilitySummary] = {}
22
-
23
-
24
- _PROMPT_TEMPLATE = """\
25
- You are DeepShield's explainability engine. Given the JSON analysis payload below,
26
- write a concise, accessible summary for a non-technical user.
27
-
28
- **Output format (strict JSON only — no markdown fences):**
29
- {{
30
- "paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
31
- "bullets": [
32
- "<key signal 1>",
33
- "<key signal 2>",
34
- "<key signal 3>"
35
- ]
36
- }}
37
-
38
- Rules:
39
- - Be factual. State what the analysis found, not what you speculate.
40
- - Reference specific indicators (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
41
- - If the verdict is "Likely Authentic", reassure the user and explain why.
42
- - If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence.
43
- - Keep the paragraph under 60 words. Each bullet under 20 words.
44
-
45
- **Analysis payload:**
46
- {payload_json}
47
- """
48
-
49
-
50
- class _LLMProvider(ABC):
51
- @abstractmethod
52
- def generate(self, prompt: str) -> str:
53
- """Send prompt to LLM and return raw text response."""
54
-
55
-
56
- class _GeminiProvider(_LLMProvider):
57
- def __init__(self) -> None:
58
- import google.generativeai as genai
59
- genai.configure(api_key=settings.LLM_API_KEY)
60
- self._model = genai.GenerativeModel(settings.LLM_MODEL)
61
-
62
- def generate(self, prompt: str) -> str:
63
- response = self._model.generate_content(prompt)
64
- return response.text
65
-
66
-
67
- class _OpenAIProvider(_LLMProvider):
68
- def __init__(self) -> None:
69
- from openai import OpenAI
70
- self._client = OpenAI(api_key=settings.LLM_API_KEY)
71
-
72
- def generate(self, prompt: str) -> str:
73
- response = self._client.chat.completions.create(
74
- model=settings.LLM_MODEL,
75
- messages=[{"role": "user", "content": prompt}],
76
- temperature=0.3,
77
- max_tokens=300,
78
- )
79
- return response.choices[0].message.content
80
-
81
-
82
- @lru_cache(maxsize=1)
83
- def _get_provider() -> _LLMProvider:
84
- """Lazy-init the configured LLM provider (singleton)."""
85
- provider_name = settings.LLM_PROVIDER.lower()
86
- if provider_name == "openai":
87
- return _OpenAIProvider()
88
- return _GeminiProvider()
89
-
90
-
91
- def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
92
- """Parse the LLM's JSON response into (paragraph, bullets).
93
- Handles cases where the LLM wraps output in markdown fences.
94
- """
95
- text = raw.strip()
96
- # Strip markdown code fences if present
97
- if text.startswith("```"):
98
- lines = text.split("\n")
99
- # Remove first and last fence lines
100
- lines = [l for l in lines if not l.strip().startswith("```")]
101
- text = "\n".join(lines).strip()
102
-
103
- parsed = json.loads(text)
104
- paragraph = parsed.get("paragraph", "")
105
- bullets = parsed.get("bullets", [])
106
- if not isinstance(bullets, list):
107
- bullets = [str(bullets)]
108
- return paragraph, bullets[:3]
109
-
110
-
111
- def generate_llm_summary(
112
- payload: dict[str, Any],
113
- record_id: str | None = None,
114
- ) -> LLMExplainabilitySummary:
115
- """Generate an LLM-powered plain-English explanation for an analysis result.
116
-
117
- Args:
118
- payload: The full analysis response dict (verdict, scores, indicators, etc.).
119
- record_id: Optional cache key. If provided and cached, returns cached result.
120
-
121
- Returns:
122
- LLMExplainabilitySummary with paragraph, bullets, and model info.
123
- """
124
- # Check cache
125
- if record_id and record_id in _cache:
126
- logger.debug(f"LLM summary cache hit for record_id={record_id}")
127
- cached = _cache[record_id]
128
- cached.cached = True
129
- return cached
130
-
131
- # Guard: no API key configured
132
- if not settings.LLM_API_KEY:
133
- logger.warning("LLM_API_KEY not set — using deterministic fallback summary")
134
-
135
- verdict_data = payload.get("verdict", {})
136
- label = verdict_data.get("label", "Unknown")
137
- score = verdict_data.get("authenticity_score", 50)
138
-
139
- return LLMExplainabilitySummary(
140
- paragraph=f"The DeepShield AI engine has analyzed this media and determined it is '{label}' with an authenticity score of {score}/100. We arrived at this conclusion by passing the file through our deepfake detection algorithms, artifact scanners, and metadata analyzers.",
141
- bullets=[
142
- f"Overall Authenticity Score: {score}/100",
143
- f"Primary Verdict: {label}",
144
- "Note: Configure an LLM API key for deeper contextual analysis."
145
- ],
146
- model_used="static-fallback",
147
- )
148
-
149
- # Strip heavy base64 fields to reduce token usage
150
- slim_payload = {k: v for k, v in payload.items()
151
- if k not in ("explainability",)}
152
- # Include explainability but strip base64 images
153
- if "explainability" in payload and isinstance(payload["explainability"], dict):
154
- expl = {k: v for k, v in payload["explainability"].items()
155
- if not k.endswith("_base64")}
156
- slim_payload["explainability"] = expl
157
-
158
- prompt = _PROMPT_TEMPLATE.format(payload_json=json.dumps(slim_payload, indent=2, default=str))
159
-
160
- try:
161
- provider = _get_provider()
162
- raw_response = provider.generate(prompt)
163
- paragraph, bullets = _parse_llm_response(raw_response)
164
-
165
- summary = LLMExplainabilitySummary(
166
- paragraph=paragraph,
167
- bullets=bullets,
168
- model_used=f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
169
- )
170
-
171
- # Cache result
172
- if record_id:
173
- _cache[record_id] = summary
174
-
175
- logger.info(f"LLM summary generated via {settings.LLM_PROVIDER}/{settings.LLM_MODEL}")
176
- return summary
177
-
178
- except json.JSONDecodeError as e:
179
- logger.error(f"LLM returned unparseable JSON: {e}")
180
- return LLMExplainabilitySummary(
181
- paragraph="Analysis complete. See the detailed indicators below for specifics.",
182
- bullets=["LLM explanation could not be parsed"],
183
- model_used=f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
184
- )
185
- except Exception as e:
186
- logger.error(f"LLM explainer failed: {e}")
187
- return LLMExplainabilitySummary(
188
- paragraph="Analysis complete. See the detailed indicators below for specifics.",
189
- bullets=["LLM explanation temporarily unavailable"],
190
- model_used="error",
191
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
logs/deepshield.log ADDED
@@ -0,0 +1,949 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-04-22 18:24:59.601 | INFO | main:lifespan:83 - Starting DeepShield backend
2
+ 2026-04-22 18:24:59.655 | INFO | main:lifespan:85 - Database initialized
3
+ 2026-04-22 18:24:59.656 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
4
+ 2026-04-22 18:25:06.201 | INFO | models.model_loader:load_image_model:51 - Image model loaded
5
+ 2026-04-22 18:25:06.206 | INFO | services.report_service:cleanup_expired:151 - Cleaned up 1 expired reports
6
+ 2026-04-22 18:26:20.263 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
7
+ 2026-04-22 18:26:22.700 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
8
+ 2026-04-22 18:26:23.034 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.078 ffpp=n/a eff=0.18335410952568054 → 0.131
9
+ 2026-04-22 18:26:28.349 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
10
+ 2026-04-22 18:26:28.390 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
11
+ 2026-04-22 18:26:29.238 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
12
+ 2026-04-22 18:26:29.277 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (256x256)
13
+ 2026-04-22 18:26:30.141 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
14
+ 2026-04-22 18:26:30.327 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
15
+ 2026-04-22 18:26:30.347 | INFO | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=19 score=13 verdict=Very Likely Fake
16
+ 2026-04-22 18:26:30.349 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
17
+ 2026-04-22 18:26:30.349 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
18
+ 2026-04-22 18:27:58.805 | INFO | main:lifespan:93 - Shutting down DeepShield backend
19
+ 2026-04-22 18:28:09.692 | INFO | main:lifespan:83 - Starting DeepShield backend
20
+ 2026-04-22 18:28:09.698 | INFO | main:lifespan:85 - Database initialized
21
+ 2026-04-22 18:28:09.698 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
22
+ 2026-04-22 18:28:11.556 | INFO | models.model_loader:load_image_model:51 - Image model loaded
23
+ 2026-04-24 01:50:58.220 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
24
+ 2026-04-24 01:51:03.592 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
25
+ 2026-04-24 01:51:03.887 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.597 ffpp=n/a eff=n/a → 0.597
26
+ 2026-04-24 01:51:12.975 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
27
+ 2026-04-24 01:51:13.089 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
28
+ 2026-04-24 01:51:13.255 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
29
+ 2026-04-24 01:51:13.320 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (640x427)
30
+ 2026-04-24 01:51:14.648 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
31
+ 2026-04-24 01:51:14.933 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
32
+ 2026-04-24 01:51:14.979 | INFO | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=20 score=40 verdict=Likely Fake
33
+ 2026-04-24 01:51:14.982 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
34
+ 2026-04-24 01:51:14.984 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
35
+ 2026-04-24 07:35:53.458 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
36
+ 2026-04-24 07:36:02.194 | INFO | models.model_loader:load_text_model:65 - Text model loaded
37
+ 2026-04-24 07:36:03.057 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
38
+ 2026-04-24 07:36:03.058 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 68 (High) excl=4 caps=3 cb=1 emo=1
39
+ 2026-04-24 07:36:03.061 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
40
+ 2026-04-24 07:36:05.585 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
41
+ 2026-04-24 07:36:06.959 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=21 text score=15 verdict=Very Likely Fake
42
+ 2026-04-24 07:36:08.561 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
43
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
44
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
45
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
46
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
47
+ Please retry in 51.884484839s. [links {
48
+ description: "Learn more about Gemini API quotas"
49
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
50
+ }
51
+ , violations {
52
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
53
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
54
+ quota_dimensions {
55
+ key: "model"
56
+ value: "gemini-2.5-pro"
57
+ }
58
+ quota_dimensions {
59
+ key: "location"
60
+ value: "global"
61
+ }
62
+ }
63
+ violations {
64
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
65
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
66
+ quota_dimensions {
67
+ key: "model"
68
+ value: "gemini-2.5-pro"
69
+ }
70
+ quota_dimensions {
71
+ key: "location"
72
+ value: "global"
73
+ }
74
+ }
75
+ violations {
76
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
77
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
78
+ quota_dimensions {
79
+ key: "model"
80
+ value: "gemini-2.5-pro"
81
+ }
82
+ quota_dimensions {
83
+ key: "location"
84
+ value: "global"
85
+ }
86
+ }
87
+ violations {
88
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
89
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
90
+ quota_dimensions {
91
+ key: "model"
92
+ value: "gemini-2.5-pro"
93
+ }
94
+ quota_dimensions {
95
+ key: "location"
96
+ value: "global"
97
+ }
98
+ }
99
+ , retry_delay {
100
+ seconds: 51
101
+ }
102
+ ]
103
+ 2026-04-24 07:36:41.979 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
104
+ 2026-04-24 07:36:47.524 | INFO | models.model_loader:load_image_model:51 - Image model loaded
105
+ 2026-04-24 07:36:48.484 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
106
+ 2026-04-24 07:36:49.759 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
107
+ 2026-04-24 07:36:49.848 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.521 ffpp=n/a eff=n/a → 0.521
108
+ 2026-04-24 07:36:51.638 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
109
+ 2026-04-24 07:36:51.638 | WARNING | services.artifact_detector:detect_face_based_artifacts:213 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
110
+ 2026-04-24 07:36:51.649 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
111
+ 2026-04-24 07:36:51.696 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (512x512)
112
+ 2026-04-24 07:36:52.470 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
113
+ 2026-04-24 07:36:52.519 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
114
+ 2026-04-24 07:36:52.542 | INFO | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=22 score=48 verdict=Possibly Manipulated
115
+ 2026-04-24 07:36:53.674 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
116
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
117
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
118
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
119
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
120
+ Please retry in 6.748563195s. [links {
121
+ description: "Learn more about Gemini API quotas"
122
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
123
+ }
124
+ , violations {
125
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
126
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
127
+ quota_dimensions {
128
+ key: "model"
129
+ value: "gemini-2.5-pro"
130
+ }
131
+ quota_dimensions {
132
+ key: "location"
133
+ value: "global"
134
+ }
135
+ }
136
+ violations {
137
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
138
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
139
+ quota_dimensions {
140
+ key: "model"
141
+ value: "gemini-2.5-pro"
142
+ }
143
+ quota_dimensions {
144
+ key: "location"
145
+ value: "global"
146
+ }
147
+ }
148
+ violations {
149
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
150
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
151
+ quota_dimensions {
152
+ key: "model"
153
+ value: "gemini-2.5-pro"
154
+ }
155
+ quota_dimensions {
156
+ key: "location"
157
+ value: "global"
158
+ }
159
+ }
160
+ violations {
161
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
162
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
163
+ quota_dimensions {
164
+ key: "model"
165
+ value: "gemini-2.5-pro"
166
+ }
167
+ quota_dimensions {
168
+ key: "location"
169
+ value: "global"
170
+ }
171
+ }
172
+ , retry_delay {
173
+ seconds: 6
174
+ }
175
+ ]
176
+ 2026-04-24 07:36:54.760 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
177
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
178
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
179
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
180
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
181
+ Please retry in 5.653927512s. [links {
182
+ description: "Learn more about Gemini API quotas"
183
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
184
+ }
185
+ , violations {
186
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
187
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
188
+ quota_dimensions {
189
+ key: "model"
190
+ value: "gemini-2.5-pro"
191
+ }
192
+ quota_dimensions {
193
+ key: "location"
194
+ value: "global"
195
+ }
196
+ }
197
+ violations {
198
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
199
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
200
+ quota_dimensions {
201
+ key: "model"
202
+ value: "gemini-2.5-pro"
203
+ }
204
+ quota_dimensions {
205
+ key: "location"
206
+ value: "global"
207
+ }
208
+ }
209
+ violations {
210
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
211
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
212
+ quota_dimensions {
213
+ key: "model"
214
+ value: "gemini-2.5-pro"
215
+ }
216
+ quota_dimensions {
217
+ key: "location"
218
+ value: "global"
219
+ }
220
+ }
221
+ violations {
222
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
223
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
224
+ quota_dimensions {
225
+ key: "model"
226
+ value: "gemini-2.5-pro"
227
+ }
228
+ quota_dimensions {
229
+ key: "location"
230
+ value: "global"
231
+ }
232
+ }
233
+ , retry_delay {
234
+ seconds: 5
235
+ }
236
+ ]
237
+ 2026-04-24 15:16:36.138 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
238
+ 2026-04-24 15:16:43.946 | INFO | models.model_loader:load_text_model:65 - Text model loaded
239
+ 2026-04-24 15:16:44.719 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
240
+ 2026-04-24 15:16:44.721 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
241
+ 2026-04-24 15:16:44.723 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
242
+ 2026-04-24 15:16:45.864 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
243
+ 2026-04-24 15:16:47.113 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=23 text score=15 verdict=Very Likely Fake
244
+ 2026-04-24 15:16:48.348 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
245
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
246
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
247
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
248
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
249
+ Please retry in 12.294521515s. [links {
250
+ description: "Learn more about Gemini API quotas"
251
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
252
+ }
253
+ , violations {
254
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
255
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
256
+ quota_dimensions {
257
+ key: "model"
258
+ value: "gemini-2.5-pro"
259
+ }
260
+ quota_dimensions {
261
+ key: "location"
262
+ value: "global"
263
+ }
264
+ }
265
+ violations {
266
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
267
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
268
+ quota_dimensions {
269
+ key: "model"
270
+ value: "gemini-2.5-pro"
271
+ }
272
+ quota_dimensions {
273
+ key: "location"
274
+ value: "global"
275
+ }
276
+ }
277
+ violations {
278
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
279
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
280
+ quota_dimensions {
281
+ key: "model"
282
+ value: "gemini-2.5-pro"
283
+ }
284
+ quota_dimensions {
285
+ key: "location"
286
+ value: "global"
287
+ }
288
+ }
289
+ violations {
290
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
291
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
292
+ quota_dimensions {
293
+ key: "model"
294
+ value: "gemini-2.5-pro"
295
+ }
296
+ quota_dimensions {
297
+ key: "location"
298
+ value: "global"
299
+ }
300
+ }
301
+ , retry_delay {
302
+ seconds: 12
303
+ }
304
+ ]
305
+ 2026-04-24 15:16:48.553 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
306
+ 2026-04-24 15:16:50.111 | INFO | models.model_loader:load_image_model:51 - Image model loaded
307
+ 2026-04-24 15:16:51.265 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
308
+ 2026-04-24 15:16:52.685 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
309
+ 2026-04-24 15:16:52.723 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.517 ffpp=n/a eff=n/a → 0.517
310
+ 2026-04-24 15:16:52.735 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
311
+ 2026-04-24 15:16:54.934 | WARNING | services.artifact_detector:detect_face_based_artifacts:211 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
312
+ 2026-04-24 15:16:54.949 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
313
+ 2026-04-24 15:16:54.965 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (256x256)
314
+ 2026-04-24 15:16:55.916 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
315
+ 2026-04-24 15:16:55.975 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
316
+ 2026-04-24 15:16:55.989 | INFO | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=24 score=48 verdict=Possibly Manipulated
317
+ 2026-04-24 15:16:56.236 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
318
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
319
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
320
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
321
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
322
+ Please retry in 4.477916448s. [links {
323
+ description: "Learn more about Gemini API quotas"
324
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
325
+ }
326
+ , violations {
327
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
328
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
329
+ quota_dimensions {
330
+ key: "model"
331
+ value: "gemini-2.5-pro"
332
+ }
333
+ quota_dimensions {
334
+ key: "location"
335
+ value: "global"
336
+ }
337
+ }
338
+ violations {
339
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
340
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
341
+ quota_dimensions {
342
+ key: "model"
343
+ value: "gemini-2.5-pro"
344
+ }
345
+ quota_dimensions {
346
+ key: "location"
347
+ value: "global"
348
+ }
349
+ }
350
+ violations {
351
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
352
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
353
+ quota_dimensions {
354
+ key: "model"
355
+ value: "gemini-2.5-pro"
356
+ }
357
+ quota_dimensions {
358
+ key: "location"
359
+ value: "global"
360
+ }
361
+ }
362
+ violations {
363
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
364
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
365
+ quota_dimensions {
366
+ key: "model"
367
+ value: "gemini-2.5-pro"
368
+ }
369
+ quota_dimensions {
370
+ key: "location"
371
+ value: "global"
372
+ }
373
+ }
374
+ , retry_delay {
375
+ seconds: 4
376
+ }
377
+ ]
378
+ 2026-04-24 15:16:57.419 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
379
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
380
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
381
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
382
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
383
+ Please retry in 3.282459328s. [links {
384
+ description: "Learn more about Gemini API quotas"
385
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
386
+ }
387
+ , violations {
388
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
389
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
390
+ quota_dimensions {
391
+ key: "model"
392
+ value: "gemini-2.5-pro"
393
+ }
394
+ quota_dimensions {
395
+ key: "location"
396
+ value: "global"
397
+ }
398
+ }
399
+ violations {
400
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
401
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
402
+ quota_dimensions {
403
+ key: "model"
404
+ value: "gemini-2.5-pro"
405
+ }
406
+ quota_dimensions {
407
+ key: "location"
408
+ value: "global"
409
+ }
410
+ }
411
+ violations {
412
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
413
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
414
+ quota_dimensions {
415
+ key: "model"
416
+ value: "gemini-2.5-pro"
417
+ }
418
+ quota_dimensions {
419
+ key: "location"
420
+ value: "global"
421
+ }
422
+ }
423
+ violations {
424
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
425
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
426
+ quota_dimensions {
427
+ key: "model"
428
+ value: "gemini-2.5-pro"
429
+ }
430
+ quota_dimensions {
431
+ key: "location"
432
+ value: "global"
433
+ }
434
+ }
435
+ , retry_delay {
436
+ seconds: 3
437
+ }
438
+ ]
439
+ 2026-04-24 15:16:57.445 | INFO | models.model_loader:load_ocr_engine:130 - Loading EasyOCR reader (langs: ['en', 'hi'])
440
+ 2026-04-24 15:17:27.399 | INFO | models.model_loader:load_ocr_engine:136 - EasyOCR loaded
441
+ 2026-04-24 15:17:27.870 | INFO | services.screenshot_service:run_ocr:48 - OCR extracted 0 text regions
442
+ 2026-04-24 15:17:27.881 | INFO | api.v1.analyze:analyze_screenshot_endpoint:726 - Saved AnalysisRecord id=25 screenshot score=50 verdict=Possibly Manipulated
443
+ 2026-04-24 15:17:28.066 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
444
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
445
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
446
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
447
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
448
+ Please retry in 32.593323033s. [links {
449
+ description: "Learn more about Gemini API quotas"
450
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
451
+ }
452
+ , violations {
453
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
454
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
455
+ quota_dimensions {
456
+ key: "model"
457
+ value: "gemini-2.5-pro"
458
+ }
459
+ quota_dimensions {
460
+ key: "location"
461
+ value: "global"
462
+ }
463
+ }
464
+ violations {
465
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
466
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
467
+ quota_dimensions {
468
+ key: "model"
469
+ value: "gemini-2.5-pro"
470
+ }
471
+ quota_dimensions {
472
+ key: "location"
473
+ value: "global"
474
+ }
475
+ }
476
+ violations {
477
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
478
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
479
+ quota_dimensions {
480
+ key: "model"
481
+ value: "gemini-2.5-pro"
482
+ }
483
+ quota_dimensions {
484
+ key: "location"
485
+ value: "global"
486
+ }
487
+ }
488
+ violations {
489
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
490
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
491
+ quota_dimensions {
492
+ key: "model"
493
+ value: "gemini-2.5-pro"
494
+ }
495
+ quota_dimensions {
496
+ key: "location"
497
+ value: "global"
498
+ }
499
+ }
500
+ , retry_delay {
501
+ seconds: 32
502
+ }
503
+ ]
504
+ 2026-04-24 15:17:54.819 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
505
+ 2026-04-24 15:18:00.795 | INFO | models.model_loader:load_text_model:65 - Text model loaded
506
+ 2026-04-24 15:18:00.888 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
507
+ 2026-04-24 15:18:00.889 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
508
+ 2026-04-24 15:18:00.891 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
509
+ 2026-04-24 15:18:01.659 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
510
+ 2026-04-24 15:18:02.878 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=26 text score=15 verdict=Very Likely Fake
511
+ 2026-04-24 15:18:03.994 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
512
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
513
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
514
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
515
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
516
+ Please retry in 56.638939454s. [links {
517
+ description: "Learn more about Gemini API quotas"
518
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
519
+ }
520
+ , violations {
521
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
522
+ quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
523
+ quota_dimensions {
524
+ key: "model"
525
+ value: "gemini-2.5-pro"
526
+ }
527
+ quota_dimensions {
528
+ key: "location"
529
+ value: "global"
530
+ }
531
+ }
532
+ violations {
533
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
534
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
535
+ quota_dimensions {
536
+ key: "model"
537
+ value: "gemini-2.5-pro"
538
+ }
539
+ quota_dimensions {
540
+ key: "location"
541
+ value: "global"
542
+ }
543
+ }
544
+ violations {
545
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
546
+ quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
547
+ quota_dimensions {
548
+ key: "model"
549
+ value: "gemini-2.5-pro"
550
+ }
551
+ quota_dimensions {
552
+ key: "location"
553
+ value: "global"
554
+ }
555
+ }
556
+ violations {
557
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
558
+ quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
559
+ quota_dimensions {
560
+ key: "model"
561
+ value: "gemini-2.5-pro"
562
+ }
563
+ quota_dimensions {
564
+ key: "location"
565
+ value: "global"
566
+ }
567
+ }
568
+ , retry_delay {
569
+ seconds: 56
570
+ }
571
+ ]
572
+ 2026-04-24 15:20:38.285 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
573
+ 2026-04-24 15:20:43.929 | INFO | models.model_loader:load_text_model:65 - Text model loaded
574
+ 2026-04-24 15:20:44.034 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
575
+ 2026-04-24 15:20:44.035 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
576
+ 2026-04-24 15:20:44.037 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
577
+ 2026-04-24 15:20:44.806 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
578
+ 2026-04-24 15:20:46.001 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=27 text score=15 verdict=Very Likely Fake
579
+ 2026-04-24 15:20:56.376 | INFO | services.llm_explainer:generate_llm_summary:175 - LLM summary generated via gemini/gemini-2.5-flash
580
+ 2026-04-24 15:33:56.592 | INFO | api.v1.auth:register:33 - Registered user id=3 email=***@example.com
581
+ 2026-04-24 15:33:57.227 | INFO | api.v1.auth:login:42 - Login user id=3 email=***@example.com
582
+ 2026-04-24 15:33:57.553 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
583
+ 2026-04-24 15:34:06.986 | INFO | models.model_loader:load_text_model:65 - Text model loaded
584
+ 2026-04-24 15:34:07.731 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
585
+ 2026-04-24 15:34:07.733 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
586
+ 2026-04-24 15:34:07.736 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
587
+ 2026-04-24 15:34:09.017 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
588
+ 2026-04-24 15:34:10.285 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=28 text score=30 verdict=Likely Fake
589
+ 2026-04-24 15:34:41.718 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
590
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
591
+ Please retry in 19.188761533s. [links {
592
+ description: "Learn more about Gemini API quotas"
593
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
594
+ }
595
+ , violations {
596
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
597
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
598
+ quota_dimensions {
599
+ key: "model"
600
+ value: "gemini-2.5-flash"
601
+ }
602
+ quota_dimensions {
603
+ key: "location"
604
+ value: "global"
605
+ }
606
+ quota_value: 5
607
+ }
608
+ , retry_delay {
609
+ seconds: 19
610
+ }
611
+ ]
612
+ 2026-04-24 15:34:41.788 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
613
+ 2026-04-24 15:34:41.788 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 76 (High) excl=3 caps=2 cb=1 emo=3
614
+ 2026-04-24 15:34:41.789 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
615
+ 2026-04-24 15:34:41.791 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
616
+ 2026-04-24 15:34:43.147 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=29 text score=15 verdict=Very Likely Fake
617
+ 2026-04-24 15:34:43.555 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
618
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
619
+ Please retry in 17.333464233s. [links {
620
+ description: "Learn more about Gemini API quotas"
621
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
622
+ }
623
+ , violations {
624
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
625
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
626
+ quota_dimensions {
627
+ key: "model"
628
+ value: "gemini-2.5-flash"
629
+ }
630
+ quota_dimensions {
631
+ key: "location"
632
+ value: "global"
633
+ }
634
+ quota_value: 5
635
+ }
636
+ , retry_delay {
637
+ seconds: 17
638
+ }
639
+ ]
640
+ 2026-04-24 15:34:43.615 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.996 fake_p=0.996
641
+ 2026-04-24 15:34:43.616 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
642
+ 2026-04-24 15:34:43.616 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
643
+ 2026-04-24 15:34:43.618 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
644
+ 2026-04-24 15:34:44.924 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=30 text score=30 verdict=Likely Fake
645
+ 2026-04-24 15:34:45.353 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
646
+ * Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
647
+ Please retry in 15.553103918s. [links {
648
+ description: "Learn more about Gemini API quotas"
649
+ url: "https://ai.google.dev/gemini-api/docs/rate-limits"
650
+ }
651
+ , violations {
652
+ quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
653
+ quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
654
+ quota_dimensions {
655
+ key: "model"
656
+ value: "gemini-2.5-flash"
657
+ }
658
+ quota_dimensions {
659
+ key: "location"
660
+ value: "global"
661
+ }
662
+ quota_value: 5
663
+ }
664
+ , retry_delay {
665
+ seconds: 15
666
+ }
667
+ ]
668
+ 2026-04-24 15:43:27.438 | INFO | api.v1.auth:register:33 - Registered user id=4 email=***@example.com
669
+ 2026-04-24 15:43:27.463 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
670
+ 2026-04-24 15:43:33.684 | INFO | models.model_loader:load_text_model:65 - Text model loaded
671
+ 2026-04-24 15:43:33.796 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
672
+ 2026-04-24 15:43:33.797 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
673
+ 2026-04-24 15:43:33.799 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
674
+ 2026-04-24 15:43:35.106 | INFO | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
675
+ 2026-04-24 15:43:35.120 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
676
+ 2026-04-24 15:43:36.284 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=31 text score=31 verdict=Likely Fake
677
+ 2026-04-24 15:43:36.352 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
678
+ 2026-04-24 15:43:36.352 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
679
+ 2026-04-24 15:43:36.353 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
680
+ 2026-04-24 15:43:36.370 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
681
+ 2026-04-24 15:43:37.567 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=32 text score=31 verdict=Likely Fake
682
+ 2026-04-24 15:43:47.549 | INFO | services.llm_explainer:generate_llm_summary:207 - LLM summary generated via gemini/gemini-2.5-flash
683
+ 2026-04-24 15:43:47.614 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
684
+ 2026-04-24 15:43:47.614 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
685
+ 2026-04-24 15:43:47.615 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
686
+ 2026-04-24 15:43:47.630 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
687
+ 2026-04-24 15:43:49.134 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=33 text score=31 verdict=Likely Fake
688
+ 2026-04-24 15:44:11.346 | WARNING | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 300s
689
+ 2026-04-24 15:44:11.346 | WARNING | services.llm_explainer:generate_llm_summary:220 - LLM quota hit (ResourceExhausted) — circuit open for 300s
690
+ 2026-04-24 15:44:11.352 | WARNING | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 5s
691
+ 2026-04-24 15:44:11.404 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
692
+ 2026-04-24 15:44:11.404 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
693
+ 2026-04-24 15:44:11.405 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
694
+ 2026-04-24 15:44:12.724 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=34 text score=30 verdict=Likely Fake
695
+ 2026-04-24 15:57:39.916 | INFO | api.v1.auth:register:33 - Registered user id=5 email=***@example.com
696
+ 2026-04-24 15:57:39.958 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
697
+ 2026-04-24 15:57:46.475 | INFO | models.model_loader:load_text_model:65 - Text model loaded
698
+ 2026-04-24 15:57:46.582 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
699
+ 2026-04-24 15:57:46.584 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
700
+ 2026-04-24 15:57:46.586 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
701
+ 2026-04-24 15:57:47.954 | INFO | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
702
+ 2026-04-24 15:57:49.166 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=35 text score=30 verdict=Likely Fake
703
+ 2026-04-24 15:57:58.130 | INFO | services.llm_explainer:generate_llm_summary:271 - LLM summary generated via gemini/gemini-2.5-flash
704
+ 2026-04-24 15:57:58.196 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
705
+ 2026-04-24 15:57:58.197 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
706
+ 2026-04-24 15:57:58.197 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
707
+ 2026-04-24 15:57:59.705 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=36 text score=30 verdict=Likely Fake
708
+ 2026-04-24 15:58:02.948 | ERROR | services.llm_explainer:generate_llm_summary:287 - LLM explainer failed: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'This model is currently experiencing high demand. Spikes in demand are usually temporary. Please try again later.', 'status': 'UNAVAILABLE'}}
709
+ 2026-04-24 15:58:03.008 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
710
+ 2026-04-24 15:58:03.008 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
711
+ 2026-04-24 15:58:03.009 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
712
+ 2026-04-24 15:58:04.488 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=37 text score=30 verdict=Likely Fake
713
+ 2026-04-24 15:59:52.694 | INFO | services.llm_explainer:_get_provider:176 - LLM chain initialized: gemini/gemini-2.5-flash → groq/llama-3.3-70b-versatile
714
+ 2026-04-24 15:59:52.695 | INFO | services.llm_explainer:generate:161 - gemini/gemini-2.5-flash quota hit — failing over to groq/llama-3.3-70b-versatile
715
+ 2026-04-24 23:15:36.409 | INFO | main:lifespan:108 - Starting DeepShield backend
716
+ 2026-04-24 23:15:36.470 | INFO | main:lifespan:110 - Database initialized
717
+ 2026-04-24 23:15:36.470 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
718
+ 2026-04-24 23:15:46.404 | INFO | models.model_loader:load_image_model:51 - Image model loaded
719
+ 2026-04-24 23:15:57.188 | INFO | api.v1.analyze:analyze_image:118 - cache hit image sha=6de55b9fc5bd record=19
720
+ 2026-04-24 23:16:59.860 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
721
+ 2026-04-24 23:17:03.920 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
722
+ 2026-04-24 23:17:04.519 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.868 ffpp=n/a eff=0.03269108012318611 → 0.450
723
+ 2026-04-24 23:17:04.569 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
724
+ 2026-04-24 23:17:13.315 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
725
+ 2026-04-24 23:17:16.988 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
726
+ 2026-04-24 23:17:17.131 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (800x450)
727
+ 2026-04-24 23:17:18.394 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
728
+ 2026-04-24 23:17:18.714 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
729
+ 2026-04-24 23:17:18.757 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=38 score=45 verdict=Possibly Manipulated
730
+ 2026-04-24 23:29:04.622 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
731
+ 2026-04-24 23:29:05.312 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Fake | vit=0.767 ffpp=n/a eff=0.36121347546577454 → 0.564
732
+ 2026-04-24 23:29:06.604 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
733
+ 2026-04-24 23:29:10.091 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2393x4096)
734
+ 2026-04-24 23:29:11.326 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
735
+ 2026-04-24 23:29:11.344 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
736
+ 2026-04-24 23:29:11.436 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=39 score=44 verdict=Possibly Manipulated
737
+ 2026-04-24 23:30:58.303 | ERROR | api.v1.report:generate:51 - Report generation failed: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
738
+ Traceback (most recent call last):
739
+
740
+ File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1002, in _bootstrap
741
+ self._bootstrap_inner()
742
+ │ └ <function Thread._bootstrap_inner at 0x000001A73BF11A80>
743
+ └ <WorkerThread(AnyIO worker thread, started 18584)>
744
+ File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1045, in _bootstrap_inner
745
+ self.run()
746
+ │ └ <function WorkerThread.run at 0x000001A7030349A0>
747
+ └ <WorkerThread(AnyIO worker thread, started 18584)>
748
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\anyio\_backends\_asyncio.py", line 1002, in run
749
+ result = context.run(func, *args)
750
+ │ │ │ └ ()
751
+ │ │ └ functools.partial(<function generate at 0x000001A7011BA0C0>, db=<sqlalchemy.orm.session.Session object at 0x000001A70D16E390>...
752
+ │ └ <method 'run' of '_contextvars.Context' objects>
753
+ └ <_contextvars.Context object at 0x000001A70D16CD40>
754
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
755
+ response = func(*args, **kwargs)
756
+ │ │ └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
757
+ │ └ ()
758
+ └ <function generate at 0x000001A7011BA160>
759
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
760
+ response = func(*args, **kwargs)
761
+ │ │ └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
762
+ │ └ ()
763
+ └ <function generate at 0x000001A7011BA020>
764
+
765
+ > File "C:\Users\athar\Desktop\minor2\backend\api\v1\report.py", line 49, in generate
766
+ path = generate_report(record)
767
+ │ └ <db.models.AnalysisRecord object at 0x000001A70D17A2D0>
768
+ └ <function generate_report at 0x000001A7011B9D00>
769
+
770
+ File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 119, in generate_report
771
+ html_to_pdf(html, out_path)
772
+ │ │ └ WindowsPath('temp_reports/deepshield_39_c2b71295.pdf')
773
+ │ └ '<!DOCTYPE html>\n<html>\n<head>\n <meta charset="utf-8" />\n <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
774
+ └ <function html_to_pdf at 0x000001A7011B9C60>
775
+
776
+ File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 107, in html_to_pdf
777
+ result = pisa.CreatePDF(html, dest=f)
778
+ │ │ │ └ <_io.BufferedWriter name='temp_reports\\deepshield_39_c2b71295.pdf'>
779
+ │ │ └ '<!DOCTYPE html>\n<html>\n<head>\n <meta charset="utf-8" />\n <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
780
+ │ └ <function pisaDocument at 0x000001A7011B9440>
781
+ └ <module 'xhtml2pdf.pisa' from 'C:\\Users\\athar\\Desktop\\minor2\\backend\\.venv\\Lib\\site-packages\\xhtml2pdf\\pisa.py'>
782
+
783
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\document.py", line 196, in pisaDocument
784
+ doc.build(context.story)
785
+ │ │ │ └ [PmlParagraph(
786
+ │ │ │ 'dir'
787
+ │ │ │ 'dir'
788
+ │ │ │ 'caseSensitive'
789
+ │ │ │ 'caseSensitive'
790
+ │ │ │ 'encoding'
791
+ │ │ │ 'encoding'
792
+ │ │ │ 'text'
793
+ │ │ │ 'text...
794
+ │ │ └ <xhtml2pdf.context.pisaContext object at 0x000001A703A22990>
795
+ │ └ <function BaseDocTemplate.build at 0x000001A77EFA8E00>
796
+ └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
797
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 1083, in build
798
+ self.handle_flowable(flowables)
799
+ │ │ └ [PmlParagraph(
800
+ │ │ 'dir'
801
+ │ │ 'dir'
802
+ │ │ 'caseSensitive'
803
+ │ │ 'caseSensitive'
804
+ │ │ 'encoding'
805
+ │ │ 'encoding'
806
+ │ │ 'text'
807
+ │ │ 'text...
808
+ │ └ <function BaseDocTemplate.handle_flowable at 0x000001A77EFA8B80>
809
+ └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
810
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 932, in handle_flowable
811
+ if frame.add(f, canv, trySplit=self.allowSplitting):
812
+ │ │ │ │ │ └ 1
813
+ │ │ │ │ └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
814
+ │ │ │ └ <reportlab.pdfgen.canvas.Canvas object at 0x000001A70D1DED50>
815
+ │ │ └ PmlTable(
816
+ │ │ rowHeights=[None],
817
+ │ │ colWidths=[4.93228346456693, 488.29606299212605],
818
+ │ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
819
+ │ └ <function Frame._add at 0x000001A77EECDF80>
820
+ └ <reportlab.platypus.frames.Frame object at 0x000001A70344D6D0>
821
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\frames.py", line 158, in _add
822
+ w, h = flowable.wrap(aW, h)
823
+ │ │ │ └ 751.1811023622049
824
+ │ │ └ 493.228346456693
825
+ │ └ <function PmlTable.wrap at 0x000001A7011719E0>
826
+ └ PmlTable(
827
+ rowHeights=[None],
828
+ colWidths=[4.93228346456693, 488.29606299212605],
829
+ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
830
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 858, in wrap
831
+ return Table.wrap(self, availWidth, availHeight)
832
+ │ │ │ │ └ 751.1811023622049
833
+ │ │ │ └ 493.228346456693
834
+ │ │ └ PmlTable(
835
+ │ │ rowHeights=[None],
836
+ │ │ colWidths=[4.93228346456693, 488.29606299212605],
837
+ │ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
838
+ │ └ <function Table.wrap at 0x000001A77EFAC400>
839
+ └ <class 'reportlab.platypus.tables.Table'>
840
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 1354, in wrap
841
+ self._calc(availWidth, availHeight)
842
+ │ │ │ └ 751.1811023622049
843
+ │ │ └ 493.228346456693
844
+ │ └ <function Table._calc at 0x000001A77EFAB600>
845
+ └ PmlTable(
846
+ rowHeights=[None],
847
+ colWidths=[4.93228346456693, 488.29606299212605],
848
+ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
849
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 740, in _calc
850
+ self._calc_height(availHeight,availWidth,W=W)
851
+ │ │ │ │ └ None
852
+ │ │ │ └ 493.228346456693
853
+ │ │ └ 751.1811023622049
854
+ │ └ <function Table._calc_height at 0x000001A77EFAB560>
855
+ └ PmlTable(
856
+ rowHeights=[None],
857
+ colWidths=[4.93228346456693, 488.29606299212605],
858
+ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
859
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 664, in _calc_height
860
+ dW,t = self._listCellGeom(v,w or self._listValueWidth(v),s)
861
+ │ │ │ │ │ │ │ └ <CellStyle '(0, 0)'>
862
+ │ │ │ │ │ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
863
+ │ │ │ │ │ └ <function Table._listValueWidth at 0x000001A77EFAB380>
864
+ │ │ │ │ └ PmlTable(
865
+ │ │ │ │ rowHeights=[None],
866
+ │ │ │ │ colWidths=[4.93228346456693, 488.29606299212605],
867
+ │ │ │ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
868
+ │ │ │ └ 4.93228346456693
869
+ │ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
870
+ │ └ <function PmlTable._listCellGeom at 0x000001A701171940>
871
+ └ PmlTable(
872
+ rowHeights=[None],
873
+ colWidths=[4.93228346456693, 488.29606299212605],
874
+ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
875
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 810, in _listCellGeom
876
+ return Table._listCellGeom(self, V, w, s, W=W, H=H, aH=aH)
877
+ │ │ │ │ │ │ │ │ └ 751.1811023622049
878
+ │ │ │ │ │ │ │ └ None
879
+ │ │ │ │ │ │ └ None
880
+ │ │ │ │ │ └ <CellStyle '(0, 0)'>
881
+ │ │ │ │ └ 4.93228346456693
882
+ │ │ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
883
+ │ │ └ PmlTable(
884
+ │ │ rowHeights=[None],
885
+ │ │ colWidths=[4.93228346456693, 488.29606299212605],
886
+ │ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
887
+ │ └ <function Table._listCellGeom at 0x000001A77EFAB2E0>
888
+ └ <class 'reportlab.platypus.tables.Table'>
889
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 490, in _listCellGeom
890
+ raise ValueError(f'{self.identity()}: flowable given negative availWidth={aW} == width={w} - leftPadding={s.leftPadding} - rightPadding={s.rightPadding}')
891
+ File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 440, in identity
892
+ tallest = '(tallest row %d)' % int(max(rh))
893
+ └ [None]
894
+
895
+ TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
896
+ 2026-04-24 23:44:20.465 | INFO | api.v1.auth:register:33 - Registered user id=6 email=***@gmail.com
897
+ 2026-04-24 23:45:54.152 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
898
+ 2026-04-24 23:45:54.595 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.668 ffpp=n/a eff=0.00913542602211237 → 0.339
899
+ 2026-04-24 23:45:55.772 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
900
+ 2026-04-24 23:45:58.926 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
901
+ 2026-04-24 23:46:00.276 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
902
+ 2026-04-24 23:46:00.291 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=Google, model=Pixel 7 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
903
+ 2026-04-24 23:46:00.379 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=40 score=14 verdict=Very Likely Fake
904
+ 2026-04-24 23:46:00.382 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
905
+ 2026-04-24 23:46:00.386 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
906
+ 2026-04-24 23:47:37.291 | INFO | services.report_service:generate_report:120 - Report generated id=40 path=temp_reports\deepshield_40_3f0f8ff7.pdf size=14978B
907
+ 2026-04-24 23:50:59.570 | INFO | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
908
+ 2026-04-25 02:48:29.295 | INFO | services.report_service:cleanup_expired:149 - Cleaned up 2 expired reports
909
+ 2026-04-25 02:48:29.419 | WARNING | services.report_service:cleanup_expired:149 - Cleanup failed for temp_reports\deepshield_40_3f0f8ff7.pdf: [WinError 2] The system cannot find the file specified: 'temp_reports\\deepshield_40_3f0f8ff7.pdf'
910
+ 2026-04-25 21:48:15.075 | INFO | main:lifespan:108 - Starting DeepShield backend
911
+ 2026-04-25 21:48:15.082 | INFO | main:lifespan:110 - Database initialized
912
+ 2026-04-25 21:48:15.082 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
913
+ 2026-04-25 21:48:18.709 | INFO | models.model_loader:load_image_model:51 - Image model loaded
914
+ 2026-04-25 21:48:18.712 | INFO | main:lifespan:118 - Shutting down DeepShield backend
915
+ 2026-04-25 21:52:02.663 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
916
+ 2026-04-25 21:52:03.239 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.870 ffpp=n/a eff=0.0529196597635746 → 0.462
917
+ 2026-04-25 21:52:04.390 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
918
+ 2026-04-25 21:52:04.682 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (1223x640)
919
+ 2026-04-25 21:52:05.863 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
920
+ 2026-04-25 21:52:05.883 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
921
+ 2026-04-25 21:52:05.927 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=41 score=46 verdict=Possibly Manipulated
922
+ 2026-04-25 22:02:22.021 | INFO | main:lifespan:108 - Starting DeepShield backend
923
+ 2026-04-25 22:02:22.057 | INFO | main:lifespan:110 - Database initialized
924
+ 2026-04-25 22:02:22.057 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
925
+ 2026-04-25 22:02:30.014 | INFO | models.model_loader:load_image_model:51 - Image model loaded
926
+ 2026-04-25 22:13:05.431 | INFO | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
927
+ 2026-04-25 22:13:28.224 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
928
+ 2026-04-25 22:13:28.471 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.694 ffpp=n/a eff=n/a → 0.694
929
+ 2026-04-25 22:13:28.859 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
930
+ 2026-04-25 22:13:31.674 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
931
+ 2026-04-25 22:13:33.044 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
932
+ 2026-04-25 22:13:33.062 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=Apple, model=iPhone 16 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
933
+ 2026-04-25 22:13:33.166 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=42 score=11 verdict=Very Likely Fake
934
+ 2026-04-25 22:13:33.169 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
935
+ 2026-04-25 22:13:33.171 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
936
+ 2026-04-26 22:05:50.626 | INFO | main:lifespan:108 - Starting DeepShield backend
937
+ 2026-04-26 22:05:50.640 | INFO | main:lifespan:110 - Database initialized
938
+ 2026-04-26 22:05:50.641 | INFO | models.model_loader:load_image_model:44 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
939
+ 2026-04-26 22:05:58.170 | INFO | models.model_loader:load_image_model:52 - Image model loaded
940
+ 2026-04-26 22:07:47.526 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
941
+ 2026-04-26 22:07:48.484 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.834 ffpp=n/a eff=0.02755815163254738 → 0.431
942
+ 2026-04-26 22:07:50.164 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
943
+ 2026-04-26 22:07:50.584 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (1290x1290)
944
+ 2026-04-26 22:07:52.661 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
945
+ 2026-04-26 22:07:52.670 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
946
+ 2026-04-26 22:07:52.747 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=43 score=43 verdict=Possibly Manipulated
947
+ 2026-04-26 22:07:52.752 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
948
+ 2026-04-26 22:07:52.756 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
949
+ 2026-04-26 22:09:45.469 | INFO | services.report_service:generate_report:120 - Report generated id=43 path=temp_reports\deepshield_43_262befa5.pdf size=15602B
main.py CHANGED
@@ -1,17 +1,98 @@
1
  import asyncio
 
 
2
  from contextlib import asynccontextmanager
3
 
4
  from fastapi import FastAPI
5
  from fastapi.middleware.cors import CORSMiddleware
 
6
  from loguru import logger
 
 
 
 
 
7
 
8
  from api.router import api_router
9
  from config import settings
10
  from db.database import init_db
11
  from models.model_loader import get_model_loader
 
12
  from services.report_service import cleanup_expired
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  async def _report_cleanup_loop():
16
  while True:
17
  try:
@@ -23,6 +104,7 @@ async def _report_cleanup_loop():
23
 
24
  @asynccontextmanager
25
  async def lifespan(app: FastAPI):
 
26
  logger.info("Starting DeepShield backend")
27
  init_db()
28
  logger.info("Database initialized")
@@ -43,16 +125,32 @@ app = FastAPI(
43
  lifespan=lifespan,
44
  )
45
 
 
 
 
 
 
 
 
 
 
 
46
  app.add_middleware(
47
  CORSMiddleware,
48
  allow_origins=settings.CORS_ORIGINS,
49
  allow_credentials=True,
50
- allow_methods=["*"],
51
- allow_headers=["*"],
52
  )
53
 
54
  app.include_router(api_router)
55
 
 
 
 
 
 
 
56
 
57
  @app.get("/")
58
  def root():
 
1
  import asyncio
2
+ import secrets
3
+ import sys
4
  from contextlib import asynccontextmanager
5
 
6
  from fastapi import FastAPI
7
  from fastapi.middleware.cors import CORSMiddleware
8
+ from fastapi.staticfiles import StaticFiles
9
  from loguru import logger
10
+ from slowapi import _rate_limit_exceeded_handler
11
+ from slowapi.errors import RateLimitExceeded
12
+
13
+ from starlette.middleware.base import BaseHTTPMiddleware
14
+ from starlette.responses import JSONResponse
15
 
16
  from api.router import api_router
17
  from config import settings
18
  from db.database import init_db
19
  from models.model_loader import get_model_loader
20
+ from services.rate_limit import RateLimitContextMiddleware, limiter
21
  from services.report_service import cleanup_expired
22
 
23
 
24
class ContentLengthLimitMiddleware(BaseHTTPMiddleware):
    """Reject oversized uploads using the declared Content-Length header.

    The check happens before the request body is read, which saves bandwidth
    and memory compared with letting read_upload_bytes reject the upload after
    reading it.

    Requests without a Content-Length header, or with a value that is not a
    plain ASCII decimal number, are passed through to the next handler
    unchanged.
    """

    def __init__(self, app, max_bytes: int) -> None:
        """Wrap *app* and remember the largest accepted body size in bytes."""
        super().__init__(app)
        self._max = max_bytes

    async def dispatch(self, request, call_next):
        """Return a 413 JSON response when the declared length exceeds the limit.

        Otherwise the request is forwarded to ``call_next`` and its response is
        returned as-is.
        """
        cl = request.headers.get("content-length")
        # str.isdigit() alone also accepts characters such as "²" that int()
        # rejects with ValueError; requiring ASCII keeps a malformed header from
        # raising inside the middleware.
        if cl and cl.isascii() and cl.isdigit() and int(cl) > self._max:
            return JSONResponse(
                status_code=413,
                content={"detail": f"Upload exceeds {self._max // (1024 * 1024)} MB limit"},
            )
        return await call_next(request)
40
+
41
+
42
+ # === Phase 15.3 — JWT / CORS / logging hardening ===
43
+
44
+ _DEFAULT_JWT_SECRET = "change-me-in-production"
45
+
46
+
47
def _enforce_production_hardening() -> None:
    """Abort startup when unsafe defaults are used outside debug mode (Phase 15.3).

    Two settings are checked:

    * ``JWT_SECRET_KEY`` must be set and differ from the built-in placeholder.
      With ``DEBUG`` on this only logs a warning; otherwise the process exits
      with status 1.
    * ``CORS_ORIGINS`` must not contain ``"*"`` unless ``DEBUG`` is on;
      otherwise the process exits with status 1.
    """
    jwt_key = settings.JWT_SECRET_KEY
    if not jwt_key or jwt_key == _DEFAULT_JWT_SECRET:
        # A freshly generated value is included in the message as a suggestion.
        suggestion = secrets.token_urlsafe(48)
        if not settings.DEBUG:
            logger.error(
                "Refusing to start: JWT_SECRET_KEY is unset or default. "
                f"Set JWT_SECRET_KEY in your environment. Example: {suggestion}"
            )
            sys.exit(1)
        logger.warning(
            "JWT_SECRET_KEY is unset or default — safe in dev only. "
            f"Set it before deploying. Example: {suggestion}"
        )

    has_wildcard_origin = "*" in settings.CORS_ORIGINS
    if has_wildcard_origin and not settings.DEBUG:
        logger.error(
            "Refusing to start: CORS_ORIGINS contains '*' while allow_credentials=True. "
            "Set an explicit origin list."
        )
        sys.exit(1)
68
+
69
+
70
def _configure_logging() -> None:
    """Install the stderr and rotating-file log sinks with email scrubbing.

    Existing sinks are removed first. Both new sinks log at INFO and share a
    filter that rewrites the local part of any email address found in the
    record's message to ``***`` while keeping the domain.
    """
    import re

    address_pattern = re.compile(r"([A-Za-z0-9._%+-]+)@([A-Za-z0-9.-]+\.[A-Za-z]{2,})")

    def _redact_emails(record):
        # Always returns True: the filter rewrites the message, it never drops it.
        record["message"] = address_pattern.sub(r"***@\2", record["message"])
        return True

    shared_options = {"filter": _redact_emails, "level": "INFO"}

    logger.remove()
    logger.add(sys.stderr, **shared_options)
    logger.add(
        "logs/deepshield.log",
        rotation="10 MB",
        retention="7 days",
        enqueue=True,
        **shared_options,
    )
91
+
92
+
93
+ _configure_logging()
94
+
95
+
96
  async def _report_cleanup_loop():
97
  while True:
98
  try:
 
104
 
105
  @asynccontextmanager
106
  async def lifespan(app: FastAPI):
107
+ _enforce_production_hardening()
108
  logger.info("Starting DeepShield backend")
109
  init_db()
110
  logger.info("Database initialized")
 
125
  lifespan=lifespan,
126
  )
127
 
128
+ # Phase 15.2 — slowapi rate limiter
129
+ app.state.limiter = limiter
130
+
131
+
132
+ app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
133
+ app.add_middleware(RateLimitContextMiddleware)
134
+ # Phase 15.3 — reject oversized uploads before reading body
135
+ app.add_middleware(ContentLengthLimitMiddleware, max_bytes=settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024)
136
+
137
+ # Phase 15.3 — explicit CORS methods/headers (no wildcards with credentials)
138
  app.add_middleware(
139
  CORSMiddleware,
140
  allow_origins=settings.CORS_ORIGINS,
141
  allow_credentials=True,
142
+ allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
143
+ allow_headers=["Authorization", "Content-Type", "Accept", "Origin", "X-Requested-With"],
144
  )
145
 
146
  app.include_router(api_router)
147
 
148
+ # Phase 19.2 — serve stored thumbnails / media under /media/*
149
+ import os as _os
150
+ _media_root = _os.environ.get("MEDIA_ROOT", "./media")
151
+ _os.makedirs(_os.path.join(_media_root, "thumbs"), exist_ok=True)
152
+ app.mount("/media", StaticFiles(directory=_media_root), name="media")
153
+
154
 
155
  @app.get("/")
156
  def root():
media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp ADDED
models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg RENAMED
File without changes
media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg ADDED
models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg RENAMED
File without changes
media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg ADDED
media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg ADDED

Git LFS Details

  • SHA256: 7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd
  • Pointer size: 132 Bytes
  • Size of remote file: 4.01 MB
media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp ADDED
media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg ADDED

Git LFS Details

  • SHA256: c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028
  • Pointer size: 132 Bytes
  • Size of remote file: 3.13 MB
media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg ADDED
media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg ADDED
media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg ADDED
media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg ADDED