Spaces:
Running
Running
Sync from GitHub via hub-sync
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff.
- .env.example +44 -0
- .gitattributes +4 -0
- Colab_ViT_Training.ipynb +0 -233
- README.md +0 -13
- analyze.py +0 -177
- api/router.py +1 -0
- api/v1/health.py +72 -1
- api/v1/history.py +7 -1
- api/v1/report.py +41 -6
- artifact_detector.py +0 -229
- auth.py +0 -30
- auth_service.py +0 -67
- common.py +0 -88
- config.py +34 -1
- database.py +0 -30
- datasets/__init__.py +0 -0
- datasets/build_manifest.py +0 -93
- datasets/download_dfdc_sample.py +0 -44
- datasets/download_ffhq.py +0 -49
- datasets/extract_frames.py +0 -90
- datasets/procure_all.ps1 +0 -40
- datasets/procure_all.sh +0 -37
- db/database.py +41 -17
- db/models.py +21 -7
- deepshield.db-shm +0 -0
- deepshield.db-wal +0 -0
- deepshield_13_5bcf1328.pdf +0 -148
- deps.py +0 -46
- download_ffpp.py +0 -261
- ela_service.py +0 -88
- exif_service.py +0 -129
- file_handler.py +0 -96
- generate_colab_nb.py +0 -213
- heatmap_generator.py +0 -164
- image_service.py +0 -58
- llm_explainer.py +0 -191
- logs/deepshield.log +949 -0
- main.py +100 -2
- media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp +0 -0
- models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg +2 -2
- media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg +0 -0
- models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg +2 -2
- media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg +0 -0
- media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg +3 -0
- media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp +0 -0
- media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg +3 -0
- media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg +0 -0
- media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg +0 -0
- media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg +0 -0
- media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg +0 -0
.env.example
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# === DeepShield backend config example ===
|
| 2 |
+
|
| 3 |
+
# Server
|
| 4 |
+
APP_HOST=0.0.0.0
|
| 5 |
+
APP_PORT=8000
|
| 6 |
+
DEBUG=false
|
| 7 |
+
CORS_ORIGINS=["http://localhost:5173"]
|
| 8 |
+
|
| 9 |
+
# === Database ===
|
| 10 |
+
# SQLite (default — zero-config, great for dev / college demo):
|
| 11 |
+
DATABASE_URL=sqlite:///./deepshield.db
|
| 12 |
+
# Postgres (production path — schema migrations are applied automatically
|
| 13 |
+
# by init_db via ALTER TABLE when new columns are missing):
|
| 14 |
+
# DATABASE_URL=postgresql+psycopg2://deepshield:CHANGEME@localhost:5432/deepshield
|
| 15 |
+
|
| 16 |
+
# Phase 19.1 — SHA-256 dedup cache TTL (days)
|
| 17 |
+
CACHE_TTL_DAYS=30
|
| 18 |
+
|
| 19 |
+
# Phase 19.2 — object storage root (content-addressed media + thumbnails)
|
| 20 |
+
MEDIA_ROOT=./media
|
| 21 |
+
|
| 22 |
+
# File upload
|
| 23 |
+
MAX_UPLOAD_SIZE_MB=100
|
| 24 |
+
UPLOAD_DIR=./temp_uploads
|
| 25 |
+
|
| 26 |
+
# AI models
|
| 27 |
+
PRELOAD_MODELS=true
|
| 28 |
+
DEVICE=cpu
|
| 29 |
+
|
| 30 |
+
# LLM explainability (Phase 12)
|
| 31 |
+
LLM_PROVIDER=gemini
|
| 32 |
+
LLM_API_KEY=
|
| 33 |
+
LLM_MODEL=gemini-1.5-flash
|
| 34 |
+
|
| 35 |
+
# News lookup (Phase 13)
|
| 36 |
+
NEWS_API_KEY=
|
| 37 |
+
|
| 38 |
+
# Auth (REQUIRED in production — generate with python -c "import secrets; print(secrets.token_urlsafe(48))")
|
| 39 |
+
JWT_SECRET_KEY=change-me-in-production
|
| 40 |
+
JWT_ALGORITHM=HS256
|
| 41 |
+
JWT_EXPIRATION_MINUTES=1440
|
| 42 |
+
|
| 43 |
+
# Optional metadata writer
|
| 44 |
+
EXIFTOOL_PATH=
|
.gitattributes
CHANGED
|
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg filter=lfs diff=lfs merge=lfs -text
|
Colab_ViT_Training.ipynb
DELETED
|
@@ -1,233 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "markdown",
|
| 5 |
-
"id": "1e0e7b4a",
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"source": [
|
| 8 |
-
"# DeepShield: FaceForensics++ ViT Training \n",
|
| 9 |
-
"Run this entirely in Google Colab.\n",
|
| 10 |
-
"**Before running**:\n",
|
| 11 |
-
"1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.\n",
|
| 12 |
-
"2. Run the cells below sequentially.\n"
|
| 13 |
-
]
|
| 14 |
-
},
|
| 15 |
-
{
|
| 16 |
-
"cell_type": "code",
|
| 17 |
-
"execution_count": null,
|
| 18 |
-
"id": "4fe293e7",
|
| 19 |
-
"metadata": {},
|
| 20 |
-
"outputs": [],
|
| 21 |
-
"source": [
|
| 22 |
-
"!pip install timm transformers datasets accelerate evaluate opencv-python\n"
|
| 23 |
-
]
|
| 24 |
-
},
|
| 25 |
-
{
|
| 26 |
-
"cell_type": "code",
|
| 27 |
-
"execution_count": null,
|
| 28 |
-
"id": "c9387c0f",
|
| 29 |
-
"metadata": {},
|
| 30 |
-
"outputs": [],
|
| 31 |
-
"source": [
|
| 32 |
-
"# We create the download script inside the Colab environment\n",
|
| 33 |
-
"download_script = '''#!/usr/bin/env python\n",
|
| 34 |
-
"import argparse\n",
|
| 35 |
-
"import os\n",
|
| 36 |
-
"import urllib.request\n",
|
| 37 |
-
"import tempfile\n",
|
| 38 |
-
"import time\n",
|
| 39 |
-
"import sys\n",
|
| 40 |
-
"import json\n",
|
| 41 |
-
"from tqdm import tqdm\n",
|
| 42 |
-
"from os.path import join\n",
|
| 43 |
-
"\n",
|
| 44 |
-
"FILELIST_URL = 'misc/filelist.json'\n",
|
| 45 |
-
"DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'\n",
|
| 46 |
-
"DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]\n",
|
| 47 |
-
"DATASETS = {\n",
|
| 48 |
-
" 'original': 'original_sequences/youtube',\n",
|
| 49 |
-
" 'Deepfakes': 'manipulated_sequences/Deepfakes',\n",
|
| 50 |
-
" 'Face2Face': 'manipulated_sequences/Face2Face',\n",
|
| 51 |
-
" 'FaceShifter': 'manipulated_sequences/FaceShifter',\n",
|
| 52 |
-
" 'FaceSwap': 'manipulated_sequences/FaceSwap',\n",
|
| 53 |
-
" 'NeuralTextures': 'manipulated_sequences/NeuralTextures'\n",
|
| 54 |
-
"}\n",
|
| 55 |
-
"ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']\n",
|
| 56 |
-
"COMPRESSION = ['raw', 'c23', 'c40']\n",
|
| 57 |
-
"TYPE = ['videos']\n",
|
| 58 |
-
"\n",
|
| 59 |
-
"def download_file(url, out_file):\n",
|
| 60 |
-
" os.makedirs(os.path.dirname(out_file), exist_ok=True)\n",
|
| 61 |
-
" if not os.path.isfile(out_file):\n",
|
| 62 |
-
" urllib.request.urlretrieve(url, out_file)\n",
|
| 63 |
-
"\n",
|
| 64 |
-
"def main():\n",
|
| 65 |
-
" parser = argparse.ArgumentParser()\n",
|
| 66 |
-
" parser.add_argument('output_path', type=str)\n",
|
| 67 |
-
" parser.add_argument('-d', '--dataset', type=str, default='all')\n",
|
| 68 |
-
" parser.add_argument('-c', '--compression', type=str, default='c40')\n",
|
| 69 |
-
" parser.add_argument('-t', '--type', type=str, default='videos')\n",
|
| 70 |
-
" parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial\n",
|
| 71 |
-
" args = parser.parse_args()\n",
|
| 72 |
-
" \n",
|
| 73 |
-
" base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'\n",
|
| 74 |
-
" \n",
|
| 75 |
-
" datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS\n",
|
| 76 |
-
" for dataset in datasets:\n",
|
| 77 |
-
" dataset_path = DATASETS[dataset]\n",
|
| 78 |
-
" print(f'Downloading {args.compression} of {dataset}')\n",
|
| 79 |
-
" \n",
|
| 80 |
-
" file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode(\"utf-8\"))\n",
|
| 81 |
-
" filelist = []\n",
|
| 82 |
-
" if 'original' in dataset_path:\n",
|
| 83 |
-
" for pair in file_pairs:\n",
|
| 84 |
-
" filelist += pair\n",
|
| 85 |
-
" else:\n",
|
| 86 |
-
" for pair in file_pairs:\n",
|
| 87 |
-
" filelist.append('_'.join(pair))\n",
|
| 88 |
-
" filelist.append('_'.join(pair[::-1]))\n",
|
| 89 |
-
" \n",
|
| 90 |
-
" filelist = filelist[:args.num_videos]\n",
|
| 91 |
-
" dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'\n",
|
| 92 |
-
" dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)\n",
|
| 93 |
-
" \n",
|
| 94 |
-
" for filename in tqdm(filelist):\n",
|
| 95 |
-
" download_file(dataset_videos_url + filename + \".mp4\", join(dataset_output_path, filename + \".mp4\"))\n",
|
| 96 |
-
"\n",
|
| 97 |
-
"if __name__ == \"__main__\":\n",
|
| 98 |
-
" main()\n",
|
| 99 |
-
"'''\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"with open(\"download_ffpp.py\", \"w\") as f:\n",
|
| 102 |
-
" f.write(download_script)\n",
|
| 103 |
-
"\n",
|
| 104 |
-
"!python download_ffpp.py ./data -d all -c c40 -t videos -n 50\n"
|
| 105 |
-
]
|
| 106 |
-
},
|
| 107 |
-
{
|
| 108 |
-
"cell_type": "code",
|
| 109 |
-
"execution_count": null,
|
| 110 |
-
"id": "f33716f6",
|
| 111 |
-
"metadata": {},
|
| 112 |
-
"outputs": [],
|
| 113 |
-
"source": [
|
| 114 |
-
"import cv2\n",
|
| 115 |
-
"import os\n",
|
| 116 |
-
"import glob\n",
|
| 117 |
-
"from tqdm import tqdm\n",
|
| 118 |
-
"\n",
|
| 119 |
-
"def extract_frames(video_folder, output_folder, label, max_frames=4):\n",
|
| 120 |
-
" os.makedirs(output_folder, exist_ok=True)\n",
|
| 121 |
-
" videos = glob.glob(os.path.join(video_folder, \"*.mp4\"))\n",
|
| 122 |
-
" \n",
|
| 123 |
-
" for vid_path in tqdm(videos, desc=f\"Extracting {label}\"):\n",
|
| 124 |
-
" vid_name = os.path.basename(vid_path).replace('.mp4','')\n",
|
| 125 |
-
" cap = cv2.VideoCapture(vid_path)\n",
|
| 126 |
-
" count = 0\n",
|
| 127 |
-
" while cap.isOpened() and count < max_frames:\n",
|
| 128 |
-
" ret, frame = cap.read()\n",
|
| 129 |
-
" if not ret: break\n",
|
| 130 |
-
" frame = cv2.resize(frame, (224, 224))\n",
|
| 131 |
-
" out_path = os.path.join(output_folder, f\"{vid_name}_f{count}.jpg\")\n",
|
| 132 |
-
" cv2.imwrite(out_path, frame)\n",
|
| 133 |
-
" count += 1\n",
|
| 134 |
-
" cap.release()\n",
|
| 135 |
-
"\n",
|
| 136 |
-
"# Extract Real\n",
|
| 137 |
-
"extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')\n",
|
| 138 |
-
"\n",
|
| 139 |
-
"# Extract Fakes\n",
|
| 140 |
-
"fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']\n",
|
| 141 |
-
"for f in fakes:\n",
|
| 142 |
-
" extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake')\n"
|
| 143 |
-
]
|
| 144 |
-
},
|
| 145 |
-
{
|
| 146 |
-
"cell_type": "code",
|
| 147 |
-
"execution_count": null,
|
| 148 |
-
"id": "b79cdd85",
|
| 149 |
-
"metadata": {},
|
| 150 |
-
"outputs": [],
|
| 151 |
-
"source": [
|
| 152 |
-
"import numpy as np\n",
|
| 153 |
-
"from datasets import load_dataset\n",
|
| 154 |
-
"from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer\n",
|
| 155 |
-
"import torch\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"# 1. Load Dataset\n",
|
| 158 |
-
"dataset = load_dataset('imagefolder', data_dir='./dataset')\n",
|
| 159 |
-
"# Split into train/validation\n",
|
| 160 |
-
"dataset = dataset['train'].train_test_split(test_size=0.1)\n",
|
| 161 |
-
"\n",
|
| 162 |
-
"# 2. Preprocessor\n",
|
| 163 |
-
"model_name_or_path = 'google/vit-base-patch16-224-in21k'\n",
|
| 164 |
-
"processor = ViTImageProcessor.from_pretrained(model_name_or_path)\n",
|
| 165 |
-
"\n",
|
| 166 |
-
"def transform(example_batch):\n",
|
| 167 |
-
" # Take a list of PIL images and turn them to pixel values\n",
|
| 168 |
-
" inputs = processor([x.convert(\"RGB\") for x in example_batch['image']], return_tensors='pt')\n",
|
| 169 |
-
" inputs['labels'] = example_batch['label']\n",
|
| 170 |
-
" return inputs\n",
|
| 171 |
-
"\n",
|
| 172 |
-
"prepared_ds = dataset.with_transform(transform)\n",
|
| 173 |
-
"\n",
|
| 174 |
-
"def collate_fn(batch):\n",
|
| 175 |
-
" return {\n",
|
| 176 |
-
" 'pixel_values': torch.stack([x['pixel_values'] for x in batch]),\n",
|
| 177 |
-
" 'labels': torch.tensor([x['labels'] for x in batch])\n",
|
| 178 |
-
" }\n",
|
| 179 |
-
"\n",
|
| 180 |
-
"# 3. Load Model\n",
|
| 181 |
-
"labels = dataset['train'].features['label'].names\n",
|
| 182 |
-
"model = ViTForImageClassification.from_pretrained(\n",
|
| 183 |
-
" model_name_or_path,\n",
|
| 184 |
-
" num_labels=len(labels),\n",
|
| 185 |
-
" id2label={str(i): c for i, c in enumerate(labels)},\n",
|
| 186 |
-
" label2id={c: str(i) for i, c in enumerate(labels)}\n",
|
| 187 |
-
")\n",
|
| 188 |
-
"\n",
|
| 189 |
-
"training_args = TrainingArguments(\n",
|
| 190 |
-
" output_dir=\"./vit-deepshield\",\n",
|
| 191 |
-
" per_device_train_batch_size=16,\n",
|
| 192 |
-
" eval_strategy=\"steps\",\n",
|
| 193 |
-
" num_train_epochs=3,\n",
|
| 194 |
-
" fp16=True, # Mixed precision for speed\n",
|
| 195 |
-
" save_steps=100,\n",
|
| 196 |
-
" eval_steps=100,\n",
|
| 197 |
-
" logging_steps=10,\n",
|
| 198 |
-
" learning_rate=2e-4,\n",
|
| 199 |
-
" save_total_limit=2,\n",
|
| 200 |
-
" remove_unused_columns=False,\n",
|
| 201 |
-
" push_to_hub=False,\n",
|
| 202 |
-
" load_best_model_at_end=True,\n",
|
| 203 |
-
")\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"import evaluate\n",
|
| 206 |
-
"metric = evaluate.load(\"accuracy\")\n",
|
| 207 |
-
"def compute_metrics(p):\n",
|
| 208 |
-
" return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)\n",
|
| 209 |
-
"\n",
|
| 210 |
-
"trainer = Trainer(\n",
|
| 211 |
-
" model=model,\n",
|
| 212 |
-
" args=training_args,\n",
|
| 213 |
-
" data_collator=collate_fn,\n",
|
| 214 |
-
" compute_metrics=compute_metrics,\n",
|
| 215 |
-
" train_dataset=prepared_ds[\"train\"],\n",
|
| 216 |
-
" eval_dataset=prepared_ds[\"test\"],\n",
|
| 217 |
-
")\n",
|
| 218 |
-
"\n",
|
| 219 |
-
"# 4. Train\n",
|
| 220 |
-
"train_results = trainer.train()\n",
|
| 221 |
-
"trainer.save_model(\"deepshield_vit_model\")\n",
|
| 222 |
-
"processor.save_pretrained(\"deepshield_vit_model\")\n",
|
| 223 |
-
"trainer.log_metrics(\"train\", train_results.metrics)\n",
|
| 224 |
-
"trainer.save_metrics(\"train\", train_results.metrics)\n",
|
| 225 |
-
"trainer.save_state()\n",
|
| 226 |
-
"print(\"Training Complete! The model is saved to ./deepshield_vit_model\")\n"
|
| 227 |
-
]
|
| 228 |
-
}
|
| 229 |
-
],
|
| 230 |
-
"metadata": {},
|
| 231 |
-
"nbformat": 4,
|
| 232 |
-
"nbformat_minor": 5
|
| 233 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
DELETED
|
@@ -1,13 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
title: Deepshield
|
| 3 |
-
emoji: 🛡️
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo: indigo
|
| 6 |
-
sdk: docker
|
| 7 |
-
app_port: 7860
|
| 8 |
-
pinned: true
|
| 9 |
-
---
|
| 10 |
-
|
| 11 |
-
# DeepShield Backend
|
| 12 |
-
|
| 13 |
-
This space hosts the FastAPI backend for DeepShield.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
analyze.py
DELETED
|
@@ -1,177 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from typing import List
|
| 4 |
-
|
| 5 |
-
from pydantic import BaseModel
|
| 6 |
-
|
| 7 |
-
from schemas.common import (
|
| 8 |
-
ArtifactIndicator,
|
| 9 |
-
ContradictingEvidence,
|
| 10 |
-
ExifSummary,
|
| 11 |
-
LLMExplainabilitySummary,
|
| 12 |
-
ProcessingSummary,
|
| 13 |
-
TrustedSource,
|
| 14 |
-
TruthOverride,
|
| 15 |
-
Verdict,
|
| 16 |
-
VLMBreakdown,
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class SensationalismBreakdown(BaseModel):
|
| 21 |
-
score: int = 0
|
| 22 |
-
level: str = "Low"
|
| 23 |
-
exclamation_count: int = 0
|
| 24 |
-
caps_word_count: int = 0
|
| 25 |
-
clickbait_matches: int = 0
|
| 26 |
-
emotional_word_count: int = 0
|
| 27 |
-
superlative_count: int = 0
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
class ManipulationIndicatorOut(BaseModel):
|
| 31 |
-
pattern_type: str
|
| 32 |
-
matched_text: str
|
| 33 |
-
start_pos: int
|
| 34 |
-
end_pos: int
|
| 35 |
-
severity: str
|
| 36 |
-
description: str
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
class TextExplainability(BaseModel):
|
| 40 |
-
fake_probability: float
|
| 41 |
-
top_label: str
|
| 42 |
-
all_scores: dict = {}
|
| 43 |
-
keywords: List[str] = []
|
| 44 |
-
sensationalism: SensationalismBreakdown = SensationalismBreakdown()
|
| 45 |
-
manipulation_indicators: List[ManipulationIndicatorOut] = []
|
| 46 |
-
detected_language: str = "en" # ISO 639-1 code, e.g. "en", "hi"
|
| 47 |
-
truth_override: TruthOverride | None = None
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
class TextAnalysisResponse(BaseModel):
|
| 51 |
-
analysis_id: str
|
| 52 |
-
record_id: int = 0
|
| 53 |
-
media_type: str = "text"
|
| 54 |
-
timestamp: str
|
| 55 |
-
verdict: Verdict
|
| 56 |
-
explainability: TextExplainability
|
| 57 |
-
llm_summary: LLMExplainabilitySummary | None = None
|
| 58 |
-
trusted_sources: List[TrustedSource] = []
|
| 59 |
-
contradicting_evidence: List[ContradictingEvidence] = []
|
| 60 |
-
processing_summary: ProcessingSummary
|
| 61 |
-
responsible_ai_notice: str = (
|
| 62 |
-
"AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
class OCRBoxOut(BaseModel):
|
| 67 |
-
text: str
|
| 68 |
-
bbox: List[List[int]]
|
| 69 |
-
confidence: float
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
class SuspiciousPhraseOut(BaseModel):
|
| 73 |
-
text: str
|
| 74 |
-
bbox: List[List[int]]
|
| 75 |
-
pattern_type: str
|
| 76 |
-
severity: str
|
| 77 |
-
description: str
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
class LayoutAnomalyOut(BaseModel):
|
| 81 |
-
type: str
|
| 82 |
-
severity: str
|
| 83 |
-
description: str
|
| 84 |
-
confidence: float
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
class ScreenshotExplainability(BaseModel):
|
| 88 |
-
extracted_text: str = ""
|
| 89 |
-
ocr_boxes: List[OCRBoxOut] = []
|
| 90 |
-
fake_probability: float = 0.0
|
| 91 |
-
sensationalism: SensationalismBreakdown = SensationalismBreakdown()
|
| 92 |
-
suspicious_phrases: List[SuspiciousPhraseOut] = []
|
| 93 |
-
layout_anomalies: List[LayoutAnomalyOut] = []
|
| 94 |
-
keywords: List[str] = []
|
| 95 |
-
detected_language: str = "en"
|
| 96 |
-
truth_override: TruthOverride | None = None
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
class ScreenshotAnalysisResponse(BaseModel):
|
| 100 |
-
analysis_id: str
|
| 101 |
-
record_id: int = 0
|
| 102 |
-
media_type: str = "screenshot"
|
| 103 |
-
timestamp: str
|
| 104 |
-
verdict: Verdict
|
| 105 |
-
explainability: ScreenshotExplainability
|
| 106 |
-
llm_summary: LLMExplainabilitySummary | None = None
|
| 107 |
-
trusted_sources: List[TrustedSource] = []
|
| 108 |
-
contradicting_evidence: List[ContradictingEvidence] = []
|
| 109 |
-
processing_summary: ProcessingSummary
|
| 110 |
-
responsible_ai_notice: str = (
|
| 111 |
-
"AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
|
| 112 |
-
)
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
class ImageExplainability(BaseModel):
|
| 116 |
-
heatmap_base64: str = ""
|
| 117 |
-
ela_base64: str = ""
|
| 118 |
-
boxes_base64: str = ""
|
| 119 |
-
heatmap_status: str = "success" # success | failed | degraded
|
| 120 |
-
artifact_indicators: List[ArtifactIndicator] = []
|
| 121 |
-
exif: ExifSummary | None = None
|
| 122 |
-
llm_summary: LLMExplainabilitySummary | None = None
|
| 123 |
-
vlm_breakdown: VLMBreakdown | None = None
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
class FrameAnalysisOut(BaseModel):
|
| 127 |
-
index: int
|
| 128 |
-
timestamp_s: float
|
| 129 |
-
label: str
|
| 130 |
-
confidence: float
|
| 131 |
-
suspicious_prob: float
|
| 132 |
-
is_suspicious: bool
|
| 133 |
-
has_face: bool = False
|
| 134 |
-
scored: bool = False
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
class VideoExplainability(BaseModel):
|
| 138 |
-
num_frames_sampled: int
|
| 139 |
-
num_face_frames: int = 0
|
| 140 |
-
num_suspicious_frames: int
|
| 141 |
-
mean_suspicious_prob: float
|
| 142 |
-
max_suspicious_prob: float
|
| 143 |
-
suspicious_ratio: float
|
| 144 |
-
insufficient_faces: bool = False
|
| 145 |
-
suspicious_timestamps: List[float] = []
|
| 146 |
-
frames: List[FrameAnalysisOut] = []
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
class VideoAnalysisResponse(BaseModel):
|
| 150 |
-
analysis_id: str
|
| 151 |
-
record_id: int = 0
|
| 152 |
-
media_type: str = "video"
|
| 153 |
-
timestamp: str
|
| 154 |
-
verdict: Verdict
|
| 155 |
-
explainability: VideoExplainability
|
| 156 |
-
llm_summary: LLMExplainabilitySummary | None = None
|
| 157 |
-
trusted_sources: List[TrustedSource] = []
|
| 158 |
-
contradicting_evidence: List[ContradictingEvidence] = []
|
| 159 |
-
processing_summary: ProcessingSummary
|
| 160 |
-
responsible_ai_notice: str = (
|
| 161 |
-
"AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
|
| 162 |
-
)
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
class ImageAnalysisResponse(BaseModel):
|
| 166 |
-
analysis_id: str
|
| 167 |
-
record_id: int = 0
|
| 168 |
-
media_type: str = "image"
|
| 169 |
-
timestamp: str
|
| 170 |
-
verdict: Verdict
|
| 171 |
-
explainability: ImageExplainability
|
| 172 |
-
trusted_sources: List[TrustedSource] = []
|
| 173 |
-
contradicting_evidence: List[ContradictingEvidence] = []
|
| 174 |
-
processing_summary: ProcessingSummary
|
| 175 |
-
responsible_ai_notice: str = (
|
| 176 |
-
"AI-based analysis may not be 100% accurate. Cross-check with trusted sources before sharing."
|
| 177 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/router.py
CHANGED
|
@@ -5,6 +5,7 @@ from api.v1 import analyze, auth, health, history, report
|
|
| 5 |
api_router = APIRouter(prefix="/api/v1")
|
| 6 |
api_router.include_router(health.router)
|
| 7 |
api_router.include_router(analyze.router)
|
|
|
|
| 8 |
api_router.include_router(report.router)
|
| 9 |
api_router.include_router(auth.router)
|
| 10 |
api_router.include_router(history.router)
|
|
|
|
| 5 |
api_router = APIRouter(prefix="/api/v1")
|
| 6 |
api_router.include_router(health.router)
|
| 7 |
api_router.include_router(analyze.router)
|
| 8 |
+
api_router.include_router(analyze.jobs_router) # Phase 19.3
|
| 9 |
api_router.include_router(report.router)
|
| 10 |
api_router.include_router(auth.router)
|
| 11 |
api_router.include_router(history.router)
|
api/v1/health.py
CHANGED
|
@@ -1,8 +1,79 @@
|
|
| 1 |
-
from fastapi import APIRouter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
router = APIRouter(tags=["health"])
|
| 4 |
|
| 5 |
|
| 6 |
@router.get("/health")
|
| 7 |
def health():
|
|
|
|
| 8 |
return {"status": "ok", "service": "deepshield-backend"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Response, status
|
| 2 |
+
from loguru import logger
|
| 3 |
+
from sqlalchemy import text
|
| 4 |
+
|
| 5 |
+
from config import settings
|
| 6 |
+
from db.database import engine
|
| 7 |
+
from services.llm_explainer import is_rate_limited
|
| 8 |
|
| 9 |
router = APIRouter(tags=["health"])
|
| 10 |
|
| 11 |
|
| 12 |
@router.get("/health")
def health():
    """Legacy combined healthcheck, retained for backwards compatibility.

    Returns a fixed payload and performs no dependency checks.
    """
    return dict(status="ok", service="deepshield-backend")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@router.get("/health/live")
def health_live():
    """Liveness probe: answers 200 for as long as the process is up.

    Touches no external dependency; the payload is constant.
    """
    return dict(status="alive")
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
@router.get("/health/ready")
def health_ready(response: Response):
    """Readiness probe — 200 only when DB is reachable and models are loaded.

    Phase 19.5: the frontend disables the Analyze button while this returns 503.

    Args:
        response: FastAPI response object; its status code is switched to 503
            when any individual check fails.

    Returns:
        A dict with ``status`` (``"ready"`` or ``"not_ready"``) and ``checks``,
        the per-dependency booleans (``db``, ``models``).
    """
    checks: dict[str, bool] = {}

    # Database: a trivial round-trip proves the engine can hand out a connection.
    try:
        with engine.connect() as conn:
            conn.execute(text("SELECT 1"))
        checks["db"] = True
    except Exception as e:  # noqa: BLE001
        logger.warning(f"readiness db check failed: {e}")
        checks["db"] = False

    # Models: imported lazily inside the probe, as in the original code.
    try:
        from models.model_loader import get_model_loader

        loader = get_model_loader()
        is_ready = getattr(loader, "is_ready", None)
        # No is_ready() — fall back to "ready if loader constructs".
        # Probing with getattr (instead of catching AttributeError) keeps a
        # genuine AttributeError raised *inside* the loader or is_ready() from
        # being mistaken for "method missing" and reported as ready.
        checks["models"] = bool(is_ready()) if callable(is_ready) else True
    except Exception as e:  # noqa: BLE001
        logger.warning(f"readiness model check failed: {e}")
        checks["models"] = False

    ok = all(checks.values())
    if not ok:
        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE
    return {"status": "ready" if ok else "not_ready", "checks": checks}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@router.get("/health/llm")
def health_llm(response: Response):
    """LLM availability probe for the frontend's AI summary card.

    Spends no tokens: only the configured API keys and the result of
    ``is_rate_limited()`` are consulted. The response status is set to 503
    when no provider is currently usable.

    Returns:
        A dict with ``available``, the ``primary`` and ``fallback`` provider
        labels (``None`` when the respective key is not configured) and the
        raw ``rate_limited`` value.
    """
    primary_configured = bool(settings.LLM_API_KEY)
    fallback_configured = bool(settings.GROQ_API_KEY)
    rate_limited = is_rate_limited()

    # Usable when some provider is configured AND either the rate-limit
    # cooldown is inactive or the fallback provider can take over.
    if primary_configured or fallback_configured:
        usable = fallback_configured or not rate_limited
    else:
        usable = False

    if not usable:
        response.status_code = status.HTTP_503_SERVICE_UNAVAILABLE

    primary_label = None
    if primary_configured:
        primary_label = f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}"

    fallback_label = None
    if fallback_configured:
        fallback_label = f"groq/{settings.GROQ_MODEL}"

    return {
        "available": usable,
        "primary": primary_label,
        "fallback": fallback_label,
        "rate_limited": rate_limited,
    }
|
api/v1/history.py
CHANGED
|
@@ -60,7 +60,13 @@ def get_history_detail(
|
|
| 60 |
if not r or r.user_id != user.id:
|
| 61 |
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
| 62 |
try:
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
except Exception:
|
| 65 |
raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")
|
| 66 |
|
|
|
|
| 60 |
if not r or r.user_id != user.id:
|
| 61 |
raise HTTPException(status.HTTP_404_NOT_FOUND, "Analysis not found")
|
| 62 |
try:
|
| 63 |
+
payload = json.loads(r.result_json)
|
| 64 |
+
# Inject storage fields from DB columns so the frontend can display full-size media
|
| 65 |
+
if r.media_path and not payload.get("media_path"):
|
| 66 |
+
payload["media_path"] = r.media_path
|
| 67 |
+
if r.thumbnail_url and not payload.get("thumbnail_url"):
|
| 68 |
+
payload["thumbnail_url"] = r.thumbnail_url
|
| 69 |
+
return payload
|
| 70 |
except Exception:
|
| 71 |
raise HTTPException(status.HTTP_500_INTERNAL_SERVER_ERROR, "Corrupt result payload")
|
| 72 |
|
api/v1/report.py
CHANGED
|
@@ -2,24 +2,45 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
-
from fastapi import APIRouter, Depends, HTTPException
|
| 6 |
from fastapi.responses import FileResponse
|
| 7 |
from loguru import logger
|
| 8 |
from sqlalchemy.orm import Session
|
| 9 |
|
|
|
|
| 10 |
from db.database import get_db
|
| 11 |
-
from db.models import AnalysisRecord, Report
|
|
|
|
| 12 |
from services.report_service import cleanup_expired, create_report_row, generate_report
|
| 13 |
|
| 14 |
router = APIRouter(prefix="/report", tags=["report"])
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
@router.post("/{analysis_id}")
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
|
| 20 |
if not record:
|
| 21 |
raise HTTPException(status_code=404, detail="analysis not found")
|
| 22 |
|
|
|
|
|
|
|
| 23 |
existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
|
| 24 |
if existing and Path(existing.file_path).exists():
|
| 25 |
return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
|
|
@@ -44,7 +65,19 @@ def generate(analysis_id: int, db: Session = Depends(get_db)):
|
|
| 44 |
|
| 45 |
|
| 46 |
@router.get("/{analysis_id}/download")
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
|
| 49 |
if not row:
|
| 50 |
raise HTTPException(status_code=404, detail="report not found — generate first")
|
|
@@ -58,7 +91,9 @@ def download(analysis_id: int, db: Session = Depends(get_db)):
|
|
| 58 |
)
|
| 59 |
|
| 60 |
|
| 61 |
-
@router.post("/cleanup")
|
| 62 |
-
def cleanup():
|
|
|
|
|
|
|
| 63 |
n = cleanup_expired()
|
| 64 |
return {"deleted": n}
|
|
|
|
| 2 |
|
| 3 |
from pathlib import Path
|
| 4 |
|
| 5 |
+
from fastapi import APIRouter, Depends, HTTPException, Request, status
|
| 6 |
from fastapi.responses import FileResponse
|
| 7 |
from loguru import logger
|
| 8 |
from sqlalchemy.orm import Session
|
| 9 |
|
| 10 |
+
from api.deps import get_current_user, optional_current_user
|
| 11 |
from db.database import get_db
|
| 12 |
+
from db.models import AnalysisRecord, Report, User
|
| 13 |
+
from services.rate_limit import ANON_REPORT, AUTH_REPORT, is_anon, is_authed, limiter
|
| 14 |
from services.report_service import cleanup_expired, create_report_row, generate_report
|
| 15 |
|
| 16 |
router = APIRouter(prefix="/report", tags=["report"])
|
| 17 |
|
| 18 |
|
| 19 |
+
def _assert_record_access(record: AnalysisRecord, user: User | None) -> None:
|
| 20 |
+
"""Phase 15.1 — allow access if the requester owns the record, or if the record
|
| 21 |
+
is anonymous (user_id is None). Everything else is 403."""
|
| 22 |
+
if record.user_id is None:
|
| 23 |
+
return
|
| 24 |
+
if user is not None and record.user_id == user.id:
|
| 25 |
+
return
|
| 26 |
+
raise HTTPException(status.HTTP_403_FORBIDDEN, "You do not own this analysis")
|
| 27 |
+
|
| 28 |
+
|
| 29 |
@router.post("/{analysis_id}")
|
| 30 |
+
@limiter.limit(ANON_REPORT, exempt_when=is_authed)
|
| 31 |
+
@limiter.limit(AUTH_REPORT, exempt_when=is_anon)
|
| 32 |
+
def generate(
|
| 33 |
+
request: Request,
|
| 34 |
+
analysis_id: int,
|
| 35 |
+
db: Session = Depends(get_db),
|
| 36 |
+
user: User | None = Depends(optional_current_user),
|
| 37 |
+
):
|
| 38 |
record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
|
| 39 |
if not record:
|
| 40 |
raise HTTPException(status_code=404, detail="analysis not found")
|
| 41 |
|
| 42 |
+
_assert_record_access(record, user)
|
| 43 |
+
|
| 44 |
existing = db.query(Report).filter(Report.analysis_id == analysis_id).first()
|
| 45 |
if existing and Path(existing.file_path).exists():
|
| 46 |
return {"report_id": existing.id, "analysis_id": analysis_id, "ready": True}
|
|
|
|
| 65 |
|
| 66 |
|
| 67 |
@router.get("/{analysis_id}/download")
|
| 68 |
+
@limiter.limit(ANON_REPORT, exempt_when=is_authed)
|
| 69 |
+
@limiter.limit(AUTH_REPORT, exempt_when=is_anon)
|
| 70 |
+
def download(
|
| 71 |
+
request: Request,
|
| 72 |
+
analysis_id: int,
|
| 73 |
+
db: Session = Depends(get_db),
|
| 74 |
+
user: User | None = Depends(optional_current_user),
|
| 75 |
+
):
|
| 76 |
+
record = db.query(AnalysisRecord).filter(AnalysisRecord.id == analysis_id).first()
|
| 77 |
+
if not record:
|
| 78 |
+
raise HTTPException(status_code=404, detail="analysis not found")
|
| 79 |
+
_assert_record_access(record, user)
|
| 80 |
+
|
| 81 |
row = db.query(Report).filter(Report.analysis_id == analysis_id).first()
|
| 82 |
if not row:
|
| 83 |
raise HTTPException(status_code=404, detail="report not found — generate first")
|
|
|
|
| 91 |
)
|
| 92 |
|
| 93 |
|
| 94 |
+
@router.post("/cleanup", include_in_schema=False)
|
| 95 |
+
def cleanup(user: User = Depends(get_current_user)):
|
| 96 |
+
# Phase 15.1 — auth-guarded. Exposed only to authenticated users; an internal
|
| 97 |
+
# scheduler loop in main.py handles periodic cleanup automatically.
|
| 98 |
n = cleanup_expired()
|
| 99 |
return {"deleted": n}
|
artifact_detector.py
DELETED
|
@@ -1,229 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import io
|
| 4 |
-
from typing import List
|
| 5 |
-
|
| 6 |
-
import numpy as np
|
| 7 |
-
from loguru import logger
|
| 8 |
-
from PIL import Image
|
| 9 |
-
|
| 10 |
-
from schemas.common import ArtifactIndicator
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
def _severity_from_score(score: float) -> str:
|
| 14 |
-
if score >= 0.7:
|
| 15 |
-
return "high"
|
| 16 |
-
if score >= 0.4:
|
| 17 |
-
return "medium"
|
| 18 |
-
return "low"
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# ---------- 1. GAN high-frequency signature (FFT) ----------
|
| 22 |
-
def detect_gan_hf_artifact(pil_img: Image.Image) -> ArtifactIndicator | None:
|
| 23 |
-
"""Compute high-frequency energy ratio on the luminance channel.
|
| 24 |
-
Real photos typically follow a ~1/f spectrum; many GAN outputs show
|
| 25 |
-
elevated HF energy or spectral peaks.
|
| 26 |
-
"""
|
| 27 |
-
try:
|
| 28 |
-
gray = np.asarray(pil_img.convert("L"), dtype=np.float32)
|
| 29 |
-
# downsample for speed
|
| 30 |
-
if max(gray.shape) > 512:
|
| 31 |
-
import cv2
|
| 32 |
-
|
| 33 |
-
scale = 512 / max(gray.shape)
|
| 34 |
-
gray = cv2.resize(gray, (int(gray.shape[1] * scale), int(gray.shape[0] * scale)))
|
| 35 |
-
|
| 36 |
-
fft = np.fft.fftshift(np.fft.fft2(gray))
|
| 37 |
-
mag = np.abs(fft)
|
| 38 |
-
h, w = mag.shape
|
| 39 |
-
cy, cx = h // 2, w // 2
|
| 40 |
-
y, x = np.ogrid[:h, :w]
|
| 41 |
-
r = np.sqrt((x - cx) ** 2 + (y - cy) ** 2)
|
| 42 |
-
r_max = np.sqrt(cx * cx + cy * cy)
|
| 43 |
-
hf_mask = r > (0.5 * r_max)
|
| 44 |
-
|
| 45 |
-
total = float(mag.sum() + 1e-9)
|
| 46 |
-
hf = float(mag[hf_mask].sum())
|
| 47 |
-
ratio = hf / total # typically 0.05–0.20 for natural photos
|
| 48 |
-
|
| 49 |
-
# normalize to [0,1] suspiciousness
|
| 50 |
-
score = max(0.0, min(1.0, (ratio - 0.10) / 0.20))
|
| 51 |
-
sev = _severity_from_score(score)
|
| 52 |
-
return ArtifactIndicator(
|
| 53 |
-
type="gan_artifact",
|
| 54 |
-
severity=sev,
|
| 55 |
-
description=(
|
| 56 |
-
f"High-frequency energy ratio {ratio:.3f} — "
|
| 57 |
-
+ ("elevated HF energy consistent with GAN/diffusion outputs" if score > 0.4
|
| 58 |
-
else "natural frequency falloff")
|
| 59 |
-
),
|
| 60 |
-
confidence=float(score),
|
| 61 |
-
)
|
| 62 |
-
except Exception as e: # noqa: BLE001
|
| 63 |
-
logger.warning(f"GAN HF detection failed: {e}")
|
| 64 |
-
return None
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
# ---------- 2. JPEG quantization table anomaly ----------
|
| 68 |
-
_STANDARD_Q_SUMS = { # rough heuristic: camera JPEGs fall in these ranges
|
| 69 |
-
50: (1500, 4500),
|
| 70 |
-
75: (600, 2500),
|
| 71 |
-
90: (200, 1000),
|
| 72 |
-
95: (100, 600),
|
| 73 |
-
}
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
def detect_compression_anomaly(raw_bytes: bytes) -> ArtifactIndicator | None:
|
| 77 |
-
"""Inspect JPEG quantization tables. Missing tables, non-standard layouts,
|
| 78 |
-
or re-saved tables often indicate manipulation or re-encoding.
|
| 79 |
-
"""
|
| 80 |
-
try:
|
| 81 |
-
img = Image.open(io.BytesIO(raw_bytes))
|
| 82 |
-
if img.format != "JPEG":
|
| 83 |
-
return ArtifactIndicator(
|
| 84 |
-
type="compression",
|
| 85 |
-
severity="low",
|
| 86 |
-
description=f"Non-JPEG format ({img.format}); compression signature not available",
|
| 87 |
-
confidence=0.1,
|
| 88 |
-
)
|
| 89 |
-
|
| 90 |
-
q = getattr(img, "quantization", None)
|
| 91 |
-
if not q:
|
| 92 |
-
return ArtifactIndicator(
|
| 93 |
-
type="compression",
|
| 94 |
-
severity="low",
|
| 95 |
-
description="No JPEG quantization tables readable",
|
| 96 |
-
confidence=0.2,
|
| 97 |
-
)
|
| 98 |
-
|
| 99 |
-
tables = list(q.values())
|
| 100 |
-
sums = [int(sum(t)) for t in tables]
|
| 101 |
-
num_tables = len(tables)
|
| 102 |
-
|
| 103 |
-
# Heuristics: very low sum → very high quality (possibly re-saved);
|
| 104 |
-
# non-standard number of tables; extreme values.
|
| 105 |
-
suspicious = 0.0
|
| 106 |
-
reasons: list[str] = []
|
| 107 |
-
if num_tables not in (1, 2):
|
| 108 |
-
suspicious += 0.4
|
| 109 |
-
reasons.append(f"unusual table count ({num_tables})")
|
| 110 |
-
if any(s < 60 for s in sums):
|
| 111 |
-
suspicious += 0.3
|
| 112 |
-
reasons.append("very low quantization sums (possible re-encoding)")
|
| 113 |
-
if any(s > 8000 for s in sums):
|
| 114 |
-
suspicious += 0.2
|
| 115 |
-
reasons.append("very high quantization sums")
|
| 116 |
-
|
| 117 |
-
score = max(0.0, min(1.0, suspicious))
|
| 118 |
-
sev = _severity_from_score(score)
|
| 119 |
-
desc = (
|
| 120 |
-
f"JPEG Q-table sums {sums}"
|
| 121 |
-
+ (f"; {', '.join(reasons)}" if reasons else "; within typical camera range")
|
| 122 |
-
)
|
| 123 |
-
return ArtifactIndicator(
|
| 124 |
-
type="compression",
|
| 125 |
-
severity=sev,
|
| 126 |
-
description=desc,
|
| 127 |
-
confidence=float(score),
|
| 128 |
-
)
|
| 129 |
-
except Exception as e: # noqa: BLE001
|
| 130 |
-
logger.warning(f"Compression anomaly detection failed: {e}")
|
| 131 |
-
return None
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
# ---------- 3. Facial boundary + 4. Lighting (MediaPipe) ----------
|
| 135 |
-
def detect_face_based_artifacts(pil_img: Image.Image) -> List[ArtifactIndicator]:
|
| 136 |
-
"""If a face is detected, analyze jaw boundary variance and per-quadrant
|
| 137 |
-
luminance balance. Returns 0, 1, or 2 indicators.
|
| 138 |
-
"""
|
| 139 |
-
results: List[ArtifactIndicator] = []
|
| 140 |
-
try:
|
| 141 |
-
import mediapipe as mp # type: ignore
|
| 142 |
-
|
| 143 |
-
from models.model_loader import get_model_loader
|
| 144 |
-
|
| 145 |
-
detector = get_model_loader().load_face_detector()
|
| 146 |
-
rgb = np.asarray(pil_img.convert("RGB"))
|
| 147 |
-
h, w = rgb.shape[:2]
|
| 148 |
-
mp_result = detector.process(rgb)
|
| 149 |
-
|
| 150 |
-
if not mp_result.multi_face_landmarks:
|
| 151 |
-
return results
|
| 152 |
-
|
| 153 |
-
landmarks = mp_result.multi_face_landmarks[0].landmark
|
| 154 |
-
|
| 155 |
-
# ----- Jaw boundary jitter -----
|
| 156 |
-
# FaceMesh jaw/oval landmark indices (approximate face contour)
|
| 157 |
-
JAW_IDX = [
|
| 158 |
-
10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361,
|
| 159 |
-
288, 397, 365, 379, 378, 400, 377, 152, 148, 176, 149,
|
| 160 |
-
150, 136, 172, 58, 132, 93, 234, 127, 162, 21, 54, 103, 67, 109,
|
| 161 |
-
]
|
| 162 |
-
pts = np.array([(landmarks[i].x * w, landmarks[i].y * h) for i in JAW_IDX])
|
| 163 |
-
# Second-difference magnitude = local curvature jitter
|
| 164 |
-
diffs = np.diff(pts, axis=0)
|
| 165 |
-
seconds = np.diff(diffs, axis=0)
|
| 166 |
-
jitter = float(np.linalg.norm(seconds, axis=1).mean()) / max(w, h)
|
| 167 |
-
jitter_score = max(0.0, min(1.0, (jitter - 0.003) / 0.010))
|
| 168 |
-
results.append(
|
| 169 |
-
ArtifactIndicator(
|
| 170 |
-
type="facial_boundary",
|
| 171 |
-
severity=_severity_from_score(jitter_score),
|
| 172 |
-
description=(
|
| 173 |
-
f"Jaw-contour jitter {jitter:.4f} (normalized) — "
|
| 174 |
-
+ ("inconsistent boundary blending detected" if jitter_score > 0.4
|
| 175 |
-
else "face boundary appears smooth")
|
| 176 |
-
),
|
| 177 |
-
confidence=float(jitter_score),
|
| 178 |
-
)
|
| 179 |
-
)
|
| 180 |
-
|
| 181 |
-
# ----- Lighting inconsistency (per-quadrant luminance) -----
|
| 182 |
-
xs = np.array([lm.x * w for lm in landmarks])
|
| 183 |
-
ys = np.array([lm.y * h for lm in landmarks])
|
| 184 |
-
x0, x1 = int(max(0, xs.min())), int(min(w, xs.max()))
|
| 185 |
-
y0, y1 = int(max(0, ys.min())), int(min(h, ys.max()))
|
| 186 |
-
if x1 > x0 + 4 and y1 > y0 + 4:
|
| 187 |
-
face_crop = rgb[y0:y1, x0:x1]
|
| 188 |
-
gray = 0.299 * face_crop[..., 0] + 0.587 * face_crop[..., 1] + 0.114 * face_crop[..., 2]
|
| 189 |
-
hh, ww = gray.shape
|
| 190 |
-
quads = [
|
| 191 |
-
gray[: hh // 2, : ww // 2],
|
| 192 |
-
gray[: hh // 2, ww // 2 :],
|
| 193 |
-
gray[hh // 2 :, : ww // 2],
|
| 194 |
-
gray[hh // 2 :, ww // 2 :],
|
| 195 |
-
]
|
| 196 |
-
means = np.array([q.mean() for q in quads if q.size > 0])
|
| 197 |
-
if means.size == 4 and means.mean() > 1e-3:
|
| 198 |
-
imbalance = float(means.std() / means.mean())
|
| 199 |
-
lighting_score = max(0.0, min(1.0, (imbalance - 0.08) / 0.20))
|
| 200 |
-
results.append(
|
| 201 |
-
ArtifactIndicator(
|
| 202 |
-
type="lighting",
|
| 203 |
-
severity=_severity_from_score(lighting_score),
|
| 204 |
-
description=(
|
| 205 |
-
f"Luminance imbalance across face quadrants {imbalance:.3f} — "
|
| 206 |
-
+ ("inconsistent lighting direction" if lighting_score > 0.4
|
| 207 |
-
else "lighting appears uniform")
|
| 208 |
-
),
|
| 209 |
-
confidence=float(lighting_score),
|
| 210 |
-
)
|
| 211 |
-
)
|
| 212 |
-
except Exception as e: # noqa: BLE001
|
| 213 |
-
logger.warning(f"Face-based artifact detection failed: {e}")
|
| 214 |
-
|
| 215 |
-
return results
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
# ---------- Orchestrator ----------
|
| 219 |
-
def scan_artifacts(pil_img: Image.Image, raw_bytes: bytes) -> List[ArtifactIndicator]:
|
| 220 |
-
indicators: List[ArtifactIndicator] = []
|
| 221 |
-
for fn in (
|
| 222 |
-
lambda: detect_gan_hf_artifact(pil_img),
|
| 223 |
-
lambda: detect_compression_anomaly(raw_bytes),
|
| 224 |
-
):
|
| 225 |
-
ind = fn()
|
| 226 |
-
if ind is not None:
|
| 227 |
-
indicators.append(ind)
|
| 228 |
-
indicators.extend(detect_face_based_artifacts(pil_img))
|
| 229 |
-
return indicators
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auth.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from datetime import datetime
|
| 4 |
-
|
| 5 |
-
from pydantic import BaseModel, EmailStr, Field
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class RegisterBody(BaseModel):
|
| 9 |
-
email: EmailStr
|
| 10 |
-
password: str = Field(min_length=6, max_length=128)
|
| 11 |
-
name: str | None = Field(default=None, max_length=255)
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
class LoginBody(BaseModel):
|
| 15 |
-
email: EmailStr
|
| 16 |
-
password: str
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
class UserOut(BaseModel):
|
| 20 |
-
id: int
|
| 21 |
-
email: str
|
| 22 |
-
name: str | None = None
|
| 23 |
-
created_at: datetime
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
class TokenResponse(BaseModel):
|
| 27 |
-
access_token: str
|
| 28 |
-
token_type: str = "bearer"
|
| 29 |
-
expires_in_minutes: int
|
| 30 |
-
user: UserOut
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
auth_service.py
DELETED
|
@@ -1,67 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from datetime import datetime, timedelta, timezone
|
| 4 |
-
from typing import Any
|
| 5 |
-
|
| 6 |
-
import bcrypt
|
| 7 |
-
from jose import JWTError, jwt
|
| 8 |
-
from sqlalchemy.orm import Session
|
| 9 |
-
|
| 10 |
-
from config import settings
|
| 11 |
-
from db.models import User
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def _encode_pw(plain: str) -> bytes:
|
| 15 |
-
# bcrypt truncates to 72 bytes silently in some builds and hard-errors in others.
|
| 16 |
-
# Truncate explicitly so behavior is deterministic across versions.
|
| 17 |
-
return plain.encode("utf-8")[:72]
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def hash_password(plain: str) -> str:
|
| 21 |
-
return bcrypt.hashpw(_encode_pw(plain), bcrypt.gensalt()).decode("utf-8")
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
def verify_password(plain: str, hashed: str) -> bool:
|
| 25 |
-
try:
|
| 26 |
-
return bcrypt.checkpw(_encode_pw(plain), hashed.encode("utf-8"))
|
| 27 |
-
except Exception:
|
| 28 |
-
return False
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
def create_access_token(user_id: int, email: str) -> str:
|
| 32 |
-
now = datetime.now(timezone.utc)
|
| 33 |
-
payload = {
|
| 34 |
-
"sub": str(user_id),
|
| 35 |
-
"email": email,
|
| 36 |
-
"iat": int(now.timestamp()),
|
| 37 |
-
"exp": int((now + timedelta(minutes=settings.JWT_EXPIRATION_MINUTES)).timestamp()),
|
| 38 |
-
}
|
| 39 |
-
return jwt.encode(payload, settings.JWT_SECRET_KEY, algorithm=settings.JWT_ALGORITHM)
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
def decode_token(token: str) -> dict[str, Any] | None:
|
| 43 |
-
try:
|
| 44 |
-
return jwt.decode(token, settings.JWT_SECRET_KEY, algorithms=[settings.JWT_ALGORITHM])
|
| 45 |
-
except JWTError:
|
| 46 |
-
return None
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def register_user(db: Session, email: str, password: str, name: str | None) -> User:
|
| 50 |
-
email = email.strip().lower()
|
| 51 |
-
user = User(email=email, password_hash=hash_password(password), name=(name or None))
|
| 52 |
-
db.add(user)
|
| 53 |
-
db.commit()
|
| 54 |
-
db.refresh(user)
|
| 55 |
-
return user
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
def authenticate(db: Session, email: str, password: str) -> User | None:
|
| 59 |
-
email = email.strip().lower()
|
| 60 |
-
user = db.query(User).filter(User.email == email).first()
|
| 61 |
-
if not user or not verify_password(password, user.password_hash):
|
| 62 |
-
return None
|
| 63 |
-
return user
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
def get_user(db: Session, user_id: int) -> User | None:
|
| 67 |
-
return db.query(User).filter(User.id == user_id).first()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
common.py
DELETED
|
@@ -1,88 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from typing import List, Optional
|
| 4 |
-
|
| 5 |
-
from pydantic import BaseModel, ConfigDict, Field
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
class Verdict(BaseModel):
|
| 9 |
-
model_config = ConfigDict(protected_namespaces=())
|
| 10 |
-
|
| 11 |
-
label: str
|
| 12 |
-
severity: str
|
| 13 |
-
authenticity_score: int = Field(ge=0, le=100)
|
| 14 |
-
model_confidence: float = Field(ge=0.0, le=1.0)
|
| 15 |
-
model_label: str
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
class ArtifactIndicator(BaseModel):
|
| 19 |
-
type: str
|
| 20 |
-
severity: str # low | medium | high
|
| 21 |
-
description: str
|
| 22 |
-
confidence: float = Field(ge=0.0, le=1.0)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
class TrustedSource(BaseModel):
|
| 26 |
-
source_name: str
|
| 27 |
-
title: str
|
| 28 |
-
url: str
|
| 29 |
-
published_at: Optional[str] = None
|
| 30 |
-
relevance_score: float = Field(ge=0.0, le=1.0)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class ContradictingEvidence(BaseModel):
|
| 34 |
-
source_name: str
|
| 35 |
-
title: str
|
| 36 |
-
url: str
|
| 37 |
-
type: str = "fact_check"
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
class TruthOverride(BaseModel):
|
| 41 |
-
applied: bool = False
|
| 42 |
-
source_url: str = ""
|
| 43 |
-
source_name: str = ""
|
| 44 |
-
similarity: float = 0.0
|
| 45 |
-
fake_prob_before: float = 0.0
|
| 46 |
-
fake_prob_after: float = 0.0
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
class ExifSummary(BaseModel):
|
| 50 |
-
make: Optional[str] = None
|
| 51 |
-
model: Optional[str] = None
|
| 52 |
-
datetime_original: Optional[str] = None
|
| 53 |
-
gps_info: Optional[str] = None
|
| 54 |
-
software: Optional[str] = None
|
| 55 |
-
lens_model: Optional[str] = None
|
| 56 |
-
trust_adjustment: int = 0 # negative = more real, positive = more fake
|
| 57 |
-
trust_reason: str = ""
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
class LLMExplainabilitySummary(BaseModel):
|
| 61 |
-
paragraph: str = ""
|
| 62 |
-
bullets: List[str] = []
|
| 63 |
-
model_used: str = ""
|
| 64 |
-
cached: bool = False
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
class VLMComponentScore(BaseModel):
|
| 68 |
-
score: int = Field(ge=0, le=100, default=75)
|
| 69 |
-
notes: str = ""
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
class VLMBreakdown(BaseModel):
|
| 73 |
-
facial_symmetry: VLMComponentScore = VLMComponentScore()
|
| 74 |
-
skin_texture: VLMComponentScore = VLMComponentScore()
|
| 75 |
-
lighting_consistency: VLMComponentScore = VLMComponentScore()
|
| 76 |
-
background_coherence: VLMComponentScore = VLMComponentScore()
|
| 77 |
-
anatomy_hands_eyes: VLMComponentScore = VLMComponentScore()
|
| 78 |
-
context_objects: VLMComponentScore = VLMComponentScore()
|
| 79 |
-
model_used: str = ""
|
| 80 |
-
cached: bool = False
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
class ProcessingSummary(BaseModel):
|
| 84 |
-
model_config = ConfigDict(protected_namespaces=())
|
| 85 |
-
|
| 86 |
-
stages_completed: List[str]
|
| 87 |
-
total_duration_ms: int
|
| 88 |
-
model_used: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
CHANGED
|
@@ -20,6 +20,7 @@ class Settings(BaseSettings):
|
|
| 20 |
|
| 21 |
# AI Models
|
| 22 |
IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
|
|
|
|
| 23 |
TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
|
| 24 |
# Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
|
| 25 |
TEXT_MULTILANG_MODEL_ID: str = ""
|
|
@@ -37,15 +38,47 @@ class Settings(BaseSettings):
|
|
| 37 |
REPORT_DIR: str = "./temp_reports"
|
| 38 |
REPORT_TTL_SECONDS: int = 3600 # 1h expiry
|
| 39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
# LLM Explainability (Phase 12)
|
| 41 |
LLM_PROVIDER: str = "gemini" # "gemini" | "openai"
|
| 42 |
LLM_API_KEY: str = ""
|
| 43 |
-
LLM_MODEL: str = "gemini-2.5-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
# EfficientNet (ICPR2020 / DeepShield1 merge)
|
| 46 |
EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
|
| 47 |
EFFICIENTNET_TRAIN_DB: str = "DFDC"
|
| 48 |
ENSEMBLE_MODE: bool = True # run both ViT + EfficientNet and average scores
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
VIDEO_SAMPLE_FRAMES: int = 16 # frames to sample per video for inference
|
| 50 |
EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
|
| 51 |
|
|
|
|
| 20 |
|
| 21 |
# AI Models
|
| 22 |
IMAGE_MODEL_ID: str = "prithivMLmods/Deep-Fake-Detector-v2-Model"
|
| 23 |
+
GENERAL_IMAGE_MODEL_ID: str = "umm-maybe/AI-image-detector"
|
| 24 |
TEXT_MODEL_ID: str = "jy46604790/Fake-News-Bert-Detect"
|
| 25 |
# Multilingual text model for non-English (Hindi etc.). Leave empty to fall back to TEXT_MODEL_ID.
|
| 26 |
TEXT_MULTILANG_MODEL_ID: str = ""
|
|
|
|
| 38 |
REPORT_DIR: str = "./temp_reports"
|
| 39 |
REPORT_TTL_SECONDS: int = 3600 # 1h expiry
|
| 40 |
|
| 41 |
+
# Phase 19 — dedup cache + object storage
|
| 42 |
+
CACHE_TTL_DAYS: int = 30
|
| 43 |
+
MEDIA_ROOT: str = "./media"
|
| 44 |
+
|
| 45 |
# LLM Explainability (Phase 12)
|
| 46 |
LLM_PROVIDER: str = "gemini" # "gemini" | "openai"
|
| 47 |
LLM_API_KEY: str = ""
|
| 48 |
+
LLM_MODEL: str = "gemini-2.5-flash" # flash is ~12x cheaper + larger free-tier quota than pro. Use "gemini-2.5-pro" for harder reasoning.
|
| 49 |
+
|
| 50 |
+
# LLM fallback — Groq (Llama 3.3 70B by default). Used automatically when the
|
| 51 |
+
# primary provider returns 429/quota exceeded. Leave empty to disable fallback.
|
| 52 |
+
GROQ_API_KEY: str = ""
|
| 53 |
+
GROQ_MODEL: str = "llama-3.3-70b-versatile"
|
| 54 |
|
| 55 |
# EfficientNet (ICPR2020 / DeepShield1 merge)
|
| 56 |
EFFICIENTNET_MODEL: str = "EfficientNetAutoAttB4"
|
| 57 |
EFFICIENTNET_TRAIN_DB: str = "DFDC"
|
| 58 |
ENSEMBLE_MODE: bool = True # run both ViT + EfficientNet and average scores
|
| 59 |
+
|
| 60 |
+
# Phase 11.3: FFPP-fine-tuned ViT. Path is resolved relative to the repo root.
|
| 61 |
+
# The checkpoint lives at <repo_root>/trained_models/ (the `trained_models/` dir
|
| 62 |
+
# at the project root, alongside `backend/` and `frontend/`).
|
| 63 |
+
FFPP_MODEL_PATH: str = "trained_models"
|
| 64 |
+
# Optional: pull FFPP checkpoint from Hugging Face Hub when local checkpoint
|
| 65 |
+
# is missing (keeps large model files out of GitHub source repo).
|
| 66 |
+
FFPP_MODEL_REPO_ID: str = ""
|
| 67 |
+
FFPP_MODEL_REVISION: str = "main"
|
| 68 |
+
FFPP_BASE_PROCESSOR_ID: str = "google/vit-base-patch16-224-in21k"
|
| 69 |
+
FFPP_ENABLED: bool = True
|
| 70 |
+
# Ensemble weights — FFPP is trained on a better (face-specific FFPP c40) dataset
|
| 71 |
+
# and is weighted more heavily when a face is present. When no face is detected,
|
| 72 |
+
# we still blend it but lean on the generic ViT since FFPP only saw face crops.
|
| 73 |
+
FFPP_WEIGHT_FACE: float = 0.55 # face-present ensemble weight
|
| 74 |
+
VIT_WEIGHT_FACE: float = 0.20
|
| 75 |
+
EFFNET_WEIGHT_FACE: float = 0.25
|
| 76 |
+
FFPP_WEIGHT_NOFACE: float = 0.35 # no-face ensemble weight
|
| 77 |
+
VIT_WEIGHT_NOFACE: float = 0.65
|
| 78 |
+
NOFACE_GENERAL_WEIGHT: float = 0.60
|
| 79 |
+
NOFACE_FORENSICS_WEIGHT: float = 0.20
|
| 80 |
+
NOFACE_EXIF_WEIGHT: float = 0.10
|
| 81 |
+
NOFACE_VLM_WEIGHT: float = 0.10
|
| 82 |
VIDEO_SAMPLE_FRAMES: int = 16 # frames to sample per video for inference
|
| 83 |
EXIFTOOL_PATH: str = "" # full path to ExifTool binary; empty = metadata write disabled
|
| 84 |
|
database.py
DELETED
|
@@ -1,30 +0,0 @@
|
|
| 1 |
-
from sqlalchemy import create_engine
|
| 2 |
-
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
| 3 |
-
|
| 4 |
-
from config import settings
|
| 5 |
-
|
| 6 |
-
engine = create_engine(
|
| 7 |
-
settings.DATABASE_URL,
|
| 8 |
-
connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
|
| 9 |
-
pool_pre_ping=True,
|
| 10 |
-
pool_recycle=300,
|
| 11 |
-
)
|
| 12 |
-
|
| 13 |
-
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
class Base(DeclarativeBase):
|
| 17 |
-
pass
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def get_db():
|
| 21 |
-
db = SessionLocal()
|
| 22 |
-
try:
|
| 23 |
-
yield db
|
| 24 |
-
finally:
|
| 25 |
-
db.close()
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
def init_db():
|
| 29 |
-
from db import models # noqa: F401
|
| 30 |
-
Base.metadata.create_all(bind=engine)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/__init__.py
DELETED
|
File without changes
|
datasets/build_manifest.py
DELETED
|
@@ -1,93 +0,0 @@
|
|
| 1 |
-
"""Build a unified train/val/test manifest (70/15/15) across all dataset buckets.
|
| 2 |
-
|
| 3 |
-
Expected input layout (produced by the other scripts in this package):
|
| 4 |
-
|
| 5 |
-
data_root/
|
| 6 |
-
real/
|
| 7 |
-
ffpp_youtube/*.jpg # frames from FFPP original_sequences
|
| 8 |
-
ffhq/*.jpg # FFHQ thumbnails
|
| 9 |
-
|
| 10 |
-
fake/
|
| 11 |
-
ffpp_deepfakes/*.jpg
|
| 12 |
-
ffpp_face2face/*.jpg
|
| 13 |
-
ffpp_faceswap/*.jpg
|
| 14 |
-
ffpp_neuraltextures/*.jpg
|
| 15 |
-
ffpp_faceshifter/*.jpg
|
| 16 |
-
dfdc/*.jpg
|
| 17 |
-
|
| 18 |
-
The manifest is stratified by (label, source) so FFHQ stays represented
|
| 19 |
-
in val/test.
|
| 20 |
-
|
| 21 |
-
Usage:
|
| 22 |
-
python -m backend.training.datasets.build_manifest \
|
| 23 |
-
--data ./data --out ./data/manifest.csv --seed 42
|
| 24 |
-
"""
|
| 25 |
-
from __future__ import annotations
|
| 26 |
-
|
| 27 |
-
import argparse
|
| 28 |
-
import csv
|
| 29 |
-
import random
|
| 30 |
-
from collections import defaultdict
|
| 31 |
-
from pathlib import Path
|
| 32 |
-
|
| 33 |
-
IMG_EXTS = {".jpg", ".jpeg", ".png"}
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def collect(data_root: Path) -> list[tuple[str, str, str]]:
|
| 37 |
-
rows: list[tuple[str, str, str]] = []
|
| 38 |
-
for label in ("real", "fake"):
|
| 39 |
-
label_root = data_root / label
|
| 40 |
-
if not label_root.exists():
|
| 41 |
-
continue
|
| 42 |
-
for source_dir in sorted(p for p in label_root.iterdir() if p.is_dir()):
|
| 43 |
-
for img in source_dir.rglob("*"):
|
| 44 |
-
if img.suffix.lower() in IMG_EXTS and img.is_file():
|
| 45 |
-
rows.append((str(img.resolve()), label, source_dir.name))
|
| 46 |
-
return rows
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def split(rows: list[tuple[str, str, str]], seed: int) -> dict[str, list[tuple[str, str, str]]]:
|
| 50 |
-
buckets: dict[tuple[str, str], list[tuple[str, str, str]]] = defaultdict(list)
|
| 51 |
-
for r in rows:
|
| 52 |
-
buckets[(r[1], r[2])].append(r)
|
| 53 |
-
|
| 54 |
-
rng = random.Random(seed)
|
| 55 |
-
out = {"train": [], "val": [], "test": []}
|
| 56 |
-
for key, items in buckets.items():
|
| 57 |
-
rng.shuffle(items)
|
| 58 |
-
n = len(items)
|
| 59 |
-
n_train = int(0.70 * n)
|
| 60 |
-
n_val = int(0.15 * n)
|
| 61 |
-
out["train"].extend(items[:n_train])
|
| 62 |
-
out["val"].extend(items[n_train : n_train + n_val])
|
| 63 |
-
out["test"].extend(items[n_train + n_val :])
|
| 64 |
-
return out
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
def main() -> None:
|
| 68 |
-
ap = argparse.ArgumentParser()
|
| 69 |
-
ap.add_argument("--data", required=True, type=Path)
|
| 70 |
-
ap.add_argument("--out", required=True, type=Path)
|
| 71 |
-
ap.add_argument("--seed", type=int, default=42)
|
| 72 |
-
args = ap.parse_args()
|
| 73 |
-
|
| 74 |
-
rows = collect(args.data)
|
| 75 |
-
if not rows:
|
| 76 |
-
raise SystemExit(f"No images found under {args.data}")
|
| 77 |
-
|
| 78 |
-
splits = split(rows, args.seed)
|
| 79 |
-
args.out.parent.mkdir(parents=True, exist_ok=True)
|
| 80 |
-
with args.out.open("w", newline="", encoding="utf-8") as f:
|
| 81 |
-
w = csv.writer(f)
|
| 82 |
-
w.writerow(["path", "label", "source", "split"])
|
| 83 |
-
for name, items in splits.items():
|
| 84 |
-
for path, label, source in items:
|
| 85 |
-
w.writerow([path, label, source, name])
|
| 86 |
-
|
| 87 |
-
summary = {k: len(v) for k, v in splits.items()}
|
| 88 |
-
print(f"Manifest: {args.out}")
|
| 89 |
-
print(f"Totals: {summary} (overall {sum(summary.values())})")
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
if __name__ == "__main__":
|
| 93 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/download_dfdc_sample.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
"""Download a sample of the DFDC (Deepfake Detection Challenge) Preview dataset.
|
| 2 |
-
|
| 3 |
-
The full DFDC is ~470GB; the *preview* release (~5GB, Kaggle) is enough for
|
| 4 |
-
diversity augmentation alongside FFPP.
|
| 5 |
-
|
| 6 |
-
Requires the Kaggle CLI (`pip install kaggle`) and ~/.kaggle/kaggle.json.
|
| 7 |
-
|
| 8 |
-
Usage:
|
| 9 |
-
python -m backend.training.datasets.download_dfdc_sample --output ./data/dfdc_preview
|
| 10 |
-
"""
|
| 11 |
-
from __future__ import annotations
|
| 12 |
-
|
| 13 |
-
import argparse
|
| 14 |
-
import shutil
|
| 15 |
-
import subprocess
|
| 16 |
-
import sys
|
| 17 |
-
from pathlib import Path
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def main() -> None:
    """Download a Kaggle competition archive into ``--output`` via the Kaggle CLI.

    Exits with status 2, after printing setup hints to stderr, when the
    ``kaggle`` executable is not on PATH. The download runs with
    ``check=True``, so a non-zero exit from the CLI raises
    ``subprocess.CalledProcessError``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", required=True, type=Path)
    parser.add_argument(
        "--competition",
        default="deepfake-detection-challenge",
        help="Kaggle competition slug (default: deepfake-detection-challenge preview).",
    )
    opts = parser.parse_args()

    kaggle_bin = shutil.which("kaggle")
    if kaggle_bin is None:
        hints = (
            "Kaggle CLI not found. Install with: pip install kaggle",
            "Then place kaggle.json in ~/.kaggle/ (chmod 600).",
        )
        for hint in hints:
            print(hint, file=sys.stderr)
        sys.exit(2)

    opts.output.mkdir(parents=True, exist_ok=True)
    command = [
        kaggle_bin,
        "competitions",
        "download",
        "-c",
        opts.competition,
        "-p",
        str(opts.output),
    ]
    print("Running:", " ".join(command))
    subprocess.run(command, check=True)
    print(f"Downloaded to {opts.output}. Unzip with: unzip *.zip")
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
if __name__ == "__main__":
|
| 44 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/download_ffhq.py
DELETED
|
@@ -1,49 +0,0 @@
|
|
| 1 |
-
"""Download the FFHQ 128x128 thumbnail subset from the official Google Drive mirror.
|
| 2 |
-
|
| 3 |
-
Pulls up to N images (default 10k) into the `real` bucket of the training set.
|
| 4 |
-
Falls back to the NVlabs 'ffhq-dataset' helper if available; otherwise expects
|
| 5 |
-
user to run the manual download once.
|
| 6 |
-
|
| 7 |
-
Usage:
|
| 8 |
-
python -m backend.training.datasets.download_ffhq --output ./data/real/ffhq -n 10000
|
| 9 |
-
"""
|
| 10 |
-
from __future__ import annotations
|
| 11 |
-
|
| 12 |
-
import argparse
|
| 13 |
-
import shutil
|
| 14 |
-
import subprocess
|
| 15 |
-
import sys
|
| 16 |
-
from pathlib import Path
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def try_nvlabs_helper(output: Path, num: int) -> bool:
    """Run the ``ffhq-dataset`` downloader inside *output* if it is installed.

    Args:
        output: Directory used as the helper's working directory.
        num: Requested image count. NOTE: not forwarded to the helper; the
            command line built below carries no count option.

    Returns:
        ``False`` when no ``ffhq-dataset`` executable is found on PATH.
        ``True`` once the helper has been invoked, whatever its exit status,
        so the caller does not print its "not installed" instructions.
    """
    helper = shutil.which("ffhq-dataset")
    if helper is None:
        return False

    cmd = [helper, "--json", "ffhq-dataset-v2.json", "--thumbs", "--num_threads", "4"]
    print("Running:", " ".join(cmd))

    # Still best-effort (check=False), but a failed run is now reported
    # instead of being silently treated as a successful download.
    completed = subprocess.run(cmd, cwd=output, check=False)
    if completed.returncode != 0:
        print(
            f"[!] `ffhq-dataset` exited with status {completed.returncode}; "
            "the download may be incomplete.",
            file=sys.stderr,
        )
    return True
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def main() -> None:
    """Fetch FFHQ thumbnails into ``--output`` or explain how to do it by hand.

    Creates the output directory, then delegates to ``try_nvlabs_helper``.
    When that reports the helper is unavailable, prints manual-download
    instructions and exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--output", required=True, type=Path)
    parser.add_argument("-n", "--num", type=int, default=10000)
    opts = parser.parse_args()
    opts.output.mkdir(parents=True, exist_ok=True)

    if not try_nvlabs_helper(opts.output, opts.num):
        instructions = (
            "[!] `ffhq-dataset` helper not installed.",
            " Install via: pip install ffhq-dataset (requires gdown)",
            " Or download thumbnails128x128.zip manually from:",
            " https://github.com/NVlabs/ffhq-dataset",
            f" Extract into: {opts.output}",
        )
        for line in instructions:
            print(line)
        sys.exit(1)
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
if __name__ == "__main__":
|
| 49 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/extract_frames.py
DELETED
|
@@ -1,90 +0,0 @@
|
|
| 1 |
-
"""Convert FFPP / DFDC videos -> 16 sampled frames at 224x224 RGB.
|
| 2 |
-
|
| 3 |
-
Usage:
|
| 4 |
-
python -m backend.training.datasets.extract_frames \
|
| 5 |
-
--input ./ffpp_data/original_sequences/youtube/raw/videos \
|
| 6 |
-
--output ./ffpp_data/frames/real \
|
| 7 |
-
--label real --frames 16 --size 224
|
| 8 |
-
"""
|
| 9 |
-
from __future__ import annotations
|
| 10 |
-
|
| 11 |
-
import argparse
|
| 12 |
-
import csv
|
| 13 |
-
from pathlib import Path
|
| 14 |
-
|
| 15 |
-
import cv2
|
| 16 |
-
import numpy as np
|
| 17 |
-
from tqdm import tqdm
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def sample_frame_indices(total: int, n: int) -> list[int]:
    """Pick up to *n* evenly spaced frame indices from a clip of *total* frames.

    The clip is divided into *n* equal segments and the index at the middle
    of each segment is chosen, clamped to the last valid frame.

    Args:
        total: Number of frames in the clip.
        n: Number of frames requested.

    Returns:
        An ascending list of indices. Empty when *total* or *n* is not
        positive; every index ``0..total-1`` when the clip has at most *n*
        frames.
    """
    # Guarding n <= 0 avoids the division by zero below when n == 0.
    if total <= 0 or n <= 0:
        return []
    if total <= n:
        return list(range(total))
    step = total / n
    return [min(total - 1, int(step * i + step / 2)) for i in range(n)]
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def extract_from_video(path: Path, out_dir: Path, n: int, size: int) -> int:
    """Save up to *n* sampled frames of the video at *path* as JPEGs in *out_dir*.

    Frame positions come from ``sample_frame_indices`` applied to the frame
    count reported by OpenCV. Each selected frame is resized to
    ``size x size`` with ``INTER_AREA`` and written at JPEG quality 95 as
    ``<video stem>_f<index, 6 digits>.jpg``.

    Args:
        path: Video file to read.
        out_dir: Directory for the frames; created if missing.
        n: Number of frames to sample.
        size: Edge length in pixels of the square output frames.

    Returns:
        The number of frames actually written. ``0`` if the video cannot be
        opened or no frame index was selected.
    """
    cap = cv2.VideoCapture(str(path))
    try:
        if not cap.isOpened():
            return 0

        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        wanted = set(sample_frame_indices(total, n))
        out_dir.mkdir(parents=True, exist_ok=True)
        if not wanted:
            return 0

        # Frames past the last selected index are never used, so stop
        # decoding there instead of reading the video to the end.
        last_wanted = max(wanted)
        saved = 0
        frame_no = 0
        while frame_no <= last_wanted:
            ok, frame = cap.read()
            if not ok:
                break
            if frame_no in wanted:
                resized = cv2.resize(frame, (size, size), interpolation=cv2.INTER_AREA)
                target = out_dir / f"{path.stem}_f{frame_no:06d}.jpg"
                # imwrite reports failure through its return value; count
                # only frames that really reached disk.
                if cv2.imwrite(str(target), resized, [cv2.IMWRITE_JPEG_QUALITY, 95]):
                    saved += 1
            frame_no += 1
        return saved
    finally:
        # Release the capture handle even if decoding or writing raised.
        cap.release()
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def main() -> None:
    """CLI entry point: sample and resize frames for every ``.mp4`` under ``--input``.

    Frames for each video are written by ``extract_from_video`` into a
    sub-directory of ``--output`` named after the video stem. When
    ``--manifest`` is given, every ``.jpg`` found in those sub-directories is
    appended to the CSV as ``(path, label, source_video)``; the header row is
    written only if the manifest file did not exist beforehand.
    """
    parser = argparse.ArgumentParser(description="Sample N frames per video and resize.")
    parser.add_argument("--input", required=True, type=Path, help="Directory of .mp4 videos (recursive).")
    parser.add_argument("--output", required=True, type=Path, help="Directory to write .jpg frames.")
    parser.add_argument("--label", required=True, choices=["real", "fake"], help="Label tag for manifest.")
    parser.add_argument("--frames", type=int, default=16)
    parser.add_argument("--size", type=int, default=224)
    parser.add_argument("--manifest", type=Path, default=None, help="Optional CSV manifest append path.")
    opts = parser.parse_args()

    videos = list(opts.input.rglob("*.mp4"))
    if not videos:
        print(f"No .mp4 found under {opts.input}")
        return

    want_manifest = opts.manifest is not None
    manifest_rows: list[tuple[str, str, str]] = []
    frames_written = 0
    for video in tqdm(videos, desc=f"extract[{opts.label}]"):
        frame_dir = opts.output / video.stem
        frames_written += extract_from_video(video, frame_dir, opts.frames, opts.size)
        if want_manifest:
            manifest_rows.extend(
                (str(jpg), opts.label, video.stem) for jpg in frame_dir.glob("*.jpg")
            )

    if want_manifest and manifest_rows:
        opts.manifest.parent.mkdir(parents=True, exist_ok=True)
        # Decide on the header before opening: append mode creates the file.
        write_header = not opts.manifest.exists()
        with opts.manifest.open("a", newline="", encoding="utf-8") as handle:
            writer = csv.writer(handle)
            if write_header:
                writer.writerow(["path", "label", "source_video"])
            writer.writerows(manifest_rows)

    print(f"Done. Videos: {len(videos)}, frames written: {frames_written}")
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
if __name__ == "__main__":
|
| 90 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/procure_all.ps1
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
# Phase 11.1 orchestrator for Windows (PowerShell)
|
| 2 |
-
$ErrorActionPreference = "Stop"
|
| 3 |
-
|
| 4 |
-
$ROOT = if ($env:ROOT) { $env:ROOT } else { ".\data" }
|
| 5 |
-
$FFPP = if ($env:FFPP) { $env:FFPP } else { ".\ffpp_data" }
|
| 6 |
-
|
| 7 |
-
New-Item -ItemType Directory -Force -Path "$ROOT\real" | Out-Null
|
| 8 |
-
New-Item -ItemType Directory -Force -Path "$ROOT\fake" | Out-Null
|
| 9 |
-
New-Item -ItemType Directory -Force -Path $FFPP | Out-Null
|
| 10 |
-
|
| 11 |
-
Write-Host "1. FaceForensics++ (highly compressed c40, 10 videos only) -- requires TOS keypress"
|
| 12 |
-
python backend\scripts\download_ffpp.py $FFPP -d all -c c40 -t videos -n 10
|
| 13 |
-
|
| 14 |
-
Write-Host "2. Frame extraction: real (original youtube)"
|
| 15 |
-
python -m backend.training.datasets.extract_frames `
|
| 16 |
-
--input "$FFPP\original_sequences\youtube\c40\videos" `
|
| 17 |
-
--output "$ROOT\real\ffpp_youtube" --label real --frames 4 --size 224
|
| 18 |
-
|
| 19 |
-
Write-Host "3. Frame extraction: fakes (each manipulation family)"
|
| 20 |
-
$Families = @("Deepfakes", "Face2Face", "FaceSwap", "NeuralTextures", "FaceShifter")
|
| 21 |
-
foreach ($fam in $Families) {
|
| 22 |
-
$famLower = $fam.ToLower()
|
| 23 |
-
python -m backend.training.datasets.extract_frames `
|
| 24 |
-
--input "$FFPP\manipulated_sequences\$fam\c40\videos" `
|
| 25 |
-
--output "$ROOT\fake\ffpp_$famLower" --label fake --frames 4 --size 224
|
| 26 |
-
}
|
| 27 |
-
|
| 28 |
-
Write-Host "4. FFHQ thumbnails (real - limited to 100 items)"
|
| 29 |
-
python -m backend.training.datasets.download_ffhq --output "$ROOT\real\ffhq" -n 100
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
Write-Host "6. DFDC preview sample (fake+real)"
|
| 33 |
-
python -m backend.training.datasets.download_dfdc_sample --output "$ROOT\_dfdc_raw"
|
| 34 |
-
Write-Host "NOTE: You will need to manually unzip + sort DFDC into $ROOT\fake\dfdc and $ROOT\real\dfdc"
|
| 35 |
-
|
| 36 |
-
Write-Host "7. Build manifest"
|
| 37 |
-
python -m backend.training.datasets.build_manifest `
|
| 38 |
-
--data $ROOT --out "$ROOT\manifest.csv" --seed 42
|
| 39 |
-
|
| 40 |
-
Write-Host "Phase 11.1 complete. See $ROOT\manifest.csv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
datasets/procure_all.sh
DELETED
|
@@ -1,37 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env bash
|
| 2 |
-
# Phase 11.1 orchestrator: download + frame-extract + manifest.
|
| 3 |
-
# Total disk target: ~120k labeled images. Expect 60-80GB intermediate, ~30GB frames.
|
| 4 |
-
|
| 5 |
-
set -euo pipefail
|
| 6 |
-
|
| 7 |
-
ROOT="${ROOT:-./data}"
|
| 8 |
-
FFPP="${FFPP:-./ffpp_data}"
|
| 9 |
-
mkdir -p "$ROOT/real" "$ROOT/fake" "$FFPP"
|
| 10 |
-
|
| 11 |
-
# 1. FaceForensics++ (raw, videos) -- requires TOS keypress
|
| 12 |
-
python backend/scripts/download_ffpp.py "$FFPP" -d all -c raw -t videos
|
| 13 |
-
|
| 14 |
-
# 2. Frame extraction: real (original youtube)
|
| 15 |
-
python -m backend.training.datasets.extract_frames \
|
| 16 |
-
--input "$FFPP/original_sequences/youtube/raw/videos" \
|
| 17 |
-
--output "$ROOT/real/ffpp_youtube" --label real --frames 16 --size 224
|
| 18 |
-
|
| 19 |
-
# 3. Frame extraction: fakes (each manipulation family)
|
| 20 |
-
for fam in Deepfakes Face2Face FaceSwap NeuralTextures FaceShifter; do
|
| 21 |
-
python -m backend.training.datasets.extract_frames \
|
| 22 |
-
--input "$FFPP/manipulated_sequences/$fam/raw/videos" \
|
| 23 |
-
--output "$ROOT/fake/ffpp_${fam,,}" --label fake --frames 16 --size 224
|
| 24 |
-
done
|
| 25 |
-
|
| 26 |
-
# 4. FFHQ thumbnails (real)
|
| 27 |
-
python -m backend.training.datasets.download_ffhq --output "$ROOT/real/ffhq" -n 10000
|
| 28 |
-
|
| 29 |
-
# 6. DFDC preview sample (fake+real) -- needs Kaggle creds
|
| 30 |
-
python -m backend.training.datasets.download_dfdc_sample --output "$ROOT/_dfdc_raw"
|
| 31 |
-
# NOTE: unzip + sort into $ROOT/fake/dfdc and $ROOT/real/dfdc per DFDC metadata.json
|
| 32 |
-
|
| 33 |
-
# 7. Build manifest
|
| 34 |
-
python -m backend.training.datasets.build_manifest \
|
| 35 |
-
--data "$ROOT" --out "$ROOT/manifest.csv" --seed 42
|
| 36 |
-
|
| 37 |
-
echo "Phase 11.1 complete. See $ROOT/manifest.csv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
db/database.py
CHANGED
|
@@ -1,28 +1,26 @@
|
|
| 1 |
-
from sqlalchemy import create_engine
|
| 2 |
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
| 3 |
|
| 4 |
from config import settings
|
| 5 |
|
| 6 |
-
_is_postgres = not settings.DATABASE_URL.startswith("sqlite")
|
| 7 |
-
|
| 8 |
engine = create_engine(
|
| 9 |
settings.DATABASE_URL,
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
# pool_pre_ping=True: test each connection before use and transparently
|
| 14 |
-
# reconnect if the server closed it — eliminates "SSL connection has been
|
| 15 |
-
# closed unexpectedly" 500s.
|
| 16 |
-
pool_pre_ping=_is_postgres,
|
| 17 |
-
# Recycle connections every 5 min so we never hold a connection past Neon's
|
| 18 |
-
# idle timeout (~5–10 min depending on plan).
|
| 19 |
-
pool_recycle=300 if _is_postgres else -1,
|
| 20 |
-
# Keep pool small — HF free tier is single-process; Neon free tier has a
|
| 21 |
-
# max-connection limit.
|
| 22 |
-
pool_size=5 if _is_postgres else 5,
|
| 23 |
-
max_overflow=2 if _is_postgres else 10,
|
| 24 |
)
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 27 |
|
| 28 |
|
|
@@ -40,5 +38,31 @@ def get_db():
|
|
| 40 |
|
| 41 |
def init_db():
|
| 42 |
from db import models # noqa: F401
|
|
|
|
|
|
|
| 43 |
Base.metadata.create_all(bind=engine)
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine, event
|
| 2 |
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
| 3 |
|
| 4 |
from config import settings
|
| 5 |
|
|
|
|
|
|
|
| 6 |
engine = create_engine(
|
| 7 |
settings.DATABASE_URL,
|
| 8 |
+
connect_args={"check_same_thread": False} if settings.DATABASE_URL.startswith("sqlite") else {},
|
| 9 |
+
pool_pre_ping=True,
|
| 10 |
+
pool_recycle=300,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
)
|
| 12 |
|
| 13 |
+
|
| 14 |
+
if settings.DATABASE_URL.startswith("sqlite"):
|
| 15 |
+
@event.listens_for(engine, "connect")
|
| 16 |
+
def _sqlite_on_connect(dbapi_conn, _):
|
| 17 |
+
# Enforce FK constraints (needed for ON DELETE SET NULL) + WAL for better
|
| 18 |
+
# concurrent reads while a writer is active.
|
| 19 |
+
cur = dbapi_conn.cursor()
|
| 20 |
+
cur.execute("PRAGMA foreign_keys=ON")
|
| 21 |
+
cur.execute("PRAGMA journal_mode=WAL")
|
| 22 |
+
cur.close()
|
| 23 |
+
|
| 24 |
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
|
| 25 |
|
| 26 |
|
|
|
|
| 38 |
|
| 39 |
def init_db():
    """Create all mapped tables, then apply lightweight in-place schema upgrades.

    After ``create_all``, any of the ``media_hash``, ``media_path`` and
    ``thumbnail_url`` columns missing from ``analyses`` is added with
    ``ALTER TABLE``, and the supporting indices are created if absent.
    Index creation is best-effort: a failure is logged and skipped.
    """
    from db import models  # noqa: F401
    from sqlalchemy import inspect, text

    import logging

    log = logging.getLogger(__name__)

    Base.metadata.create_all(bind=engine)

    # Phase 19.4 — lightweight in-place migration for new columns.
    # Alembic is overkill here; just ALTER TABLE when a new column is missing.
    insp = inspect(engine)
    if "analyses" not in insp.get_table_names():
        return

    existing = {c["name"] for c in insp.get_columns("analyses")}
    additions = {
        "media_hash": "VARCHAR(64)",
        "media_path": "VARCHAR(512)",
        "thumbnail_url": "VARCHAR(512)",
    }
    missing = {col: ddl for col, ddl in additions.items() if col not in existing}
    if missing:
        with engine.begin() as conn:
            for col, ddl in missing.items():
                # Column names and types come from the literal dict above,
                # never from user input, so interpolating them is safe.
                conn.execute(text(f"ALTER TABLE analyses ADD COLUMN {col} {ddl}"))

    # Indices (CREATE INDEX IF NOT EXISTS is SQLite+Postgres safe)
    index_ddls = (
        "CREATE INDEX IF NOT EXISTS ix_analyses_media_hash ON analyses (media_hash)",
        "CREATE INDEX IF NOT EXISTS ix_record_user_created ON analyses (user_id, created_at)",
        "CREATE INDEX IF NOT EXISTS ix_report_analysis ON reports (analysis_id)",
    )
    for ddl in index_ddls:
        # Each statement gets its own transaction: on PostgreSQL a failed
        # statement aborts the enclosing transaction, which would otherwise
        # take the column additions and the remaining indices down with it.
        try:
            with engine.begin() as conn:
                conn.execute(text(ddl))
        except Exception:  # noqa: BLE001 - index creation stays best-effort
            log.warning("Skipping index DDL %r", ddl, exc_info=True)
|
db/models.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
from datetime import datetime
|
| 2 |
|
| 3 |
-
from sqlalchemy import DateTime, ForeignKey, Integer, String, Text
|
| 4 |
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
| 5 |
|
| 6 |
from db.database import Base
|
|
@@ -13,7 +13,7 @@ class User(Base):
|
|
| 13 |
email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
|
| 14 |
password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
|
| 15 |
name: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
| 16 |
-
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.
|
| 17 |
|
| 18 |
analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
|
| 19 |
|
|
@@ -22,16 +22,26 @@ class AnalysisRecord(Base):
|
|
| 22 |
__tablename__ = "analyses"
|
| 23 |
|
| 24 |
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
|
| 25 |
-
user_id: Mapped[int | None] = mapped_column(
|
|
|
|
|
|
|
| 26 |
media_type: Mapped[str] = mapped_column(String(32), nullable=False) # image|video|text|screenshot
|
| 27 |
verdict: Mapped[str] = mapped_column(String(32), nullable=False)
|
| 28 |
authenticity_score: Mapped[float] = mapped_column(nullable=False)
|
| 29 |
result_json: Mapped[str] = mapped_column(Text, nullable=False)
|
| 30 |
-
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
user: Mapped["User | None"] = relationship(back_populates="analyses")
|
| 33 |
report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
class Report(Base):
|
| 37 |
__tablename__ = "reports"
|
|
@@ -39,7 +49,11 @@ class Report(Base):
|
|
| 39 |
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
|
| 40 |
analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
|
| 41 |
file_path: Mapped[str] = mapped_column(String(512), nullable=False)
|
| 42 |
-
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.
|
| 43 |
-
expires_at: Mapped[datetime | None] = mapped_column(DateTime, nullable=True)
|
| 44 |
|
| 45 |
analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from datetime import datetime, timezone
|
| 2 |
|
| 3 |
+
from sqlalchemy import DateTime, ForeignKey, Index, Integer, String, Text
|
| 4 |
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
| 5 |
|
| 6 |
from db.database import Base
|
|
|
|
| 13 |
email: Mapped[str] = mapped_column(String(255), unique=True, index=True, nullable=False)
|
| 14 |
password_hash: Mapped[str] = mapped_column(String(255), nullable=False)
|
| 15 |
name: Mapped[str | None] = mapped_column(String(255), nullable=True)
|
| 16 |
+
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
| 17 |
|
| 18 |
analyses: Mapped[list["AnalysisRecord"]] = relationship(back_populates="user")
|
| 19 |
|
|
|
|
| 22 |
__tablename__ = "analyses"
|
| 23 |
|
| 24 |
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
|
| 25 |
+
user_id: Mapped[int | None] = mapped_column(
|
| 26 |
+
ForeignKey("users.id", ondelete="SET NULL"), nullable=True,
|
| 27 |
+
)
|
| 28 |
media_type: Mapped[str] = mapped_column(String(32), nullable=False) # image|video|text|screenshot
|
| 29 |
verdict: Mapped[str] = mapped_column(String(32), nullable=False)
|
| 30 |
authenticity_score: Mapped[float] = mapped_column(nullable=False)
|
| 31 |
result_json: Mapped[str] = mapped_column(Text, nullable=False)
|
| 32 |
+
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
| 33 |
+
# Phase 19.1 / 19.2 — SHA-256 dedup + object storage
|
| 34 |
+
media_hash: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
|
| 35 |
+
media_path: Mapped[str | None] = mapped_column(String(512), nullable=True)
|
| 36 |
+
thumbnail_url: Mapped[str | None] = mapped_column(String(512), nullable=True)
|
| 37 |
|
| 38 |
user: Mapped["User | None"] = relationship(back_populates="analyses")
|
| 39 |
report: Mapped["Report | None"] = relationship(back_populates="analysis", uselist=False)
|
| 40 |
|
| 41 |
+
__table_args__ = (
|
| 42 |
+
Index("ix_record_user_created", "user_id", "created_at"),
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
|
| 46 |
class Report(Base):
|
| 47 |
__tablename__ = "reports"
|
|
|
|
| 49 |
id: Mapped[int] = mapped_column(Integer, primary_key=True, index=True)
|
| 50 |
analysis_id: Mapped[int] = mapped_column(ForeignKey("analyses.id"), nullable=False)
|
| 51 |
file_path: Mapped[str] = mapped_column(String(512), nullable=False)
|
| 52 |
+
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
|
| 53 |
+
expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
|
| 54 |
|
| 55 |
analysis: Mapped["AnalysisRecord"] = relationship(back_populates="report")
|
| 56 |
+
|
| 57 |
+
__table_args__ = (
|
| 58 |
+
Index("ix_report_analysis", "analysis_id"),
|
| 59 |
+
)
|
deepshield.db-shm
ADDED
|
Binary file (32.8 kB). View file
|
|
|
deepshield.db-wal
ADDED
|
Binary file (86.6 kB). View file
|
|
|
deepshield_13_5bcf1328.pdf
DELETED
|
@@ -1,148 +0,0 @@
|
|
| 1 |
-
%PDF-1.4
|
| 2 |
-
%���� ReportLab Generated PDF document (opensource)
|
| 3 |
-
1 0 obj
|
| 4 |
-
<<
|
| 5 |
-
/F1 2 0 R /F2 3 0 R /F3 5 0 R
|
| 6 |
-
>>
|
| 7 |
-
endobj
|
| 8 |
-
2 0 obj
|
| 9 |
-
<<
|
| 10 |
-
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
-
>>
|
| 12 |
-
endobj
|
| 13 |
-
3 0 obj
|
| 14 |
-
<<
|
| 15 |
-
/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
-
>>
|
| 17 |
-
endobj
|
| 18 |
-
4 0 obj
|
| 19 |
-
<<
|
| 20 |
-
/Contents 18 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
|
| 21 |
-
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
-
>> /Rotate 0 /Trans <<
|
| 23 |
-
|
| 24 |
-
>>
|
| 25 |
-
/Type /Page
|
| 26 |
-
>>
|
| 27 |
-
endobj
|
| 28 |
-
5 0 obj
|
| 29 |
-
<<
|
| 30 |
-
/BaseFont /Symbol /Name /F3 /Subtype /Type1 /Type /Font
|
| 31 |
-
>>
|
| 32 |
-
endobj
|
| 33 |
-
6 0 obj
|
| 34 |
-
<<
|
| 35 |
-
/Contents 19 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 17 0 R /Resources <<
|
| 36 |
-
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 37 |
-
>> /Rotate 0 /Trans <<
|
| 38 |
-
|
| 39 |
-
>>
|
| 40 |
-
/Type /Page
|
| 41 |
-
>>
|
| 42 |
-
endobj
|
| 43 |
-
7 0 obj
|
| 44 |
-
<<
|
| 45 |
-
/Outlines 9 0 R /PageMode /UseNone /Pages 17 0 R /Type /Catalog
|
| 46 |
-
>>
|
| 47 |
-
endobj
|
| 48 |
-
8 0 obj
|
| 49 |
-
<<
|
| 50 |
-
/Author () /CreationDate (D:20260415181653+05'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260415181653+05'00') /Producer (xhtml2pdf <https://github.com/xhtml2pdf/xhtml2pdf/>)
|
| 51 |
-
/Subject () /Title (DeepShield Analysis Report \204 7771f496-45b1-4c97-8a1a-d9d2492ca67d) /Trapped /False
|
| 52 |
-
>>
|
| 53 |
-
endobj
|
| 54 |
-
9 0 obj
|
| 55 |
-
<<
|
| 56 |
-
/Count 3 /First 10 0 R /Last 10 0 R /Type /Outlines
|
| 57 |
-
>>
|
| 58 |
-
endobj
|
| 59 |
-
10 0 obj
|
| 60 |
-
<<
|
| 61 |
-
/Count -4 /Dest [ 4 0 R /Fit ] /First 11 0 R /Last 16 0 R /Parent 9 0 R /Title (DeepShield Analysis Report)
|
| 62 |
-
>>
|
| 63 |
-
endobj
|
| 64 |
-
11 0 obj
|
| 65 |
-
<<
|
| 66 |
-
/Dest [ 4 0 R /Fit ] /Next 12 0 R /Parent 10 0 R /Title (Verdict)
|
| 67 |
-
>>
|
| 68 |
-
endobj
|
| 69 |
-
12 0 obj
|
| 70 |
-
<<
|
| 71 |
-
/Count -2 /Dest [ 4 0 R /Fit ] /First 13 0 R /Last 14 0 R /Next 15 0 R /Parent 10 0 R
|
| 72 |
-
/Prev 11 0 R /Title (Text Classification)
|
| 73 |
-
>>
|
| 74 |
-
endobj
|
| 75 |
-
13 0 obj
|
| 76 |
-
<<
|
| 77 |
-
/Dest [ 4 0 R /Fit ] /Next 14 0 R /Parent 12 0 R /Title (Sensationalism Signals)
|
| 78 |
-
>>
|
| 79 |
-
endobj
|
| 80 |
-
14 0 obj
|
| 81 |
-
<<
|
| 82 |
-
/Dest [ 4 0 R /Fit ] /Parent 12 0 R /Prev 13 0 R /Title (Extracted Keywords)
|
| 83 |
-
>>
|
| 84 |
-
endobj
|
| 85 |
-
15 0 obj
|
| 86 |
-
<<
|
| 87 |
-
/Dest [ 4 0 R /Fit ] /Next 16 0 R /Parent 10 0 R /Prev 12 0 R /Title (Trusted Source Cross-Reference \(1\))
|
| 88 |
-
>>
|
| 89 |
-
endobj
|
| 90 |
-
16 0 obj
|
| 91 |
-
<<
|
| 92 |
-
/Dest [ 6 0 R /Fit ] /Parent 10 0 R /Prev 15 0 R /Title (Processing Summary)
|
| 93 |
-
>>
|
| 94 |
-
endobj
|
| 95 |
-
17 0 obj
|
| 96 |
-
<<
|
| 97 |
-
/Count 2 /Kids [ 4 0 R 6 0 R ] /Type /Pages
|
| 98 |
-
>>
|
| 99 |
-
endobj
|
| 100 |
-
18 0 obj
|
| 101 |
-
<<
|
| 102 |
-
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1750
|
| 103 |
-
>>
|
| 104 |
-
stream
|
| 105 |
-
Gb"/(9lo&I&A@sBlm4G[Acr2Y4p^$ca2t\gAsuiHo\c,I9gURE8lSA3M>qu?,XkR;()9nE&%0G$"Ts\%gUFdJ0E[3iXSb#I!k]Slq-+&^_fu5V&-:f'>`[5155TjpXI_!]U"iQd1qrcX0jNK021sk.K_S`f[kfkaR[pr2$LLU)UX&`3>7R17rJ3t':B_<4Kk*Grr8\a:5/Z<<[I]mbfHq28c@Y+3O)t)0k@mu0K^fiq^N*(u.%T.'jl<s/Nh4He2l7^V7l^6+r/e]g]la.!>S?L^o+>>SgBV8H:sX>5A0-l`)&\h4Lk6L5I=)ArV#_bh%^>M_c,"jSErfH[2A&CfKtLn_&K3h)!u;:i'6.H*(apE@/QWkIgF*OaTZ"ZT=me'_?iN-hL[(uHeb"'/B!\/7d068ieW>Y3P8NcsU#;"%eOe_!^-"Xsc?9a'H,u4"nMEm$3F[>c1S8J!`Sh;Ye8pG>de>ac3KpI*&j-(`*[@OB&i#OgJSl=(I-'<c@@S(D;k%W_$;Jl?$^4Y-G*rH-Rk_h_*=&9o`q/eu[3o$--Zc#XoX(sA&CI7RqS'cWBhG2:+ODa!):O6`^NT((K7(:%BVJ3=F%emKe-WmK3EIie5ZAbGXt^Hf,[uurZtImn"m<3AaU$p)@,./&T/aMg@_t-oU(Al5HTNb;0J4E-fqZg*4Y/o@,5%"0ObY@,kKsQdk#2'pZOD8tZrghVcMH[#FI&3f.,FmGKKKNo9?B[@`=FkP`:=oo>;4Vs.^rc%L+kt99^Gd]mfUsWoLD02jLH*WUl.Pb(oF^j?7RUN!m&Us22M!@A<RB<?,"#orPd]<&>ld**8+J._-f-FEVm$t<`HO6GNqd_[bhJ&8qK0d-ZKt;EB60u<VCgOQ;8F:jeCp]E2HpO&5==e.Z2c5.#%nBkfCHsrt>d0-2Z<CdP%-(PZ=R(ET3u6<D1@I(u[6LMn;M%:K3fl4ls;SX'd>:*Z]IT(dG)'7QU\#<V$$AmO6;HncG;?UO[<qf,QJem^o.f$D3^V'_h3dF.f82/[@>u^ecY/FgdnO#RWf_=Js*t;iiO?'fQ:g&@nC/Xhu.;&o1b+?_6-Z%i4;1H5GAUag0*4LfL'2;Sl`["O/H6p>jU\SO4%Ffq^-']m<b(Mo1Vg;h"E$f8Z?_AL@bH31kAKY%KEP\PmsdK2MJ^Dfb%0.sgc_9*[9&'t*;+>uUp/PKbuj>J71&Mh5t,WF_k&]O@P+do^;.WV"r6Kkb#5`,aF$-adPdc+'072](pse[q;.^?I#Q#kci1Qr9Z_U:Q_lQ53n!nIBHrchNfMeP-HF*=<22XdSrZ8j>sP4CR1SEP\Ge.aCh(VEW.)F'<]`"gVnaq<<]K,.uCIMlUqSgV3U</GlN`:3?Ft9S-uHH\_0/'rV&dUBe&=8^c)"F#b/Te`H6Yn1DnZc?T$IiaKe%'S][\*'W-]E<4.cnD8?.XB5)khib.oe$NkDa0D^I+$2a=[rbp"D3eQQqq@TO]aNHTMcGM3B3cn9,9'giRF__Y[<^:+bB3]sACEq,A$s%=n\8Vk/OM\c,W"mZ11,MaZ61]7"M`X1/qmcr-hH,#8+udNN9@p:IAM="9:b-RnD&FAVj^G'kW4tPgO+M25'hLH])Ped#fB*fOs>Te;V8("S^2/7e`3>4E]],alEY#@T-dG.(=/^7(s[bh3%omN/'WKl<"q_K`T7$VrMt.GfckX6]1EfAB]1F6o6g>\:2Etf)rD.XNrRc2pgl"Hr<(1MCd%~>endstream
|
| 106 |
-
endobj
|
| 107 |
-
19 0 obj
|
| 108 |
-
<<
|
| 109 |
-
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1251
|
| 110 |
-
>>
|
| 111 |
-
stream
|
| 112 |
-
Gau`R;01GN&:Vs/fU'm&SZsB\Z>@pd[^l$Ne'"!6Hco+&(^1n<bt7%'s8H%#$m^MQApR0<`)taLn([eaAHiiuRK&mT!C!?!I`[+[8FM*9+s?gk^Sb`ESFuBheu'`^-k@VZQnjgqaj:g4M2J-c)%`([:iWt%O9mV9ZO6(4"\bX`WWWGJ,s27(iVrdq]@Q&`bX7t`KV@dkk1#U3_]/$nF6>.H%;Q95P;kU[/"Vgs.N%@'=M6kAJN1afF&?E_+rA+1KE+S:4],1QpOr^qg01e<#d,;@\e=!\1-*,1T[41J&^DSg86dC5.#&+tMiZhie$%p]f=sWJ!9ni#^ZR?Gp5lVJY,M<YHnZf[nt2A3ZtRV6dLh4C-*^gI%O$[,o&o;u7[Nu/XEmkj&m4-UHNFF#I0VCUiaS-$S2Gs[@(=.(Fg-V>W+]dGA*V*5[2WS\gs>9t%t32b/^W)[_+r7&3kOLD>8WTI508QU_ZkVRb*l"j_,ie@Wk/$,J'=rjAsRr^aIAp,g4N\@rcW@_7fV)G7.f:C\2aDCnK2"(-Yh-fNKV4ogPJ_Bbno/AG^W)=l`02mHESBSd,2MW2Q,8S^O,7f_^Pj+'$c\[n!'TZ'8A[[6$M/6Vlo9egXU318J0Zl;rXSYgM=-\-3TecfRc]m]FKNI.=E4amT3\PSaWQi;TtrPVN"#t`E;<R<T0FHF)>bkNM&M.:/OC)MK2$$?Jp$`SY/%t"jbj6*+.%6.71qjEsp)j@\0#RIF/1!&^q"O7Ou;8DL^2(?$>18.AWa`<qQ;FS*8d605U,LRjPYl%CQZ"EZ)d6ggmR/\emf.%.#K=ZXlPbU\40kfi-URgEX``iXe1pOV?N=StFNQ>H$Fi,Ak&SQPl+Y^;rG>nArp/_q%9B[r]_;\_^p'[__7OH7)iuf]c[rld?RB/M<r(<QsU%pNedj)1NmPM-_fL1VD1tNQL&@c-=<:"`[Vpojg6J[HJ4:,T\L_]InN3jJke4J(kV<hYN(d]b#E=":iOW#=k#-U%PKO/p'+,)f951AW&jRK9')Q>rP3T8Xk7<ZOVAq$3lpK6YL6tc'D2V%1G(jM8"TncWs=[!hW2(D30g$5(Q/MN1htIgRt\ADhN@$l202Af7(c#1P6?P("GPEU+>VY%=qG1""FA,mioCp,lF3^-AZtKRg/NFX>&kA^rZpnFA<r!,IA42rZQ6YFrrrLL)tME=&"E=g6gSrChSiOfRe!l*<?[tTYGRI@6&N"%Fn3=3;X6Dm0TH~>endstream
|
| 113 |
-
endobj
|
| 114 |
-
xref
|
| 115 |
-
0 20
|
| 116 |
-
0000000000 65535 f
|
| 117 |
-
0000000061 00000 n
|
| 118 |
-
0000000112 00000 n
|
| 119 |
-
0000000219 00000 n
|
| 120 |
-
0000000331 00000 n
|
| 121 |
-
0000000536 00000 n
|
| 122 |
-
0000000613 00000 n
|
| 123 |
-
0000000818 00000 n
|
| 124 |
-
0000000903 00000 n
|
| 125 |
-
0000001223 00000 n
|
| 126 |
-
0000001296 00000 n
|
| 127 |
-
0000001426 00000 n
|
| 128 |
-
0000001514 00000 n
|
| 129 |
-
0000001667 00000 n
|
| 130 |
-
0000001770 00000 n
|
| 131 |
-
0000001869 00000 n
|
| 132 |
-
0000001999 00000 n
|
| 133 |
-
0000002098 00000 n
|
| 134 |
-
0000002164 00000 n
|
| 135 |
-
0000004006 00000 n
|
| 136 |
-
trailer
|
| 137 |
-
<<
|
| 138 |
-
/ID
|
| 139 |
-
[<8e273c2672d813e3cd44109eb1edd604><8e273c2672d813e3cd44109eb1edd604>]
|
| 140 |
-
% ReportLab generated PDF document -- digest (opensource)
|
| 141 |
-
|
| 142 |
-
/Info 8 0 R
|
| 143 |
-
/Root 7 0 R
|
| 144 |
-
/Size 20
|
| 145 |
-
>>
|
| 146 |
-
startxref
|
| 147 |
-
5349
|
| 148 |
-
%%EOF
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
deps.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
from fastapi import Depends, Header, HTTPException, status
|
| 4 |
-
from sqlalchemy.orm import Session
|
| 5 |
-
|
| 6 |
-
from db.database import get_db
|
| 7 |
-
from db.models import User
|
| 8 |
-
from services.auth_service import decode_token, get_user
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def _extract_bearer(authorization: str | None) -> str | None:
    """Return the token from a ``Bearer <token>`` header value, else ``None``.

    The value must consist of exactly two whitespace-separated parts whose
    first part is ``bearer`` in any letter case. A missing or empty header,
    a different scheme, or any other number of parts yields ``None``.
    """
    if not authorization:
        return None
    pieces = authorization.split()
    if len(pieces) == 2:
        scheme, token = pieces
        if scheme.lower() == "bearer":
            return token
    return None
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def get_current_user(
    authorization: str | None = Header(default=None),
    db: Session = Depends(get_db),
) -> User:
    """FastAPI dependency that resolves the authenticated user or fails with 401.

    Args:
        authorization: Raw ``Authorization`` request header, if any.
        db: Database session supplied by ``get_db``.

    Returns:
        The user looked up via ``get_user`` for the token's ``sub`` claim.

    Raises:
        HTTPException: 401 when the bearer token is missing, cannot be
            decoded, lacks a usable ``sub`` claim, or names no known user.
    """
    token = _extract_bearer(authorization)
    if not token:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Missing bearer token")

    payload = decode_token(token)
    if not payload or "sub" not in payload:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid or expired token")

    # A token whose subject is not an integer is a bad credential, not a
    # server fault: answer 401 instead of letting int() raise into a 500.
    try:
        user_id = int(payload["sub"])
    except (TypeError, ValueError):
        raise HTTPException(
            status.HTTP_401_UNAUTHORIZED, "Invalid or expired token"
        ) from None

    user = get_user(db, user_id)
    if not user:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "User not found")
    return user
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def optional_current_user(
    authorization: str | None = Header(default=None),
    db: Session = Depends(get_db),
) -> User | None:
    """FastAPI dependency that resolves the user when a usable token is sent.

    Unlike a strict authentication dependency, every failure mode here
    (no header, undecodable token, missing or non-integer ``sub`` claim)
    yields ``None`` rather than an HTTP error.

    Args:
        authorization: Raw ``Authorization`` request header, if any.
        db: Database session supplied by ``get_db``.

    Returns:
        Whatever ``get_user`` returns for the token's subject, or ``None``
        when no usable token was supplied.
    """
    token = _extract_bearer(authorization)
    if not token:
        return None

    payload = decode_token(token)
    if not payload or "sub" not in payload:
        return None

    # A malformed subject must be treated as "anonymous", not crash the request.
    try:
        user_id = int(payload["sub"])
    except (TypeError, ValueError):
        return None
    return get_user(db, user_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
download_ffpp.py
DELETED
|
@@ -1,261 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python
|
| 2 |
-
""" Downloads FaceForensics++ and Deep Fake Detection public data release
|
| 3 |
-
Example usage:
|
| 4 |
-
see -h or https://github.com/ondyari/FaceForensics
|
| 5 |
-
"""
|
| 6 |
-
# -*- coding: utf-8 -*-
|
| 7 |
-
import argparse
|
| 8 |
-
import os
|
| 9 |
-
import urllib
|
| 10 |
-
import urllib.request
|
| 11 |
-
import tempfile
|
| 12 |
-
import time
|
| 13 |
-
import sys
|
| 14 |
-
import json
|
| 15 |
-
import random
|
| 16 |
-
from tqdm import tqdm
|
| 17 |
-
from os.path import join
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
# URLs and filenames
# Server-relative locations of the JSON file lists fetched by main().
FILELIST_URL = 'misc/filelist.json'
# NOTE: the "DEEPFEAKES" spelling is the identifier main() references.
DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
# Files downloaded per folder when --type is 'models'.
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]

# Parameters
# Maps each --dataset choice to its server-relative path (a zip file for the
# two 'original_youtube_videos*' entries, a directory otherwise).
DATASETS = {
    'original_youtube_videos': 'misc/downloaded_youtube_videos.zip',
    'original_youtube_videos_info': 'misc/downloaded_youtube_videos_info.zip',
    'original': 'original_sequences/youtube',
    'DeepFakeDetection_original': 'original_sequences/actors',
    'Deepfakes': 'manipulated_sequences/Deepfakes',
    'DeepFakeDetection': 'manipulated_sequences/DeepFakeDetection',
    'Face2Face': 'manipulated_sequences/Face2Face',
    'FaceShifter': 'manipulated_sequences/FaceShifter',
    'FaceSwap': 'manipulated_sequences/FaceSwap',
    'NeuralTextures': 'manipulated_sequences/NeuralTextures'
}
# Datasets processed when --dataset is 'all' (the zip entries are excluded).
ALL_DATASETS = ['original', 'DeepFakeDetection_original', 'Deepfakes',
                'DeepFakeDetection', 'Face2Face', 'FaceShifter', 'FaceSwap',
                'NeuralTextures']
# Accepted values for --compression, --type and --server respectively.
COMPRESSION = ['raw', 'c23', 'c40']
TYPE = ['videos', 'masks', 'models']
SERVERS = ['EU', 'EU2', 'CA']
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def parse_args():
    """Parse the command line and attach the server-specific URLs.

    Besides the options declared below, the returned namespace carries three
    derived attributes: ``tos_url``, ``base_url`` and ``deepfakes_model_url``,
    all built from the root URL of the selected ``--server``.

    Raises:
        Exception: if the server name has no known URL.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description='Downloads FaceForensics v2 public data release.',
    )
    cli.add_argument('output_path', type=str, help='Output directory.')
    cli.add_argument(
        '-d', '--dataset', type=str, default='all',
        choices=list(DATASETS.keys()) + ['all'],
        help='Which dataset to download, either pristine or '
             'manipulated data or the downloaded youtube '
             'videos.',
    )
    cli.add_argument(
        '-c', '--compression', type=str, default='raw',
        choices=COMPRESSION,
        help='Which compression degree. All videos '
             'have been generated with h264 with a varying '
             'codec. Raw (c0) videos are lossless compressed.',
    )
    cli.add_argument(
        '-t', '--type', type=str, default='videos',
        choices=TYPE,
        help='Which file type, i.e. videos, masks, for our '
             'manipulation methods, models, for Deepfakes.',
    )
    cli.add_argument(
        '-n', '--num_videos', type=int, default=None,
        help='Select a number of videos number to '
             "download if you don't want to download the full"
             ' dataset.',
    )
    cli.add_argument(
        '--server', type=str, default='EU',
        choices=SERVERS,
        help='Server to download the data from. If you '
             'encounter a slow download speed, consider '
             'changing the server.',
    )
    args = cli.parse_args()

    # URLs: root address of every supported mirror
    server_roots = {
        'EU': 'http://canis.vc.in.tum.de:8100/',
        'EU2': 'http://kaldir.vc.in.tum.de/faceforensics/',
        'CA': 'http://falas.cmpt.sfu.ca:8100/',
    }
    if args.server not in server_roots:
        raise Exception('Wrong server name. Choices: {}'.format(str(SERVERS)))
    server_url = server_roots[args.server]

    args.tos_url = server_url + 'webpage/FaceForensics_TOS.pdf'
    args.base_url = server_url + 'v3/'
    args.deepfakes_model_url = (
        server_url + 'v3/manipulated_sequences/' + 'Deepfakes/models/'
    )
    return args
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
def download_files(filenames, base_url, output_path, report_progress=True):
    """Download every name in *filenames* from *base_url* into *output_path*.

    The output directory is created if needed. Each file is fetched from
    ``base_url + filename`` and stored as ``output_path/filename``. When
    *report_progress* is true the iteration is wrapped in a ``tqdm`` bar.
    """
    os.makedirs(output_path, exist_ok=True)
    pending = tqdm(filenames) if report_progress else filenames
    for name in pending:
        download_file(base_url + name, join(output_path, name))
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
def reporthook(count, block_size, total_size):
    """Progress callback for ``urllib.request.urlretrieve``.

    The call with ``count == 0`` only records the start time in the
    module-level ``start_time``; later calls overwrite one console line with
    percentage, downloaded size, speed and elapsed time.

    Args:
        count: Number of blocks transferred so far.
        block_size: Size of one block in bytes.
        total_size: Total size in bytes; zero or negative means unknown, in
            which case the percentage is reported as 0.
    """
    global start_time
    if count == 0:
        start_time = time.time()
        return

    duration = time.time() - start_time
    progress_size = int(count * block_size)
    # Two callbacks can land on the same clock tick; avoid dividing by zero.
    speed = int(progress_size / (1024 * duration)) if duration > 0 else 0
    if total_size > 0:
        # The last block usually overshoots the real size, so cap at 100.
        percent = min(100, int(progress_size * 100 / total_size))
    else:
        percent = 0
    sys.stdout.write("\rProgress: %d%%, %d MB, %d KB/s, %d seconds passed" %
                     (percent, progress_size / (1024 * 1024), speed, duration))
    sys.stdout.flush()
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def download_file(url, out_file, report_progress=False):
    """Download *url* to *out_file* unless that file already exists.

    The data is first written to a temporary file in the destination
    directory and moved into place only after the transfer succeeded, so a
    partial download never appears under the final name.

    Args:
        url: Source URL.
        out_file: Destination path.
        report_progress: When true, ``reporthook`` prints transfer progress.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises; the temporary file is
        removed before the error propagates.
    """
    if os.path.isfile(out_file):
        tqdm.write('WARNING: skipping download of existing file ' + out_file)
        return

    # An empty dirname means "current directory", as with the bare filename.
    out_dir = os.path.dirname(out_file) or '.'
    fd, out_file_tmp = tempfile.mkstemp(dir=out_dir)
    # urlretrieve reopens the path itself; only the name is needed.
    os.close(fd)
    try:
        if report_progress:
            urllib.request.urlretrieve(url, out_file_tmp,
                                       reporthook=reporthook)
        else:
            urllib.request.urlretrieve(url, out_file_tmp)
        os.replace(out_file_tmp, out_file)
    except BaseException:
        # Covers Ctrl-C as well: do not leave orphaned temp files behind.
        try:
            os.unlink(out_file_tmp)
        except OSError:
            pass
        raise
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
def _fetch_json(url):
    """Download *url* and decode its UTF-8 JSON body, closing the response."""
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode("utf-8"))


def main(args):
    """Download the datasets selected in *args* after a terms-of-use prompt.

    Args:
        args: Namespace as produced by ``parse_args`` (reads ``tos_url``,
            ``base_url``, ``deepfakes_model_url``, ``dataset``, ``type``,
            ``compression``, ``num_videos`` and ``output_path``).

    The function returns early (without raising) when the requested
    dataset/type combination is not available.
    """
    # TOS
    print('By pressing any key to continue you confirm that you have agreed '
          'to the FaceForensics terms of use as described at:')
    print(args.tos_url)
    print('***')
    print('Press any key to continue, or CTRL-C to exit.')
    input('')

    # Extract arguments
    c_datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
    c_type = args.type
    c_compression = args.compression
    num_videos = args.num_videos
    output_path = args.output_path
    os.makedirs(output_path, exist_ok=True)
    # Exactly one trailing slash, so joins below never produce '//'.
    base_url = args.base_url.rstrip('/') + '/'

    # Check for special dataset cases
    for dataset in c_datasets:
        dataset_path = DATASETS[dataset]
        # Special cases
        if 'original_youtube_videos' in dataset:
            # Here we download the original youtube videos zip file
            print('Downloading original youtube videos.')
            if 'info' not in dataset_path:
                print('Please be patient, this may take a while (~40gb)')
                suffix = ''
            else:
                suffix = 'info'
            download_file(base_url + dataset_path,
                          out_file=join(output_path,
                                        'downloaded_videos{}.zip'.format(
                                            suffix)),
                          report_progress=True)
            return

        # Else: regular datasets
        print('Downloading {} of dataset "{}"'.format(
            c_type, dataset_path
        ))

        # Get the file list from the server
        if 'DeepFakeDetection' in dataset_path or 'actors' in dataset_path:
            filepaths = _fetch_json(base_url + DEEPFEAKES_DETECTION_URL)
            if 'actors' in dataset_path:
                filelist = filepaths['actors']
            else:
                filelist = filepaths['DeepFakesDetection']
        elif 'original' in dataset_path:
            # Originals: every member of every pair is its own file
            file_pairs = _fetch_json(base_url + FILELIST_URL)
            filelist = []
            for pair in file_pairs:
                filelist += pair
        else:
            # Manipulated data: files are named after the joined pair, in
            # both orders (models exist for one order only)
            file_pairs = _fetch_json(base_url + FILELIST_URL)
            filelist = []
            for pair in file_pairs:
                filelist.append('_'.join(pair))
                if c_type != 'models':
                    filelist.append('_'.join(pair[::-1]))

        # Maybe limit number of videos for download
        if num_videos is not None and num_videos > 0:
            print('Downloading the first {} videos'.format(num_videos))
            filelist = filelist[:num_videos]

        # Server and local paths
        dataset_videos_url = base_url + '{}/{}/{}/'.format(
            dataset_path, c_compression, c_type)
        dataset_mask_url = base_url + '{}/{}/videos/'.format(
            dataset_path, 'masks')

        if c_type == 'videos':
            dataset_output_path = join(output_path, dataset_path, c_compression,
                                       c_type)
            print('Output path: {}'.format(dataset_output_path))
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_videos_url, dataset_output_path)
        elif c_type == 'masks':
            dataset_output_path = join(output_path, dataset_path, c_type,
                                       'videos')
            print('Output path: {}'.format(dataset_output_path))
            if 'original' in dataset:
                if args.dataset != 'all':
                    print('Only videos available for original data. Aborting.')
                    return
                else:
                    print('Only videos available for original data. '
                          'Skipping original.\n')
                    continue
            if 'FaceShifter' in dataset:
                print('Masks not available for FaceShifter. Aborting.')
                return
            filelist = [filename + '.mp4' for filename in filelist]
            download_files(filelist, dataset_mask_url, dataset_output_path)

        # Else: models for deepfakes
        else:
            if dataset != 'Deepfakes' and c_type == 'models':
                print('Models only available for Deepfakes. Aborting')
                return
            dataset_output_path = join(output_path, dataset_path, c_type)
            print('Output path: {}'.format(dataset_output_path))

            # Get Deepfakes models
            for folder in tqdm(filelist):
                folder_filelist = DEEPFAKES_MODEL_NAMES

                # Folder paths
                folder_base_url = args.deepfakes_model_url + folder + '/'
                folder_dataset_output_path = join(dataset_output_path,
                                                  folder)
                download_files(folder_filelist, folder_base_url,
                               folder_dataset_output_path,
                               report_progress=False)   # already done
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
# Script entry point: parse the command line, then run the download.
if __name__ == "__main__":
    args = parse_args()
    main(args)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ela_service.py
DELETED
|
@@ -1,88 +0,0 @@
|
|
| 1 |
-
"""Error Level Analysis (ELA) — Phase 12.1
|
| 2 |
-
|
| 3 |
-
Re-saves an image at a fixed JPEG quality and diffs against the original to reveal
|
| 4 |
-
per-pixel manipulation artifacts. Regions that were recently edited will show
|
| 5 |
-
higher error levels than untouched areas.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from __future__ import annotations
|
| 9 |
-
|
| 10 |
-
import base64
|
| 11 |
-
import io
|
| 12 |
-
|
| 13 |
-
import cv2
|
| 14 |
-
import numpy as np
|
| 15 |
-
from loguru import logger
|
| 16 |
-
from PIL import Image
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def _compute_ela(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> np.ndarray:
    """Compute the amplified JPEG re-save difference of an image.

    Args:
        pil_img: Input image; it is converted to RGB before processing.
        quality: JPEG quality used for the in-memory re-save.
        scale: Factor the absolute per-pixel difference is multiplied by.

    Returns:
        The difference image, clipped to [0, 255] and cast to uint8.
    """
    source_rgb = pil_img.convert("RGB")

    # Round-trip through an in-memory JPEG at the requested quality
    jpeg_buffer = io.BytesIO()
    source_rgb.save(jpeg_buffer, format="JPEG", quality=quality)
    jpeg_buffer.seek(0)
    recompressed = Image.open(jpeg_buffer).convert("RGB")

    before = np.array(source_rgb, dtype=np.float32)
    after = np.array(recompressed, dtype=np.float32)

    # Amplify the absolute difference, then squeeze it back into byte range
    amplified = np.abs(before - after) * scale
    return np.clip(amplified, 0, 255).astype(np.uint8)
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
def generate_ela_base64(pil_img: Image.Image, quality: int = 90, scale: float = 15.0) -> str:
    """Render the ELA difference map as a ``data:image/png;base64,...`` URL.

    Brighter pixels in the result correspond to larger differences between
    the image and its JPEG re-save. ``quality`` and ``scale`` are forwarded
    to ``_compute_ela``.
    """
    ela_map = _compute_ela(pil_img, quality=quality, scale=scale)
    height, width = ela_map.shape[0], ela_map.shape[1]

    png_bytes = io.BytesIO()
    Image.fromarray(ela_map).save(png_bytes, format="PNG")
    encoded = base64.b64encode(png_bytes.getvalue()).decode("ascii")

    logger.info(f"ELA map generated ({width}x{height})")
    return "data:image/png;base64," + encoded
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
def generate_blended_ela_base64(
    pil_img: Image.Image,
    gradcam_weight: float = 0.6,
    ela_weight: float = 0.4,
    quality: int = 90,
    scale: float = 15.0,
) -> str:
    """Overlay the ELA difference map on the original image as a PNG data URL.

    The result is an equal-weight (50/50) mix of the RGB original and its
    ELA map, clipped to byte range.

    NOTE(review): ``gradcam_weight`` and ``ela_weight`` are accepted but not
    used by this implementation, and no Grad-CAM data is involved; confirm
    whether the weights were meant to drive the blend.
    """
    # Same inputs as the plain ELA path, both as float32 for the arithmetic
    ela_layer = _compute_ela(pil_img, quality=quality, scale=scale).astype(np.float32)
    base_layer = np.array(pil_img.convert("RGB"), dtype=np.float32)

    mixed = base_layer * 0.5 + ela_layer * 0.5
    blended = np.clip(mixed, 0, 255).astype(np.uint8)
    height, width = blended.shape[0], blended.shape[1]

    png_bytes = io.BytesIO()
    Image.fromarray(blended).save(png_bytes, format="PNG")
    encoded = base64.b64encode(png_bytes.getvalue()).decode("ascii")

    logger.info(f"Blended ELA generated ({width}x{height})")
    return "data:image/png;base64," + encoded
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
exif_service.py
DELETED
|
@@ -1,129 +0,0 @@
|
|
| 1 |
-
"""EXIF Metadata Extraction — Phase 12.2
|
| 2 |
-
|
| 3 |
-
Extracts camera metadata from uploaded images and computes a trust adjustment
|
| 4 |
-
score: presence of authentic camera metadata lowers fake probability, while
|
| 5 |
-
evidence of editing software raises it.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from __future__ import annotations
|
| 9 |
-
|
| 10 |
-
from typing import Optional
|
| 11 |
-
|
| 12 |
-
from loguru import logger
|
| 13 |
-
from PIL import Image
|
| 14 |
-
from PIL.ExifTags import TAGS, GPSTAGS
|
| 15 |
-
|
| 16 |
-
from schemas.common import ExifSummary
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
# Software strings that suggest post-processing / generation.
# Matched as lowercase substrings of the EXIF Software field by extract_exif.
_SUSPICIOUS_SOFTWARE = {
    "adobe photoshop", "photoshop", "gimp", "affinity photo",
    "stable diffusion", "midjourney", "dall-e", "comfyui",
    "automatic1111", "invokeai",
}

# Software strings that are normal camera firmware.
# Only consulted when no suspicious entry matched.
_CAMERA_SOFTWARE = {
    "ver.", "firmware", "camera", "dji", "gopro",
}
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
def _decode_gps(gps_info: dict) -> Optional[str]:
|
| 33 |
-
"""Decode EXIF GPSInfo dict into a human-readable lat/lon string."""
|
| 34 |
-
try:
|
| 35 |
-
def _to_decimal(values, ref):
|
| 36 |
-
d, m, s = [float(v) for v in values]
|
| 37 |
-
decimal = d + m / 60.0 + s / 3600.0
|
| 38 |
-
if ref in ("S", "W"):
|
| 39 |
-
decimal = -decimal
|
| 40 |
-
return decimal
|
| 41 |
-
|
| 42 |
-
lat = _to_decimal(gps_info.get(2, (0, 0, 0)), gps_info.get(1, "N"))
|
| 43 |
-
lon = _to_decimal(gps_info.get(4, (0, 0, 0)), gps_info.get(3, "E"))
|
| 44 |
-
return f"{lat:.6f}, {lon:.6f}"
|
| 45 |
-
except Exception:
|
| 46 |
-
return None
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
def extract_exif(pil_img: Image.Image, raw_bytes: bytes) -> ExifSummary:
    """Extract EXIF metadata and compute a trust adjustment score.

    Pillow's EXIF data is used when available; otherwise ``exifread`` is
    tried on *raw_bytes* (silently skipped when that package is missing).
    GPS is only read on the Pillow path.

    Trust adjustment logic:
    - Make + Model + DateTimeOriginal all present → -15
    - GPS info decoded → -5 additional
    - Software field matches ``_SUSPICIOUS_SOFTWARE`` → +10
    - otherwise, Software field matches ``_CAMERA_SOFTWARE`` → -2
    - none of the above → 0

    Args:
        pil_img: Opened image whose EXIF block is inspected.
        raw_bytes: Original file bytes, used only by the fallback parser.

    Returns:
        An ``ExifSummary`` with the extracted fields, ``trust_adjustment``
        and a human-readable ``trust_reason``.
    """
    def _clean(value) -> Optional[str]:
        # Normalise any tag value to stripped text; empty becomes None.
        return str(value).strip() or None

    summary = ExifSummary()

    try:
        exif_data = pil_img._getexif()
    except Exception:
        exif_data = None

    if not exif_data:
        # Try exifread as fallback for formats Pillow doesn't handle well
        try:
            import exifread
            from io import BytesIO
            tags = exifread.process_file(BytesIO(raw_bytes), details=False)
            if tags:
                summary.make = _clean(tags.get("Image Make", ""))
                summary.model = _clean(tags.get("Image Model", ""))
                summary.datetime_original = _clean(tags.get("EXIF DateTimeOriginal", ""))
                summary.software = _clean(tags.get("Image Software", ""))
                summary.lens_model = _clean(tags.get("EXIF LensModel", ""))
        except ImportError:
            logger.debug("exifread not installed, skipping fallback EXIF extraction")
        except Exception as e:
            logger.debug(f"exifread fallback failed: {e}")
    else:
        # Decode Pillow EXIF: translate numeric tag ids to their names
        decoded = {TAGS.get(tag_id, tag_id): value for tag_id, value in exif_data.items()}

        summary.make = _clean(decoded.get("Make", ""))
        summary.model = _clean(decoded.get("Model", ""))
        summary.datetime_original = _clean(decoded.get("DateTimeOriginal", ""))
        summary.software = _clean(decoded.get("Software", ""))
        summary.lens_model = _clean(decoded.get("LensModel", ""))

        # GPS: hand over the IFD with its numeric tag ids untouched, because
        # those ids are what the decoder looks up. Re-keying by tag name
        # first made every position decode as 0, 0.
        gps_raw = decoded.get("GPSInfo")
        if gps_raw and isinstance(gps_raw, dict):
            summary.gps_info = _decode_gps(gps_raw)

    # ── Trust adjustment scoring ──
    adjustment = 0
    reasons = []

    has_camera_meta = summary.make and summary.model and summary.datetime_original
    if has_camera_meta:
        adjustment -= 15
        reasons.append("valid camera metadata (Make/Model/DateTime)")

    if summary.gps_info:
        adjustment -= 5
        reasons.append("GPS coordinates present")

    if summary.software:
        sw_lower = summary.software.lower()
        if any(s in sw_lower for s in _SUSPICIOUS_SOFTWARE):
            adjustment += 10
            reasons.append(f"editing software detected: {summary.software}")
        elif any(s in sw_lower for s in _CAMERA_SOFTWARE):
            adjustment -= 2
            reasons.append("camera firmware in Software field")

    summary.trust_adjustment = adjustment
    summary.trust_reason = "; ".join(reasons) if reasons else "no EXIF metadata found"

    logger.info(f"EXIF extracted: make={summary.make}, model={summary.model}, "
                f"adjustment={adjustment} ({summary.trust_reason})")
    return summary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
file_handler.py
DELETED
|
@@ -1,96 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import io
|
| 4 |
-
import os
|
| 5 |
-
import tempfile
|
| 6 |
-
from typing import Iterable
|
| 7 |
-
|
| 8 |
-
from fastapi import HTTPException, UploadFile, status
|
| 9 |
-
|
| 10 |
-
from config import settings
|
| 11 |
-
|
| 12 |
-
# Leading byte signatures mapped to the MIME type they identify; consulted
# in insertion order by _detect_mime_by_magic.
IMAGE_MAGIC_BYTES: dict[bytes, str] = {
    b"\xff\xd8\xff": "image/jpeg",
    b"\x89PNG\r\n\x1a\n": "image/png",
    b"RIFF": "image/webp",  # partial; WEBP has 'RIFF....WEBP'
}
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def _detect_mime_by_magic(head: bytes) -> str | None:
    """Identify an image MIME type from the first bytes of a file.

    *head* is compared against every signature in ``IMAGE_MAGIC_BYTES``. A
    ``RIFF`` match only counts as WebP when ``WEBP`` also occurs within the
    first 16 bytes. Returns ``None`` when nothing matches.
    """
    candidates = (
        mime
        for signature, mime in IMAGE_MAGIC_BYTES.items()
        if head.startswith(signature)
        and (mime != "image/webp" or b"WEBP" in head[:16])
    )
    return next(candidates, None)
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
async def read_upload_bytes(
    file: UploadFile,
    allowed_mimes: Iterable[str],
    max_size_mb: int,
) -> tuple[bytes, str]:
    """Read an UploadFile into memory after validating type and size.

    At most ``max_size_mb`` MB plus one byte are read, so an oversized upload
    is rejected without buffering all of it. The MIME type comes from the
    file's magic bytes when recognised, otherwise from the client-supplied
    content type (lower-cased).

    Args:
        file: The incoming upload.
        allowed_mimes: Accepted MIME types; any iterable, consumed once.
        max_size_mb: Maximum accepted size in megabytes.

    Returns:
        ``(raw_bytes, detected_mime)``.

    Raises:
        HTTPException: 413 when the upload exceeds the limit, 415 when its
            type is not in *allowed_mimes*.
    """
    # Materialise once: the collection is used for the check and the message.
    allowed = tuple(allowed_mimes)
    max_bytes = int(max_size_mb * 1024 * 1024)

    # One byte beyond the limit is enough to prove the upload is too large.
    data = await file.read(max_bytes + 1)
    if len(data) > max_bytes:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"File too large (> {max_size_mb} MB)",
        )

    mime = _detect_mime_by_magic(data[:16]) or (file.content_type or "").lower()
    if mime not in allowed:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=f"Unsupported type '{mime}'. Allowed: {list(allowed)}",
        )
    return data, mime
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
def bytes_to_buffer(data: bytes) -> io.BytesIO:
    """Wrap *data* in a new in-memory binary stream positioned at the start."""
    stream = io.BytesIO(data)
    return stream
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
async def save_upload_to_tempfile(
    file: UploadFile,
    allowed_mimes: Iterable[str],
    max_size_mb: int,
    suffix: str = ".mp4",
) -> tuple[str, str]:
    """Stream an UploadFile to a temp file on disk. Returns (path, mime).

    MIME is taken from the client's content_type (no magic-byte check for
    videos). The upload is copied in 1 MiB chunks and aborted as soon as the
    size limit is exceeded. Caller is responsible for deleting the temp file.

    Args:
        file: The incoming upload.
        allowed_mimes: Accepted MIME types; any iterable, consumed once.
        max_size_mb: Maximum accepted size in megabytes.
        suffix: File-name suffix for the temp file.

    Raises:
        HTTPException: 415 for a disallowed type, 413 when the upload is too
            large. On any failure the temp file is removed first.
    """
    # Materialise once: the collection is used for the check and the message.
    allowed = tuple(allowed_mimes)
    mime = (file.content_type or "").lower()
    if mime not in allowed:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail=f"Unsupported type '{mime}'. Allowed: {list(allowed)}",
        )

    max_bytes = max_size_mb * 1024 * 1024
    fd, path = tempfile.mkstemp(suffix=suffix, prefix="ds_vid_")
    written = 0
    try:
        with os.fdopen(fd, "wb") as out:
            while True:
                chunk = await file.read(1024 * 1024)
                if not chunk:
                    break
                written += len(chunk)
                if written > max_bytes:
                    raise HTTPException(
                        status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                        detail=f"File too large (> {max_size_mb} MB)",
                    )
                out.write(chunk)
    except BaseException:
        # BaseException, not Exception: task cancellation (e.g. the client
        # disconnecting mid-upload) must also remove the partial file.
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    return path, mime
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
generate_colab_nb.py
DELETED
|
@@ -1,213 +0,0 @@
|
|
| 1 |
-
import nbformat as nbf
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
nb = nbf.v4.new_notebook()
|
| 5 |
-
|
| 6 |
-
text = """\
|
| 7 |
-
# DeepShield: FaceForensics++ ViT Training
|
| 8 |
-
Run this entirely in Google Colab.
|
| 9 |
-
**Before running**:
|
| 10 |
-
1. Go to `Runtime` -> `Change runtime type` -> select **T4 GPU**.
|
| 11 |
-
2. Run the cells below sequentially.
|
| 12 |
-
"""
|
| 13 |
-
|
| 14 |
-
code_install = """\
|
| 15 |
-
!pip install timm transformers datasets accelerate evaluate opencv-python
|
| 16 |
-
"""
|
| 17 |
-
|
| 18 |
-
code_ffpp = """\
|
| 19 |
-
# We create the download script inside the Colab environment
|
| 20 |
-
download_script = '''#!/usr/bin/env python
|
| 21 |
-
import argparse
|
| 22 |
-
import os
|
| 23 |
-
import urllib.request
|
| 24 |
-
import tempfile
|
| 25 |
-
import time
|
| 26 |
-
import sys
|
| 27 |
-
import json
|
| 28 |
-
from tqdm import tqdm
|
| 29 |
-
from os.path import join
|
| 30 |
-
|
| 31 |
-
FILELIST_URL = 'misc/filelist.json'
|
| 32 |
-
DEEPFEAKES_DETECTION_URL = 'misc/deepfake_detection_filenames.json'
|
| 33 |
-
DEEPFAKES_MODEL_NAMES = ['decoder_A.h5', 'decoder_B.h5', 'encoder.h5',]
|
| 34 |
-
DATASETS = {
|
| 35 |
-
'original': 'original_sequences/youtube',
|
| 36 |
-
'Deepfakes': 'manipulated_sequences/Deepfakes',
|
| 37 |
-
'Face2Face': 'manipulated_sequences/Face2Face',
|
| 38 |
-
'FaceShifter': 'manipulated_sequences/FaceShifter',
|
| 39 |
-
'FaceSwap': 'manipulated_sequences/FaceSwap',
|
| 40 |
-
'NeuralTextures': 'manipulated_sequences/NeuralTextures'
|
| 41 |
-
}
|
| 42 |
-
ALL_DATASETS = ['original', 'Deepfakes', 'Face2Face', 'FaceShifter', 'FaceSwap', 'NeuralTextures']
|
| 43 |
-
COMPRESSION = ['raw', 'c23', 'c40']
|
| 44 |
-
TYPE = ['videos']
|
| 45 |
-
|
| 46 |
-
def download_file(url, out_file):
|
| 47 |
-
os.makedirs(os.path.dirname(out_file), exist_ok=True)
|
| 48 |
-
if not os.path.isfile(out_file):
|
| 49 |
-
urllib.request.urlretrieve(url, out_file)
|
| 50 |
-
|
| 51 |
-
def main():
|
| 52 |
-
parser = argparse.ArgumentParser()
|
| 53 |
-
parser.add_argument('output_path', type=str)
|
| 54 |
-
parser.add_argument('-d', '--dataset', type=str, default='all')
|
| 55 |
-
parser.add_argument('-c', '--compression', type=str, default='c40')
|
| 56 |
-
parser.add_argument('-t', '--type', type=str, default='videos')
|
| 57 |
-
parser.add_argument('-n', '--num_videos', type=int, default=50) # Small amount for tutorial
|
| 58 |
-
args = parser.parse_args()
|
| 59 |
-
|
| 60 |
-
base_url = 'http://kaldir.vc.in.tum.de/faceforensics/v3/'
|
| 61 |
-
|
| 62 |
-
datasets = [args.dataset] if args.dataset != 'all' else ALL_DATASETS
|
| 63 |
-
for dataset in datasets:
|
| 64 |
-
dataset_path = DATASETS[dataset]
|
| 65 |
-
print(f'Downloading {args.compression} of {dataset}')
|
| 66 |
-
|
| 67 |
-
file_pairs = json.loads(urllib.request.urlopen(base_url + FILELIST_URL).read().decode("utf-8"))
|
| 68 |
-
filelist = []
|
| 69 |
-
if 'original' in dataset_path:
|
| 70 |
-
for pair in file_pairs:
|
| 71 |
-
filelist += pair
|
| 72 |
-
else:
|
| 73 |
-
for pair in file_pairs:
|
| 74 |
-
filelist.append('_'.join(pair))
|
| 75 |
-
filelist.append('_'.join(pair[::-1]))
|
| 76 |
-
|
| 77 |
-
filelist = filelist[:args.num_videos]
|
| 78 |
-
dataset_videos_url = base_url + f'{dataset_path}/{args.compression}/{args.type}/'
|
| 79 |
-
dataset_output_path = join(args.output_path, dataset_path, args.compression, args.type)
|
| 80 |
-
|
| 81 |
-
for filename in tqdm(filelist):
|
| 82 |
-
download_file(dataset_videos_url + filename + ".mp4", join(dataset_output_path, filename + ".mp4"))
|
| 83 |
-
|
| 84 |
-
if __name__ == "__main__":
|
| 85 |
-
main()
|
| 86 |
-
'''
|
| 87 |
-
|
| 88 |
-
with open("download_ffpp.py", "w") as f:
|
| 89 |
-
f.write(download_script)
|
| 90 |
-
|
| 91 |
-
!python download_ffpp.py ./data -d all -c c40 -t videos -n 50
|
| 92 |
-
"""
|
| 93 |
-
|
| 94 |
-
# Notebook cell: sample the first few frames of every downloaded video into
# ./dataset/real and ./dataset/fake (the layout `imagefolder` expects).
#
# FaceForensics++ uses the same "<src>_<dst>.mp4" names under every
# manipulation method, and all fakes share one output folder.  Without a
# per-method prefix each method would overwrite the previous method's frames,
# so the fake class would silently contain only the last method processed.
code_extract = """\
import cv2
import os
import glob
from tqdm import tqdm

def extract_frames(video_folder, output_folder, label, max_frames=4, prefix=''):
    # `prefix` is prepended to every output filename so that videos with the
    # same name coming from different source folders do not overwrite each other.
    os.makedirs(output_folder, exist_ok=True)
    videos = glob.glob(os.path.join(video_folder, "*.mp4"))

    for vid_path in tqdm(videos, desc=f"Extracting {label}"):
        vid_name = os.path.basename(vid_path).replace('.mp4','')
        cap = cv2.VideoCapture(vid_path)
        count = 0
        while cap.isOpened() and count < max_frames:
            ret, frame = cap.read()
            if not ret: break
            frame = cv2.resize(frame, (224, 224))
            out_path = os.path.join(output_folder, f"{prefix}{vid_name}_f{count}.jpg")
            cv2.imwrite(out_path, frame)
            count += 1
        cap.release()

# Extract Real
extract_frames('./data/original_sequences/youtube/c40/videos', './dataset/real', 'real')

# Extract Fakes
fakes = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']
for f in fakes:
    extract_frames(f'./data/manipulated_sequences/{f}/c40/videos', './dataset/fake', 'fake', prefix=f'{f}_')
"""
|
| 125 |
-
|
| 126 |
-
code_train = """\
|
| 127 |
-
import numpy as np
|
| 128 |
-
from datasets import load_dataset
|
| 129 |
-
from transformers import ViTImageProcessor, ViTForImageClassification, TrainingArguments, Trainer
|
| 130 |
-
import torch
|
| 131 |
-
|
| 132 |
-
# 1. Load Dataset
|
| 133 |
-
dataset = load_dataset('imagefolder', data_dir='./dataset')
|
| 134 |
-
# Split into train/validation
|
| 135 |
-
dataset = dataset['train'].train_test_split(test_size=0.1)
|
| 136 |
-
|
| 137 |
-
# 2. Preprocessor
|
| 138 |
-
model_name_or_path = 'google/vit-base-patch16-224-in21k'
|
| 139 |
-
processor = ViTImageProcessor.from_pretrained(model_name_or_path)
|
| 140 |
-
|
| 141 |
-
def transform(example_batch):
|
| 142 |
-
# Take a list of PIL images and turn them to pixel values
|
| 143 |
-
inputs = processor([x.convert("RGB") for x in example_batch['image']], return_tensors='pt')
|
| 144 |
-
inputs['labels'] = example_batch['label']
|
| 145 |
-
return inputs
|
| 146 |
-
|
| 147 |
-
prepared_ds = dataset.with_transform(transform)
|
| 148 |
-
|
| 149 |
-
def collate_fn(batch):
|
| 150 |
-
return {
|
| 151 |
-
'pixel_values': torch.stack([x['pixel_values'] for x in batch]),
|
| 152 |
-
'labels': torch.tensor([x['labels'] for x in batch])
|
| 153 |
-
}
|
| 154 |
-
|
| 155 |
-
# 3. Load Model
|
| 156 |
-
labels = dataset['train'].features['label'].names
|
| 157 |
-
model = ViTForImageClassification.from_pretrained(
|
| 158 |
-
model_name_or_path,
|
| 159 |
-
num_labels=len(labels),
|
| 160 |
-
id2label={str(i): c for i, c in enumerate(labels)},
|
| 161 |
-
label2id={c: str(i) for i, c in enumerate(labels)}
|
| 162 |
-
)
|
| 163 |
-
|
| 164 |
-
training_args = TrainingArguments(
|
| 165 |
-
output_dir="./vit-deepshield",
|
| 166 |
-
per_device_train_batch_size=16,
|
| 167 |
-
eval_strategy="steps",
|
| 168 |
-
num_train_epochs=3,
|
| 169 |
-
fp16=True, # Mixed precision for speed
|
| 170 |
-
save_steps=100,
|
| 171 |
-
eval_steps=100,
|
| 172 |
-
logging_steps=10,
|
| 173 |
-
learning_rate=2e-4,
|
| 174 |
-
save_total_limit=2,
|
| 175 |
-
remove_unused_columns=False,
|
| 176 |
-
push_to_hub=False,
|
| 177 |
-
load_best_model_at_end=True,
|
| 178 |
-
)
|
| 179 |
-
|
| 180 |
-
import evaluate
|
| 181 |
-
metric = evaluate.load("accuracy")
|
| 182 |
-
def compute_metrics(p):
|
| 183 |
-
return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids)
|
| 184 |
-
|
| 185 |
-
trainer = Trainer(
|
| 186 |
-
model=model,
|
| 187 |
-
args=training_args,
|
| 188 |
-
data_collator=collate_fn,
|
| 189 |
-
compute_metrics=compute_metrics,
|
| 190 |
-
train_dataset=prepared_ds["train"],
|
| 191 |
-
eval_dataset=prepared_ds["test"],
|
| 192 |
-
)
|
| 193 |
-
|
| 194 |
-
# 4. Train
|
| 195 |
-
train_results = trainer.train()
|
| 196 |
-
trainer.save_model("deepshield_vit_model")
|
| 197 |
-
processor.save_pretrained("deepshield_vit_model")
|
| 198 |
-
trainer.log_metrics("train", train_results.metrics)
|
| 199 |
-
trainer.save_metrics("train", train_results.metrics)
|
| 200 |
-
trainer.save_state()
|
| 201 |
-
print("Training Complete! The model is saved to ./deepshield_vit_model")
|
| 202 |
-
"""
|
| 203 |
-
|
| 204 |
-
# Assemble the notebook: the markdown introduction followed by the four code
# cells, in the order they are meant to be run in Colab.
nb['cells'] = [
    nbf.v4.new_markdown_cell(text),
    nbf.v4.new_code_cell(code_install),
    nbf.v4.new_code_cell(code_ffpp),
    nbf.v4.new_code_cell(code_extract),
    nbf.v4.new_code_cell(code_train),
]

# Write the notebook next to this script instead of to an absolute path on one
# developer's machine, so the generator works from any checkout and any OS.
output_path = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'Colab_ViT_Training.ipynb',
)
with open(output_path, 'w', encoding='utf-8') as f:
    nbf.write(nb, f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
heatmap_generator.py
DELETED
|
@@ -1,164 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import base64
|
| 4 |
-
import io
|
| 5 |
-
from typing import Optional
|
| 6 |
-
|
| 7 |
-
import cv2
|
| 8 |
-
import numpy as np
|
| 9 |
-
import torch
|
| 10 |
-
from loguru import logger
|
| 11 |
-
from PIL import Image
|
| 12 |
-
from pytorch_grad_cam import GradCAMPlusPlus
|
| 13 |
-
from pytorch_grad_cam.utils.image import show_cam_on_image
|
| 14 |
-
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
|
| 15 |
-
|
| 16 |
-
from config import settings
|
| 17 |
-
from models.model_loader import get_model_loader
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class _HFLogitsWrapper(torch.nn.Module):
    """Adapter whose ``forward`` returns only the wrapped model's ``logits``.

    The wrapped model is called with ``pixel_values=...`` and is expected to
    return an object exposing a ``.logits`` attribute; pytorch_grad_cam needs
    a plain tensor output rather than a dict/dataclass.
    """

    def __init__(self, model: torch.nn.Module) -> None:
        super().__init__()
        self.model = model

    def forward(self, pixel_values: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
        """Run the wrapped model and return its ``logits`` tensor."""
        outputs = self.model(pixel_values=pixel_values)
        return outputs.logits
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
def _vit_reshape_transform(tensor: torch.Tensor, height: int = 14, width: int = 14) -> torch.Tensor:
    """Turn ViT token activations into a spatial feature map for Grad-CAM.

    Args:
        tensor: Activations of shape (B, 1 + height*width, C); index 0 along
            the token axis is dropped (the CLS token).
        height: Number of patch rows in the grid.
        width: Number of patch columns in the grid.

    Returns:
        Tensor of shape (B, C, height, width).
    """
    patch_tokens = tensor[:, 1:, :]
    batch, _, channels = patch_tokens.shape
    grid = patch_tokens.reshape(batch, height, width, channels)
    # Channels-last -> channels-first, the layout Grad-CAM works with.
    return grid.permute(0, 3, 1, 2)
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
def _preprocess_for_cam(pil_img: Image.Image, processor) -> tuple[torch.Tensor, np.ndarray]:
    """Prepare the model input and the matching background image for a CAM overlay.

    Args:
        pil_img: Image to analyse. Any PIL mode is accepted; non-RGB images
            are converted to RGB first.
        processor: Image processor, called as
            ``processor(images=..., return_tensors="pt")``; its ``size``
            attribute (when it is a dict) supplies the overlay height/width.

    Returns:
        ``(input_tensor, rgb)``: ``input_tensor`` is the processor's
        ``pixel_values`` moved to ``settings.DEVICE``; ``rgb`` is a float32
        (H, W, 3) array in [0, 1] of the image resized to the processor size
        (224x224 when the size is unavailable).
    """
    # The overlay must be exactly three channels; RGBA / L / P images would
    # otherwise produce a (H, W, 4) or (H, W) array here.
    if pil_img.mode != "RGB":
        pil_img = pil_img.convert("RGB")

    inputs = processor(images=pil_img, return_tensors="pt")
    input_tensor = inputs["pixel_values"].to(settings.DEVICE)

    size = getattr(processor, "size", None)
    if isinstance(size, dict):
        h = size.get("height", 224)
        w = size.get("width", 224)
    else:
        # Missing or non-dict size: fall back to the 224x224 default.
        h = w = 224

    resized = pil_img.resize((w, h), Image.BILINEAR)
    rgb = np.array(resized).astype(np.float32) / 255.0  # (H,W,3) in [0,1]
    return input_tensor, rgb
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
def _encode_overlay_to_base64(overlay: np.ndarray) -> str:
    """Serialise an image array to PNG and return it as a base64 data URL.

    Args:
        overlay: Image array accepted by ``PIL.Image.fromarray`` (callers in
            this module pass uint8 (H, W, 3) RGB data).

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    png_buffer = io.BytesIO()
    image = Image.fromarray(overlay)
    image.save(png_buffer, format="PNG")
    encoded = base64.b64encode(png_buffer.getvalue()).decode("ascii")
    return "data:image/png;base64," + encoded
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def _compute_gradcam_pp(
    pil_img: Image.Image,
    target_class_idx: Optional[int] = None,
) -> tuple[np.ndarray, np.ndarray]:
    """Run Grad-CAM++ on the image model over its last (up to) three encoder blocks.

    Args:
        pil_img: Image to explain.
        target_class_idx: Class index to explain. When ``None`` no explicit
            target is passed and the CAM library chooses one.

    Returns:
        ``(grayscale_cam, rgb_float)``: the CAM for the single input image and
        the resized RGB float image produced by ``_preprocess_for_cam``.
    """
    model, processor = get_model_loader().load_image_model()

    model.eval()
    # Gradients are switched on for every parameter before the CAM pass.
    # NOTE: the flags are left enabled on the loaded model afterwards.
    for param in model.parameters():
        param.requires_grad_(True)

    input_tensor, rgb_float = _preprocess_for_cam(pil_img, processor)

    # Patches per image side, e.g. 224 / 16 = 14.
    patches_per_side = int(model.config.image_size / model.config.patch_size)

    # Use several of the final encoder blocks (last one first) for smoother heatmaps.
    encoder_layers = model.vit.encoder.layer
    last_n = min(3, len(encoder_layers))
    target_layers = [
        encoder_layers[-depth].layernorm_before
        for depth in range(1, last_n + 1)
    ]

    if target_class_idx is None:
        targets = None
    else:
        targets = [ClassifierOutputTarget(int(target_class_idx))]

    def reshape_tokens(tokens: torch.Tensor) -> torch.Tensor:
        return _vit_reshape_transform(tokens, patches_per_side, patches_per_side)

    with GradCAMPlusPlus(
        model=_HFLogitsWrapper(model),
        target_layers=target_layers,
        reshape_transform=reshape_tokens,
    ) as cam:
        cam_batch = cam(input_tensor=input_tensor, targets=targets)

    # Only one image was submitted, so take the first map of the batch.
    return cam_batch[0], rgb_float
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
def generate_heatmap_base64(
    pil_img: Image.Image,
    target_class_idx: Optional[int] = None,
) -> str:
    """Return the Grad-CAM++ heatmap blended over the image as a base64 PNG data URL.

    Args:
        pil_img: Image to explain.
        target_class_idx: Optional class index forwarded to the CAM computation.
    """
    cam_map, image_rgb = _compute_gradcam_pp(pil_img, target_class_idx)
    blended = show_cam_on_image(image_rgb, cam_map, use_rgb=True)
    rows, cols = blended.shape[0], blended.shape[1]
    logger.info(f"Heatmap generated ({rows}x{cols})")
    return _encode_overlay_to_base64(blended)
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
def _activation_color(activation: float) -> tuple[int, int, int]:
    """Map a mean activation to the RGB colour of its bounding box."""
    if activation >= 0.7:
        return (220, 40, 40)  # red — high suspicion
    if activation >= 0.5:
        return (240, 140, 20)  # orange — medium
    return (230, 200, 40)  # yellow — lower


def generate_boxes_base64(
    pil_img: Image.Image,
    target_class_idx: Optional[int] = None,
    top_k: int = 5,
    threshold: float = 0.4,
) -> str:
    """Draw boxes around the strongest Grad-CAM++ regions and return a base64 PNG data URL.

    The CAM is thresholded, external contours of the resulting mask are
    extracted, and the ``top_k`` largest (by contour area) are boxed on the
    resized image. Each box is coloured and labelled by the mean CAM value
    inside its bounding rectangle.

    Args:
        pil_img: Image to explain.
        target_class_idx: Optional class index forwarded to the CAM computation.
        top_k: Maximum number of regions to draw; values below zero are
            treated as zero.
        threshold: Minimum CAM value for a pixel to count as activated.

    Returns:
        Data URL of the annotated image, or of the unannotated image when no
        pixel reaches ``threshold``.
    """
    grayscale_cam, rgb_float = _compute_gradcam_pp(pil_img, target_class_idx)

    base_img = (rgb_float * 255).astype(np.uint8).copy()

    # Threshold the heatmap to find activated regions
    binary = (grayscale_cam >= threshold).astype(np.uint8) * 255
    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        logger.info("No significant activation regions found for bounding boxes")
        return _encode_overlay_to_base64(base_img)

    # Sort by area descending, take top_k.  A negative top_k would otherwise
    # slice from the end and drop the smallest regions instead of limiting.
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:max(top_k, 0)]

    for cnt in contours:
        x, y, bw, bh = cv2.boundingRect(cnt)
        region_activation = grayscale_cam[y:y + bh, x:x + bw].mean()
        color = _activation_color(region_activation)

        cv2.rectangle(base_img, (x, y), (x + bw, y + bh), color, 2)
        label = f"{region_activation * 100:.0f}%"
        # Place the label just above the box, but never above the top edge.
        cv2.putText(base_img, label, (x, max(y - 6, 12)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, color, 1, cv2.LINE_AA)

    logger.info(f"Bounding boxes generated: {len(contours)} regions")
    return _encode_overlay_to_base64(base_img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
image_service.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
| 1 |
-
from __future__ import annotations
|
| 2 |
-
|
| 3 |
-
import io
|
| 4 |
-
from dataclasses import dataclass
|
| 5 |
-
from typing import Tuple
|
| 6 |
-
|
| 7 |
-
import torch
|
| 8 |
-
from loguru import logger
|
| 9 |
-
from PIL import Image
|
| 10 |
-
|
| 11 |
-
from config import settings
|
| 12 |
-
from models.model_loader import get_model_loader
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
@dataclass
class ImageClassification:
    """Outcome of classifying a single image."""

    # Name of the top-scoring class.
    label: str
    # Score of the top-scoring class.
    confidence: float
    # Score of every class, keyed by class name.
    all_scores: dict[str, float]
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def load_image_from_bytes(data: bytes) -> Image.Image:
    """Decode encoded image bytes into a PIL image in RGB mode.

    Images that are already RGB are returned as opened; any other mode is
    converted with ``Image.convert("RGB")``.
    """
    decoded = Image.open(io.BytesIO(data))
    return decoded if decoded.mode == "RGB" else decoded.convert("RGB")
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
def classify_image(pil_img: Image.Image) -> ImageClassification:
    """Classify a PIL image with the loaded image model.

    Returns:
        An ``ImageClassification`` holding the top label, its softmax
        probability, and the probability of every class. Class names come
        from ``model.config.id2label``; an index without a name is reported
        as its decimal string.
    """
    model, processor = get_model_loader().load_image_model()

    encoded = processor(images=pil_img, return_tensors="pt")
    inputs = {name: tensor.to(settings.DEVICE) for name, tensor in encoded.items()}

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        logits = model(**inputs).logits  # (1, num_labels)
        probs = torch.softmax(logits, dim=-1)[0]

    id2label = getattr(model.config, "id2label", {})

    def class_name(index: int) -> str:
        return id2label.get(index, str(index))

    all_scores = {class_name(i): float(p.item()) for i, p in enumerate(probs)}
    top_idx = int(torch.argmax(probs).item())
    top_label = class_name(top_idx)
    top_conf = float(probs[top_idx].item())

    logger.info(f"Image classify → {top_label} @ {top_conf:.3f}")
    return ImageClassification(label=top_label, confidence=top_conf, all_scores=all_scores)
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def preprocess_and_classify(raw_bytes: bytes) -> Tuple[Image.Image, ImageClassification]:
    """Decode raw image bytes and classify the result in one call.

    Returns:
        ``(image, classification)`` — the decoded PIL image is handed back
        alongside the result so later steps (heatmap, artifact scan) can
        reuse it without decoding again.
    """
    image = load_image_from_bytes(raw_bytes)
    return image, classify_image(image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llm_explainer.py
DELETED
|
@@ -1,191 +0,0 @@
|
|
| 1 |
-
"""LLM Explainability Card — Phase 12.3
|
| 2 |
-
|
| 3 |
-
Generates a plain-English summary paragraph + 3 key-signal bullets from the
|
| 4 |
-
full analysis payload. Supports Gemini (default) and OpenAI providers.
|
| 5 |
-
Results are cached per record_id to avoid re-spending tokens.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from __future__ import annotations
|
| 9 |
-
|
| 10 |
-
import json
|
| 11 |
-
from abc import ABC, abstractmethod
|
| 12 |
-
from functools import lru_cache
|
| 13 |
-
from typing import Any
|
| 14 |
-
|
| 15 |
-
from loguru import logger
|
| 16 |
-
|
| 17 |
-
from config import settings
|
| 18 |
-
from schemas.common import LLMExplainabilitySummary
|
| 19 |
-
|
| 20 |
-
# ── In-memory cache keyed by record_id ──
|
| 21 |
-
_cache: dict[str, LLMExplainabilitySummary] = {}
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
_PROMPT_TEMPLATE = """\
|
| 25 |
-
You are DeepShield's explainability engine. Given the JSON analysis payload below,
|
| 26 |
-
write a concise, accessible summary for a non-technical user.
|
| 27 |
-
|
| 28 |
-
**Output format (strict JSON only — no markdown fences):**
|
| 29 |
-
{{
|
| 30 |
-
"paragraph": "<2-3 sentence plain-English summary of the verdict and key signals>",
|
| 31 |
-
"bullets": [
|
| 32 |
-
"<key signal 1>",
|
| 33 |
-
"<key signal 2>",
|
| 34 |
-
"<key signal 3>"
|
| 35 |
-
]
|
| 36 |
-
}}
|
| 37 |
-
|
| 38 |
-
Rules:
|
| 39 |
-
- Be factual. State what the analysis found, not what you speculate.
|
| 40 |
-
- Reference specific indicators (e.g. "GAN artifact score", "EXIF metadata", "sensationalism level").
|
| 41 |
-
- If the verdict is "Likely Authentic", reassure the user and explain why.
|
| 42 |
-
- If the verdict is "Likely Manipulated" or "Suspicious", highlight the strongest evidence.
|
| 43 |
-
- Keep the paragraph under 60 words. Each bullet under 20 words.
|
| 44 |
-
|
| 45 |
-
**Analysis payload:**
|
| 46 |
-
{payload_json}
|
| 47 |
-
"""
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
class _LLMProvider(ABC):
    """Common interface for the LLM backends used by the explainer."""

    @abstractmethod
    def generate(self, prompt: str) -> str:
        """Submit ``prompt`` to the backend and return its raw text reply."""
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
class _GeminiProvider(_LLMProvider):
    """LLM backend backed by the ``google.generativeai`` SDK."""

    def __init__(self) -> None:
        # Imported here so the SDK is only loaded when this provider is built.
        import google.generativeai as genai

        genai.configure(api_key=settings.LLM_API_KEY)
        self._model = genai.GenerativeModel(settings.LLM_MODEL)

    def generate(self, prompt: str) -> str:
        """Return the text of the model's reply to ``prompt``."""
        reply = self._model.generate_content(prompt)
        return reply.text
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
class _OpenAIProvider(_LLMProvider):
    """LLM backend backed by the ``openai`` client's chat completions API."""

    def __init__(self) -> None:
        # Imported here so the SDK is only loaded when this provider is built.
        from openai import OpenAI

        self._client = OpenAI(api_key=settings.LLM_API_KEY)

    def generate(self, prompt: str) -> str:
        """Send ``prompt`` as a single user message and return the first choice's content."""
        completion = self._client.chat.completions.create(
            model=settings.LLM_MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.3,
            max_tokens=300,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
@lru_cache(maxsize=1)
def _get_provider() -> _LLMProvider:
    """Build the configured LLM provider once and reuse it on later calls.

    ``settings.LLM_PROVIDER`` is compared case-insensitively: ``"openai"``
    selects the OpenAI backend; every other value selects Gemini.
    """
    use_openai = settings.LLM_PROVIDER.lower() == "openai"
    provider_cls = _OpenAIProvider if use_openai else _GeminiProvider
    return provider_cls()
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
def _parse_llm_response(raw: str) -> tuple[str, list[str]]:
    """Parse the LLM's JSON reply into ``(paragraph, bullets)``.

    Tolerates replies wrapped in markdown code fences and replies with extra
    prose around a single JSON object.

    Args:
        raw: Raw text returned by the provider.

    Returns:
        The ``paragraph`` string (empty when missing or null) and at most
        three ``bullets``, each coerced to ``str``. A non-list ``bullets``
        value is wrapped in a one-element list.

    Raises:
        json.JSONDecodeError: If no JSON could be decoded, or the decoded
            value is not a JSON object.
    """
    text = raw.strip()
    # Strip markdown code fences if present
    if text.startswith("```"):
        text = "\n".join(
            line for line in text.split("\n")
            if not line.strip().startswith("```")
        ).strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span, for replies such as
        # "Here is the summary: {...}".
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start:end + 1])

    if not isinstance(parsed, dict):
        # Reported as a decode error so callers handle it like any other
        # unparseable reply instead of failing on ``.get``.
        raise json.JSONDecodeError("LLM response is not a JSON object", text, 0)

    paragraph = parsed.get("paragraph", "")
    if paragraph is None:
        paragraph = ""
    elif not isinstance(paragraph, str):
        paragraph = str(paragraph)

    bullets = parsed.get("bullets", [])
    if bullets is None:
        bullets = []
    elif not isinstance(bullets, list):
        bullets = [bullets]
    return paragraph, [str(item) for item in bullets[:3]]
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
def generate_llm_summary(
    payload: dict[str, Any],
    record_id: str | None = None,
) -> LLMExplainabilitySummary:
    """Produce a plain-English explanation of an analysis result.

    Resolution order: cached summary for ``record_id``; a static summary
    built from the verdict when no API key is configured; otherwise a call to
    the configured LLM provider. Provider and parsing failures are logged and
    turned into a generic summary rather than raised.

    Args:
        payload: The analysis response dict. ``payload["verdict"]`` supplies
            ``label`` / ``authenticity_score`` for the static fallback.
        record_id: Optional cache key. Only successful LLM summaries are cached.

    Returns:
        LLMExplainabilitySummary with paragraph, bullets, and model info.
    """
    cached = _cache.get(record_id) if record_id else None
    if cached is not None:
        logger.debug(f"LLM summary cache hit for record_id={record_id}")
        # The stored object itself is flagged and returned.
        cached.cached = True
        return cached

    # Guard: no API key configured
    if not settings.LLM_API_KEY:
        logger.warning("LLM_API_KEY not set — using deterministic fallback summary")

        verdict = payload.get("verdict", {})
        label = verdict.get("label", "Unknown")
        score = verdict.get("authenticity_score", 50)

        return LLMExplainabilitySummary(
            paragraph=f"The DeepShield AI engine has analyzed this media and determined it is '{label}' with an authenticity score of {score}/100. We arrived at this conclusion by passing the file through our deepfake detection algorithms, artifact scanners, and metadata analyzers.",
            bullets=[
                f"Overall Authenticity Score: {score}/100",
                f"Primary Verdict: {label}",
                "Note: Configure an LLM API key for deeper contextual analysis.",
            ],
            model_used="static-fallback",
        )

    # Keep the prompt small: drop ``*_base64`` entries from the explainability
    # section (a non-dict explainability value is dropped entirely).
    slim_payload = dict(payload)
    explainability = slim_payload.pop("explainability", None)
    if isinstance(explainability, dict):
        slim_payload["explainability"] = {
            key: value
            for key, value in explainability.items()
            if not key.endswith("_base64")
        }

    payload_json = json.dumps(slim_payload, indent=2, default=str)
    prompt = _PROMPT_TEMPLATE.format(payload_json=payload_json)

    def failure_summary(bullet: str, model_used: str) -> LLMExplainabilitySummary:
        return LLMExplainabilitySummary(
            paragraph="Analysis complete. See the detailed indicators below for specifics.",
            bullets=[bullet],
            model_used=model_used,
        )

    try:
        raw_response = _get_provider().generate(prompt)
        paragraph, bullets = _parse_llm_response(raw_response)

        summary = LLMExplainabilitySummary(
            paragraph=paragraph,
            bullets=bullets,
            model_used=f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
        )

        # Cache result
        if record_id:
            _cache[record_id] = summary

        logger.info(f"LLM summary generated via {settings.LLM_PROVIDER}/{settings.LLM_MODEL}")
        return summary

    except json.JSONDecodeError as e:
        logger.error(f"LLM returned unparseable JSON: {e}")
        return failure_summary(
            "LLM explanation could not be parsed",
            f"{settings.LLM_PROVIDER}/{settings.LLM_MODEL}",
        )
    except Exception as e:
        logger.error(f"LLM explainer failed: {e}")
        return failure_summary("LLM explanation temporarily unavailable", "error")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
logs/deepshield.log
ADDED
|
@@ -0,0 +1,949 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-04-22 18:24:59.601 | INFO | main:lifespan:83 - Starting DeepShield backend
|
| 2 |
+
2026-04-22 18:24:59.655 | INFO | main:lifespan:85 - Database initialized
|
| 3 |
+
2026-04-22 18:24:59.656 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 4 |
+
2026-04-22 18:25:06.201 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 5 |
+
2026-04-22 18:25:06.206 | INFO | services.report_service:cleanup_expired:151 - Cleaned up 1 expired reports
|
| 6 |
+
2026-04-22 18:26:20.263 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 7 |
+
2026-04-22 18:26:22.700 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
|
| 8 |
+
2026-04-22 18:26:23.034 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.078 ffpp=n/a eff=0.18335410952568054 → 0.131
|
| 9 |
+
2026-04-22 18:26:28.349 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
|
| 10 |
+
2026-04-22 18:26:28.390 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
|
| 11 |
+
2026-04-22 18:26:29.238 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 12 |
+
2026-04-22 18:26:29.277 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (256x256)
|
| 13 |
+
2026-04-22 18:26:30.141 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 14 |
+
2026-04-22 18:26:30.327 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 15 |
+
2026-04-22 18:26:30.347 | INFO | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=19 score=13 verdict=Very Likely Fake
|
| 16 |
+
2026-04-22 18:26:30.349 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
|
| 17 |
+
2026-04-22 18:26:30.349 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
|
| 18 |
+
2026-04-22 18:27:58.805 | INFO | main:lifespan:93 - Shutting down DeepShield backend
|
| 19 |
+
2026-04-22 18:28:09.692 | INFO | main:lifespan:83 - Starting DeepShield backend
|
| 20 |
+
2026-04-22 18:28:09.698 | INFO | main:lifespan:85 - Database initialized
|
| 21 |
+
2026-04-22 18:28:09.698 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 22 |
+
2026-04-22 18:28:11.556 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 23 |
+
2026-04-24 01:50:58.220 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 24 |
+
2026-04-24 01:51:03.592 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
|
| 25 |
+
2026-04-24 01:51:03.887 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.597 ffpp=n/a eff=n/a → 0.597
|
| 26 |
+
2026-04-24 01:51:12.975 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
|
| 27 |
+
2026-04-24 01:51:13.089 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
|
| 28 |
+
2026-04-24 01:51:13.255 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
|
| 29 |
+
2026-04-24 01:51:13.320 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (640x427)
|
| 30 |
+
2026-04-24 01:51:14.648 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
|
| 31 |
+
2026-04-24 01:51:14.933 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 32 |
+
2026-04-24 01:51:14.979 | INFO | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=20 score=40 verdict=Likely Fake
|
| 33 |
+
2026-04-24 01:51:14.982 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: No module named 'google.generativeai'
|
| 34 |
+
2026-04-24 01:51:14.984 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: No module named 'google.generativeai'
|
| 35 |
+
2026-04-24 07:35:53.458 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 36 |
+
2026-04-24 07:36:02.194 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 37 |
+
2026-04-24 07:36:03.057 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
|
| 38 |
+
2026-04-24 07:36:03.058 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 68 (High) excl=4 caps=3 cb=1 emo=1
|
| 39 |
+
2026-04-24 07:36:03.061 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
|
| 40 |
+
2026-04-24 07:36:05.585 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 41 |
+
2026-04-24 07:36:06.959 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=21 text score=15 verdict=Very Likely Fake
|
| 42 |
+
2026-04-24 07:36:08.561 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 43 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 44 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 45 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 46 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 47 |
+
Please retry in 51.884484839s. [links {
|
| 48 |
+
description: "Learn more about Gemini API quotas"
|
| 49 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 50 |
+
}
|
| 51 |
+
, violations {
|
| 52 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 53 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 54 |
+
quota_dimensions {
|
| 55 |
+
key: "model"
|
| 56 |
+
value: "gemini-2.5-pro"
|
| 57 |
+
}
|
| 58 |
+
quota_dimensions {
|
| 59 |
+
key: "location"
|
| 60 |
+
value: "global"
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
violations {
|
| 64 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 65 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 66 |
+
quota_dimensions {
|
| 67 |
+
key: "model"
|
| 68 |
+
value: "gemini-2.5-pro"
|
| 69 |
+
}
|
| 70 |
+
quota_dimensions {
|
| 71 |
+
key: "location"
|
| 72 |
+
value: "global"
|
| 73 |
+
}
|
| 74 |
+
}
|
| 75 |
+
violations {
|
| 76 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 77 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 78 |
+
quota_dimensions {
|
| 79 |
+
key: "model"
|
| 80 |
+
value: "gemini-2.5-pro"
|
| 81 |
+
}
|
| 82 |
+
quota_dimensions {
|
| 83 |
+
key: "location"
|
| 84 |
+
value: "global"
|
| 85 |
+
}
|
| 86 |
+
}
|
| 87 |
+
violations {
|
| 88 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 89 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 90 |
+
quota_dimensions {
|
| 91 |
+
key: "model"
|
| 92 |
+
value: "gemini-2.5-pro"
|
| 93 |
+
}
|
| 94 |
+
quota_dimensions {
|
| 95 |
+
key: "location"
|
| 96 |
+
value: "global"
|
| 97 |
+
}
|
| 98 |
+
}
|
| 99 |
+
, retry_delay {
|
| 100 |
+
seconds: 51
|
| 101 |
+
}
|
| 102 |
+
]
|
| 103 |
+
2026-04-24 07:36:41.979 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 104 |
+
2026-04-24 07:36:47.524 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 105 |
+
2026-04-24 07:36:48.484 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 106 |
+
2026-04-24 07:36:49.759 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
|
| 107 |
+
2026-04-24 07:36:49.848 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.521 ffpp=n/a eff=n/a → 0.521
|
| 108 |
+
2026-04-24 07:36:51.638 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
|
| 109 |
+
2026-04-24 07:36:51.638 | WARNING | services.artifact_detector:detect_face_based_artifacts:213 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
|
| 110 |
+
2026-04-24 07:36:51.649 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
|
| 111 |
+
2026-04-24 07:36:51.696 | INFO | services.ela_service:generate_ela_base64:60 - ELA map generated (512x512)
|
| 112 |
+
2026-04-24 07:36:52.470 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 113 |
+
2026-04-24 07:36:52.519 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 114 |
+
2026-04-24 07:36:52.542 | INFO | api.v1.analyze:analyze_image:215 - Saved AnalysisRecord id=22 score=48 verdict=Possibly Manipulated
|
| 115 |
+
2026-04-24 07:36:53.674 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 116 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 117 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 118 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 119 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 120 |
+
Please retry in 6.748563195s. [links {
|
| 121 |
+
description: "Learn more about Gemini API quotas"
|
| 122 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 123 |
+
}
|
| 124 |
+
, violations {
|
| 125 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 126 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 127 |
+
quota_dimensions {
|
| 128 |
+
key: "model"
|
| 129 |
+
value: "gemini-2.5-pro"
|
| 130 |
+
}
|
| 131 |
+
quota_dimensions {
|
| 132 |
+
key: "location"
|
| 133 |
+
value: "global"
|
| 134 |
+
}
|
| 135 |
+
}
|
| 136 |
+
violations {
|
| 137 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 138 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 139 |
+
quota_dimensions {
|
| 140 |
+
key: "model"
|
| 141 |
+
value: "gemini-2.5-pro"
|
| 142 |
+
}
|
| 143 |
+
quota_dimensions {
|
| 144 |
+
key: "location"
|
| 145 |
+
value: "global"
|
| 146 |
+
}
|
| 147 |
+
}
|
| 148 |
+
violations {
|
| 149 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 150 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 151 |
+
quota_dimensions {
|
| 152 |
+
key: "model"
|
| 153 |
+
value: "gemini-2.5-pro"
|
| 154 |
+
}
|
| 155 |
+
quota_dimensions {
|
| 156 |
+
key: "location"
|
| 157 |
+
value: "global"
|
| 158 |
+
}
|
| 159 |
+
}
|
| 160 |
+
violations {
|
| 161 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 162 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 163 |
+
quota_dimensions {
|
| 164 |
+
key: "model"
|
| 165 |
+
value: "gemini-2.5-pro"
|
| 166 |
+
}
|
| 167 |
+
quota_dimensions {
|
| 168 |
+
key: "location"
|
| 169 |
+
value: "global"
|
| 170 |
+
}
|
| 171 |
+
}
|
| 172 |
+
, retry_delay {
|
| 173 |
+
seconds: 6
|
| 174 |
+
}
|
| 175 |
+
]
|
| 176 |
+
2026-04-24 07:36:54.760 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 177 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 178 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 179 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 180 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 181 |
+
Please retry in 5.653927512s. [links {
|
| 182 |
+
description: "Learn more about Gemini API quotas"
|
| 183 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 184 |
+
}
|
| 185 |
+
, violations {
|
| 186 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 187 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 188 |
+
quota_dimensions {
|
| 189 |
+
key: "model"
|
| 190 |
+
value: "gemini-2.5-pro"
|
| 191 |
+
}
|
| 192 |
+
quota_dimensions {
|
| 193 |
+
key: "location"
|
| 194 |
+
value: "global"
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
violations {
|
| 198 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 199 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 200 |
+
quota_dimensions {
|
| 201 |
+
key: "model"
|
| 202 |
+
value: "gemini-2.5-pro"
|
| 203 |
+
}
|
| 204 |
+
quota_dimensions {
|
| 205 |
+
key: "location"
|
| 206 |
+
value: "global"
|
| 207 |
+
}
|
| 208 |
+
}
|
| 209 |
+
violations {
|
| 210 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 211 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 212 |
+
quota_dimensions {
|
| 213 |
+
key: "model"
|
| 214 |
+
value: "gemini-2.5-pro"
|
| 215 |
+
}
|
| 216 |
+
quota_dimensions {
|
| 217 |
+
key: "location"
|
| 218 |
+
value: "global"
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
violations {
|
| 222 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 223 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 224 |
+
quota_dimensions {
|
| 225 |
+
key: "model"
|
| 226 |
+
value: "gemini-2.5-pro"
|
| 227 |
+
}
|
| 228 |
+
quota_dimensions {
|
| 229 |
+
key: "location"
|
| 230 |
+
value: "global"
|
| 231 |
+
}
|
| 232 |
+
}
|
| 233 |
+
, retry_delay {
|
| 234 |
+
seconds: 5
|
| 235 |
+
}
|
| 236 |
+
]
|
| 237 |
+
2026-04-24 15:16:36.138 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 238 |
+
2026-04-24 15:16:43.946 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 239 |
+
2026-04-24 15:16:44.719 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
|
| 240 |
+
2026-04-24 15:16:44.721 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
|
| 241 |
+
2026-04-24 15:16:44.723 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
|
| 242 |
+
2026-04-24 15:16:45.864 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 243 |
+
2026-04-24 15:16:47.113 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=23 text score=15 verdict=Very Likely Fake
|
| 244 |
+
2026-04-24 15:16:48.348 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 245 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 246 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 247 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 248 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 249 |
+
Please retry in 12.294521515s. [links {
|
| 250 |
+
description: "Learn more about Gemini API quotas"
|
| 251 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 252 |
+
}
|
| 253 |
+
, violations {
|
| 254 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 255 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 256 |
+
quota_dimensions {
|
| 257 |
+
key: "model"
|
| 258 |
+
value: "gemini-2.5-pro"
|
| 259 |
+
}
|
| 260 |
+
quota_dimensions {
|
| 261 |
+
key: "location"
|
| 262 |
+
value: "global"
|
| 263 |
+
}
|
| 264 |
+
}
|
| 265 |
+
violations {
|
| 266 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 267 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 268 |
+
quota_dimensions {
|
| 269 |
+
key: "model"
|
| 270 |
+
value: "gemini-2.5-pro"
|
| 271 |
+
}
|
| 272 |
+
quota_dimensions {
|
| 273 |
+
key: "location"
|
| 274 |
+
value: "global"
|
| 275 |
+
}
|
| 276 |
+
}
|
| 277 |
+
violations {
|
| 278 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 279 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 280 |
+
quota_dimensions {
|
| 281 |
+
key: "model"
|
| 282 |
+
value: "gemini-2.5-pro"
|
| 283 |
+
}
|
| 284 |
+
quota_dimensions {
|
| 285 |
+
key: "location"
|
| 286 |
+
value: "global"
|
| 287 |
+
}
|
| 288 |
+
}
|
| 289 |
+
violations {
|
| 290 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 291 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 292 |
+
quota_dimensions {
|
| 293 |
+
key: "model"
|
| 294 |
+
value: "gemini-2.5-pro"
|
| 295 |
+
}
|
| 296 |
+
quota_dimensions {
|
| 297 |
+
key: "location"
|
| 298 |
+
value: "global"
|
| 299 |
+
}
|
| 300 |
+
}
|
| 301 |
+
, retry_delay {
|
| 302 |
+
seconds: 12
|
| 303 |
+
}
|
| 304 |
+
]
|
| 305 |
+
2026-04-24 15:16:48.553 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 306 |
+
2026-04-24 15:16:50.111 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 307 |
+
2026-04-24 15:16:51.265 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 308 |
+
2026-04-24 15:16:52.685 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
|
| 309 |
+
2026-04-24 15:16:52.723 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.517 ffpp=n/a eff=n/a → 0.517
|
| 310 |
+
2026-04-24 15:16:52.735 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
|
| 311 |
+
2026-04-24 15:16:54.934 | WARNING | services.artifact_detector:detect_face_based_artifacts:211 - Face-based artifact detection failed: module 'mediapipe' has no attribute 'solutions'
|
| 312 |
+
2026-04-24 15:16:54.949 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
|
| 313 |
+
2026-04-24 15:16:54.965 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (256x256)
|
| 314 |
+
2026-04-24 15:16:55.916 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 315 |
+
2026-04-24 15:16:55.975 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 316 |
+
2026-04-24 15:16:55.989 | INFO | api.v1.analyze:analyze_image:214 - Saved AnalysisRecord id=24 score=48 verdict=Possibly Manipulated
|
| 317 |
+
2026-04-24 15:16:56.236 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 318 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 319 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 320 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 321 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 322 |
+
Please retry in 4.477916448s. [links {
|
| 323 |
+
description: "Learn more about Gemini API quotas"
|
| 324 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 325 |
+
}
|
| 326 |
+
, violations {
|
| 327 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 328 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 329 |
+
quota_dimensions {
|
| 330 |
+
key: "model"
|
| 331 |
+
value: "gemini-2.5-pro"
|
| 332 |
+
}
|
| 333 |
+
quota_dimensions {
|
| 334 |
+
key: "location"
|
| 335 |
+
value: "global"
|
| 336 |
+
}
|
| 337 |
+
}
|
| 338 |
+
violations {
|
| 339 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 340 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 341 |
+
quota_dimensions {
|
| 342 |
+
key: "model"
|
| 343 |
+
value: "gemini-2.5-pro"
|
| 344 |
+
}
|
| 345 |
+
quota_dimensions {
|
| 346 |
+
key: "location"
|
| 347 |
+
value: "global"
|
| 348 |
+
}
|
| 349 |
+
}
|
| 350 |
+
violations {
|
| 351 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 352 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 353 |
+
quota_dimensions {
|
| 354 |
+
key: "model"
|
| 355 |
+
value: "gemini-2.5-pro"
|
| 356 |
+
}
|
| 357 |
+
quota_dimensions {
|
| 358 |
+
key: "location"
|
| 359 |
+
value: "global"
|
| 360 |
+
}
|
| 361 |
+
}
|
| 362 |
+
violations {
|
| 363 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 364 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 365 |
+
quota_dimensions {
|
| 366 |
+
key: "model"
|
| 367 |
+
value: "gemini-2.5-pro"
|
| 368 |
+
}
|
| 369 |
+
quota_dimensions {
|
| 370 |
+
key: "location"
|
| 371 |
+
value: "global"
|
| 372 |
+
}
|
| 373 |
+
}
|
| 374 |
+
, retry_delay {
|
| 375 |
+
seconds: 4
|
| 376 |
+
}
|
| 377 |
+
]
|
| 378 |
+
2026-04-24 15:16:57.419 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:104 - VLM breakdown failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 379 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 380 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 381 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 382 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 383 |
+
Please retry in 3.282459328s. [links {
|
| 384 |
+
description: "Learn more about Gemini API quotas"
|
| 385 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 386 |
+
}
|
| 387 |
+
, violations {
|
| 388 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 389 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 390 |
+
quota_dimensions {
|
| 391 |
+
key: "model"
|
| 392 |
+
value: "gemini-2.5-pro"
|
| 393 |
+
}
|
| 394 |
+
quota_dimensions {
|
| 395 |
+
key: "location"
|
| 396 |
+
value: "global"
|
| 397 |
+
}
|
| 398 |
+
}
|
| 399 |
+
violations {
|
| 400 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 401 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 402 |
+
quota_dimensions {
|
| 403 |
+
key: "model"
|
| 404 |
+
value: "gemini-2.5-pro"
|
| 405 |
+
}
|
| 406 |
+
quota_dimensions {
|
| 407 |
+
key: "location"
|
| 408 |
+
value: "global"
|
| 409 |
+
}
|
| 410 |
+
}
|
| 411 |
+
violations {
|
| 412 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 413 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 414 |
+
quota_dimensions {
|
| 415 |
+
key: "model"
|
| 416 |
+
value: "gemini-2.5-pro"
|
| 417 |
+
}
|
| 418 |
+
quota_dimensions {
|
| 419 |
+
key: "location"
|
| 420 |
+
value: "global"
|
| 421 |
+
}
|
| 422 |
+
}
|
| 423 |
+
violations {
|
| 424 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 425 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 426 |
+
quota_dimensions {
|
| 427 |
+
key: "model"
|
| 428 |
+
value: "gemini-2.5-pro"
|
| 429 |
+
}
|
| 430 |
+
quota_dimensions {
|
| 431 |
+
key: "location"
|
| 432 |
+
value: "global"
|
| 433 |
+
}
|
| 434 |
+
}
|
| 435 |
+
, retry_delay {
|
| 436 |
+
seconds: 3
|
| 437 |
+
}
|
| 438 |
+
]
|
| 439 |
+
2026-04-24 15:16:57.445 | INFO | models.model_loader:load_ocr_engine:130 - Loading EasyOCR reader (langs: ['en', 'hi'])
|
| 440 |
+
2026-04-24 15:17:27.399 | INFO | models.model_loader:load_ocr_engine:136 - EasyOCR loaded
|
| 441 |
+
2026-04-24 15:17:27.870 | INFO | services.screenshot_service:run_ocr:48 - OCR extracted 0 text regions
|
| 442 |
+
2026-04-24 15:17:27.881 | INFO | api.v1.analyze:analyze_screenshot_endpoint:726 - Saved AnalysisRecord id=25 screenshot score=50 verdict=Possibly Manipulated
|
| 443 |
+
2026-04-24 15:17:28.066 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 444 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 445 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 446 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 447 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 448 |
+
Please retry in 32.593323033s. [links {
|
| 449 |
+
description: "Learn more about Gemini API quotas"
|
| 450 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 451 |
+
}
|
| 452 |
+
, violations {
|
| 453 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 454 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 455 |
+
quota_dimensions {
|
| 456 |
+
key: "model"
|
| 457 |
+
value: "gemini-2.5-pro"
|
| 458 |
+
}
|
| 459 |
+
quota_dimensions {
|
| 460 |
+
key: "location"
|
| 461 |
+
value: "global"
|
| 462 |
+
}
|
| 463 |
+
}
|
| 464 |
+
violations {
|
| 465 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 466 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 467 |
+
quota_dimensions {
|
| 468 |
+
key: "model"
|
| 469 |
+
value: "gemini-2.5-pro"
|
| 470 |
+
}
|
| 471 |
+
quota_dimensions {
|
| 472 |
+
key: "location"
|
| 473 |
+
value: "global"
|
| 474 |
+
}
|
| 475 |
+
}
|
| 476 |
+
violations {
|
| 477 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 478 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 479 |
+
quota_dimensions {
|
| 480 |
+
key: "model"
|
| 481 |
+
value: "gemini-2.5-pro"
|
| 482 |
+
}
|
| 483 |
+
quota_dimensions {
|
| 484 |
+
key: "location"
|
| 485 |
+
value: "global"
|
| 486 |
+
}
|
| 487 |
+
}
|
| 488 |
+
violations {
|
| 489 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 490 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 491 |
+
quota_dimensions {
|
| 492 |
+
key: "model"
|
| 493 |
+
value: "gemini-2.5-pro"
|
| 494 |
+
}
|
| 495 |
+
quota_dimensions {
|
| 496 |
+
key: "location"
|
| 497 |
+
value: "global"
|
| 498 |
+
}
|
| 499 |
+
}
|
| 500 |
+
, retry_delay {
|
| 501 |
+
seconds: 32
|
| 502 |
+
}
|
| 503 |
+
]
|
| 504 |
+
2026-04-24 15:17:54.819 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 505 |
+
2026-04-24 15:18:00.795 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 506 |
+
2026-04-24 15:18:00.888 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
|
| 507 |
+
2026-04-24 15:18:00.889 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
|
| 508 |
+
2026-04-24 15:18:00.891 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
|
| 509 |
+
2026-04-24 15:18:01.659 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 510 |
+
2026-04-24 15:18:02.878 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=26 text score=15 verdict=Very Likely Fake
|
| 511 |
+
2026-04-24 15:18:03.994 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 512 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 513 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.5-pro
|
| 514 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 515 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.5-pro
|
| 516 |
+
Please retry in 56.638939454s. [links {
|
| 517 |
+
description: "Learn more about Gemini API quotas"
|
| 518 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 519 |
+
}
|
| 520 |
+
, violations {
|
| 521 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 522 |
+
quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
|
| 523 |
+
quota_dimensions {
|
| 524 |
+
key: "model"
|
| 525 |
+
value: "gemini-2.5-pro"
|
| 526 |
+
}
|
| 527 |
+
quota_dimensions {
|
| 528 |
+
key: "location"
|
| 529 |
+
value: "global"
|
| 530 |
+
}
|
| 531 |
+
}
|
| 532 |
+
violations {
|
| 533 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 534 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 535 |
+
quota_dimensions {
|
| 536 |
+
key: "model"
|
| 537 |
+
value: "gemini-2.5-pro"
|
| 538 |
+
}
|
| 539 |
+
quota_dimensions {
|
| 540 |
+
key: "location"
|
| 541 |
+
value: "global"
|
| 542 |
+
}
|
| 543 |
+
}
|
| 544 |
+
violations {
|
| 545 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 546 |
+
quota_id: "GenerateContentInputTokensPerModelPerMinute-FreeTier"
|
| 547 |
+
quota_dimensions {
|
| 548 |
+
key: "model"
|
| 549 |
+
value: "gemini-2.5-pro"
|
| 550 |
+
}
|
| 551 |
+
quota_dimensions {
|
| 552 |
+
key: "location"
|
| 553 |
+
value: "global"
|
| 554 |
+
}
|
| 555 |
+
}
|
| 556 |
+
violations {
|
| 557 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_input_token_count"
|
| 558 |
+
quota_id: "GenerateContentInputTokensPerModelPerDay-FreeTier"
|
| 559 |
+
quota_dimensions {
|
| 560 |
+
key: "model"
|
| 561 |
+
value: "gemini-2.5-pro"
|
| 562 |
+
}
|
| 563 |
+
quota_dimensions {
|
| 564 |
+
key: "location"
|
| 565 |
+
value: "global"
|
| 566 |
+
}
|
| 567 |
+
}
|
| 568 |
+
, retry_delay {
|
| 569 |
+
seconds: 56
|
| 570 |
+
}
|
| 571 |
+
]
|
| 572 |
+
2026-04-24 15:20:38.285 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 573 |
+
2026-04-24 15:20:43.929 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 574 |
+
2026-04-24 15:20:44.034 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.998 fake_p=0.998
|
| 575 |
+
2026-04-24 15:20:44.035 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 67 (High) excl=3 caps=2 cb=1 emo=1
|
| 576 |
+
2026-04-24 15:20:44.037 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 3 found
|
| 577 |
+
2026-04-24 15:20:44.806 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 578 |
+
2026-04-24 15:20:46.001 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=27 text score=15 verdict=Very Likely Fake
|
| 579 |
+
2026-04-24 15:20:56.376 | INFO | services.llm_explainer:generate_llm_summary:175 - LLM summary generated via gemini/gemini-2.5-flash
|
| 580 |
+
2026-04-24 15:33:56.592 | INFO | api.v1.auth:register:33 - Registered user id=3 email=***@example.com
|
| 581 |
+
2026-04-24 15:33:57.227 | INFO | api.v1.auth:login:42 - Login user id=3 email=***@example.com
|
| 582 |
+
2026-04-24 15:33:57.553 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 583 |
+
2026-04-24 15:34:06.986 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 584 |
+
2026-04-24 15:34:07.731 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
|
| 585 |
+
2026-04-24 15:34:07.733 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 586 |
+
2026-04-24 15:34:07.736 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 587 |
+
2026-04-24 15:34:09.017 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 588 |
+
2026-04-24 15:34:10.285 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=28 text score=30 verdict=Likely Fake
|
| 589 |
+
2026-04-24 15:34:41.718 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 590 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
|
| 591 |
+
Please retry in 19.188761533s. [links {
|
| 592 |
+
description: "Learn more about Gemini API quotas"
|
| 593 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 594 |
+
}
|
| 595 |
+
, violations {
|
| 596 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 597 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 598 |
+
quota_dimensions {
|
| 599 |
+
key: "model"
|
| 600 |
+
value: "gemini-2.5-flash"
|
| 601 |
+
}
|
| 602 |
+
quota_dimensions {
|
| 603 |
+
key: "location"
|
| 604 |
+
value: "global"
|
| 605 |
+
}
|
| 606 |
+
quota_value: 5
|
| 607 |
+
}
|
| 608 |
+
, retry_delay {
|
| 609 |
+
seconds: 19
|
| 610 |
+
}
|
| 611 |
+
]
|
| 612 |
+
2026-04-24 15:34:41.788 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.997 fake_p=0.997
|
| 613 |
+
2026-04-24 15:34:41.788 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 76 (High) excl=3 caps=2 cb=1 emo=3
|
| 614 |
+
2026-04-24 15:34:41.789 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 615 |
+
2026-04-24 15:34:41.791 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 616 |
+
2026-04-24 15:34:43.147 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=29 text score=15 verdict=Very Likely Fake
|
| 617 |
+
2026-04-24 15:34:43.555 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 618 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
|
| 619 |
+
Please retry in 17.333464233s. [links {
|
| 620 |
+
description: "Learn more about Gemini API quotas"
|
| 621 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 622 |
+
}
|
| 623 |
+
, violations {
|
| 624 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 625 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 626 |
+
quota_dimensions {
|
| 627 |
+
key: "model"
|
| 628 |
+
value: "gemini-2.5-flash"
|
| 629 |
+
}
|
| 630 |
+
quota_dimensions {
|
| 631 |
+
key: "location"
|
| 632 |
+
value: "global"
|
| 633 |
+
}
|
| 634 |
+
quota_value: 5
|
| 635 |
+
}
|
| 636 |
+
, retry_delay {
|
| 637 |
+
seconds: 17
|
| 638 |
+
}
|
| 639 |
+
]
|
| 640 |
+
2026-04-24 15:34:43.615 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.996 fake_p=0.996
|
| 641 |
+
2026-04-24 15:34:43.616 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 642 |
+
2026-04-24 15:34:43.616 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 643 |
+
2026-04-24 15:34:43.618 | WARNING | models.model_loader:load_spacy_nlp:98 - spaCy model 'en_core_web_sm' not found. Run: python -m spacy download en_core_web_sm
|
| 644 |
+
2026-04-24 15:34:44.924 | INFO | api.v1.analyze:analyze_text_endpoint:549 - Saved AnalysisRecord id=30 text score=30 verdict=Likely Fake
|
| 645 |
+
2026-04-24 15:34:45.353 | ERROR | services.llm_explainer:generate_llm_summary:186 - LLM explainer failed: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit.
|
| 646 |
+
* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 5, model: gemini-2.5-flash
|
| 647 |
+
Please retry in 15.553103918s. [links {
|
| 648 |
+
description: "Learn more about Gemini API quotas"
|
| 649 |
+
url: "https://ai.google.dev/gemini-api/docs/rate-limits"
|
| 650 |
+
}
|
| 651 |
+
, violations {
|
| 652 |
+
quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
|
| 653 |
+
quota_id: "GenerateRequestsPerMinutePerProjectPerModel-FreeTier"
|
| 654 |
+
quota_dimensions {
|
| 655 |
+
key: "model"
|
| 656 |
+
value: "gemini-2.5-flash"
|
| 657 |
+
}
|
| 658 |
+
quota_dimensions {
|
| 659 |
+
key: "location"
|
| 660 |
+
value: "global"
|
| 661 |
+
}
|
| 662 |
+
quota_value: 5
|
| 663 |
+
}
|
| 664 |
+
, retry_delay {
|
| 665 |
+
seconds: 15
|
| 666 |
+
}
|
| 667 |
+
]
|
| 668 |
+
2026-04-24 15:43:27.438 | INFO | api.v1.auth:register:33 - Registered user id=4 email=***@example.com
|
| 669 |
+
2026-04-24 15:43:27.463 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 670 |
+
2026-04-24 15:43:33.684 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 671 |
+
2026-04-24 15:43:33.796 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
|
| 672 |
+
2026-04-24 15:43:33.797 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 673 |
+
2026-04-24 15:43:33.799 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 674 |
+
2026-04-24 15:43:35.106 | INFO | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
|
| 675 |
+
2026-04-24 15:43:35.120 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
|
| 676 |
+
2026-04-24 15:43:36.284 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=31 text score=31 verdict=Likely Fake
|
| 677 |
+
2026-04-24 15:43:36.352 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
|
| 678 |
+
2026-04-24 15:43:36.352 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 679 |
+
2026-04-24 15:43:36.353 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 680 |
+
2026-04-24 15:43:36.370 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
|
| 681 |
+
2026-04-24 15:43:37.567 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=32 text score=31 verdict=Likely Fake
|
| 682 |
+
2026-04-24 15:43:47.549 | INFO | services.llm_explainer:generate_llm_summary:207 - LLM summary generated via gemini/gemini-2.5-flash
|
| 683 |
+
2026-04-24 15:43:47.614 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.991 fake_p=0.991
|
| 684 |
+
2026-04-24 15:43:47.614 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 685 |
+
2026-04-24 15:43:47.615 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 686 |
+
2026-04-24 15:43:47.630 | INFO | services.text_service:extract_entities:253 - NER extracted 3 entities: ['India', 'Elon Musk', 'New Delhi']
|
| 687 |
+
2026-04-24 15:43:49.134 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=33 text score=31 verdict=Likely Fake
|
| 688 |
+
2026-04-24 15:44:11.346 | WARNING | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 300s
|
| 689 |
+
2026-04-24 15:44:11.346 | WARNING | services.llm_explainer:generate_llm_summary:220 - LLM quota hit (ResourceExhausted) — circuit open for 300s
|
| 690 |
+
2026-04-24 15:44:11.352 | WARNING | services.llm_explainer:mark_rate_limited:42 - LLM rate-limited — pausing all LLM calls for 5s
|
| 691 |
+
2026-04-24 15:44:11.404 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
|
| 692 |
+
2026-04-24 15:44:11.404 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 693 |
+
2026-04-24 15:44:11.405 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 694 |
+
2026-04-24 15:44:12.724 | INFO | api.v1.analyze:analyze_text_endpoint:550 - Saved AnalysisRecord id=34 text score=30 verdict=Likely Fake
|
| 695 |
+
2026-04-24 15:57:39.916 | INFO | api.v1.auth:register:33 - Registered user id=5 email=***@example.com
|
| 696 |
+
2026-04-24 15:57:39.958 | INFO | models.model_loader:load_text_model:57 - Loading text model: jy46604790/Fake-News-Bert-Detect
|
| 697 |
+
2026-04-24 15:57:46.475 | INFO | models.model_loader:load_text_model:65 - Text model loaded
|
| 698 |
+
2026-04-24 15:57:46.582 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
|
| 699 |
+
2026-04-24 15:57:46.584 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 700 |
+
2026-04-24 15:57:46.586 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 701 |
+
2026-04-24 15:57:47.954 | INFO | models.model_loader:load_spacy_nlp:96 - spaCy en_core_web_sm loaded
|
| 702 |
+
2026-04-24 15:57:49.166 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=35 text score=30 verdict=Likely Fake
|
| 703 |
+
2026-04-24 15:57:58.130 | INFO | services.llm_explainer:generate_llm_summary:271 - LLM summary generated via gemini/gemini-2.5-flash
|
| 704 |
+
2026-04-24 15:57:58.196 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
|
| 705 |
+
2026-04-24 15:57:58.197 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 706 |
+
2026-04-24 15:57:58.197 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 707 |
+
2026-04-24 15:57:59.705 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=36 text score=30 verdict=Likely Fake
|
| 708 |
+
2026-04-24 15:58:02.948 | ERROR | services.llm_explainer:generate_llm_summary:287 - LLM explainer failed: 503 UNAVAILABLE. {'error': {'code': 503, 'message': 'This model is currently experiencing high demand. Spikes in demand are usually temporary. Please try again later.', 'status': 'UNAVAILABLE'}}
|
| 709 |
+
2026-04-24 15:58:03.008 | INFO | services.text_service:classify_text:159 - Text classify [en] → LABEL_0 @ 0.999 fake_p=0.999
|
| 710 |
+
2026-04-24 15:58:03.008 | INFO | services.text_service:score_sensationalism:193 - Sensationalism → 0 (Low) excl=0 caps=0 cb=0 emo=0
|
| 711 |
+
2026-04-24 15:58:03.009 | INFO | services.text_service:detect_manipulation_indicators:213 - Manipulation indicators → 0 found
|
| 712 |
+
2026-04-24 15:58:04.488 | INFO | api.v1.analyze:analyze_text_endpoint:555 - Saved AnalysisRecord id=37 text score=30 verdict=Likely Fake
|
| 713 |
+
2026-04-24 15:59:52.694 | INFO | services.llm_explainer:_get_provider:176 - LLM chain initialized: gemini/gemini-2.5-flash → groq/llama-3.3-70b-versatile
|
| 714 |
+
2026-04-24 15:59:52.695 | INFO | services.llm_explainer:generate:161 - gemini/gemini-2.5-flash quota hit — failing over to groq/llama-3.3-70b-versatile
|
| 715 |
+
2026-04-24 23:15:36.409 | INFO | main:lifespan:108 - Starting DeepShield backend
|
| 716 |
+
2026-04-24 23:15:36.470 | INFO | main:lifespan:110 - Database initialized
|
| 717 |
+
2026-04-24 23:15:36.470 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 718 |
+
2026-04-24 23:15:46.404 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 719 |
+
2026-04-24 23:15:57.188 | INFO | api.v1.analyze:analyze_image:118 - cache hit image sha=6de55b9fc5bd record=19
|
| 720 |
+
2026-04-24 23:16:59.860 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 721 |
+
2026-04-24 23:17:03.920 | INFO | services.efficientnet_service:__init__:97 - EfficientNetDetector ready: EfficientNetAutoAttB4/DFDC on cpu | calibrator=no
|
| 722 |
+
2026-04-24 23:17:04.519 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.868 ffpp=n/a eff=0.03269108012318611 → 0.450
|
| 723 |
+
2026-04-24 23:17:04.569 | INFO | models.model_loader:load_face_detector:142 - Loading MediaPipe FaceMesh
|
| 724 |
+
2026-04-24 23:17:13.315 | INFO | models.model_loader:load_face_detector:150 - MediaPipe FaceMesh loaded
|
| 725 |
+
2026-04-24 23:17:16.988 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 726 |
+
2026-04-24 23:17:17.131 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (800x450)
|
| 727 |
+
2026-04-24 23:17:18.394 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 728 |
+
2026-04-24 23:17:18.714 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 729 |
+
2026-04-24 23:17:18.757 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=38 score=45 verdict=Possibly Manipulated
|
| 730 |
+
2026-04-24 23:29:04.622 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 731 |
+
2026-04-24 23:29:05.312 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Fake | vit=0.767 ffpp=n/a eff=0.36121347546577454 → 0.564
|
| 732 |
+
2026-04-24 23:29:06.604 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 733 |
+
2026-04-24 23:29:10.091 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2393x4096)
|
| 734 |
+
2026-04-24 23:29:11.326 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 735 |
+
2026-04-24 23:29:11.344 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 736 |
+
2026-04-24 23:29:11.436 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=39 score=44 verdict=Possibly Manipulated
|
| 737 |
+
2026-04-24 23:30:58.303 | ERROR | api.v1.report:generate:51 - Report generation failed: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
|
| 738 |
+
Traceback (most recent call last):
|
| 739 |
+
|
| 740 |
+
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1002, in _bootstrap
|
| 741 |
+
self._bootstrap_inner()
|
| 742 |
+
│ └ <function Thread._bootstrap_inner at 0x000001A73BF11A80>
|
| 743 |
+
└ <WorkerThread(AnyIO worker thread, started 18584)>
|
| 744 |
+
File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\threading.py", line 1045, in _bootstrap_inner
|
| 745 |
+
self.run()
|
| 746 |
+
│ └ <function WorkerThread.run at 0x000001A7030349A0>
|
| 747 |
+
└ <WorkerThread(AnyIO worker thread, started 18584)>
|
| 748 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\anyio\_backends\_asyncio.py", line 1002, in run
|
| 749 |
+
result = context.run(func, *args)
|
| 750 |
+
│ │ │ └ ()
|
| 751 |
+
│ │ └ functools.partial(<function generate at 0x000001A7011BA0C0>, db=<sqlalchemy.orm.session.Session object at 0x000001A70D16E390>...
|
| 752 |
+
│ └ <method 'run' of '_contextvars.Context' objects>
|
| 753 |
+
└ <_contextvars.Context object at 0x000001A70D16CD40>
|
| 754 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
|
| 755 |
+
response = func(*args, **kwargs)
|
| 756 |
+
│ │ └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
|
| 757 |
+
│ └ ()
|
| 758 |
+
└ <function generate at 0x000001A7011BA160>
|
| 759 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\slowapi\extension.py", line 766, in sync_wrapper
|
| 760 |
+
response = func(*args, **kwargs)
|
| 761 |
+
│ │ └ {'db': <sqlalchemy.orm.session.Session object at 0x000001A70D16E390>, 'user': None, 'analysis_id': 39, 'request': <starlette....
|
| 762 |
+
│ └ ()
|
| 763 |
+
└ <function generate at 0x000001A7011BA020>
|
| 764 |
+
|
| 765 |
+
> File "C:\Users\athar\Desktop\minor2\backend\api\v1\report.py", line 49, in generate
|
| 766 |
+
path = generate_report(record)
|
| 767 |
+
│ └ <db.models.AnalysisRecord object at 0x000001A70D17A2D0>
|
| 768 |
+
└ <function generate_report at 0x000001A7011B9D00>
|
| 769 |
+
|
| 770 |
+
File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 119, in generate_report
|
| 771 |
+
html_to_pdf(html, out_path)
|
| 772 |
+
│ │ └ WindowsPath('temp_reports/deepshield_39_c2b71295.pdf')
|
| 773 |
+
│ └ '<!DOCTYPE html>\n<html>\n<head>\n <meta charset="utf-8" />\n <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
|
| 774 |
+
└ <function html_to_pdf at 0x000001A7011B9C60>
|
| 775 |
+
|
| 776 |
+
File "C:\Users\athar\Desktop\minor2\backend\services\report_service.py", line 107, in html_to_pdf
|
| 777 |
+
result = pisa.CreatePDF(html, dest=f)
|
| 778 |
+
│ │ │ └ <_io.BufferedWriter name='temp_reports\\deepshield_39_c2b71295.pdf'>
|
| 779 |
+
│ │ └ '<!DOCTYPE html>\n<html>\n<head>\n <meta charset="utf-8" />\n <title>DeepShield Analysis Report — c9f44067-528d-4e96-9365-2...
|
| 780 |
+
│ └ <function pisaDocument at 0x000001A7011B9440>
|
| 781 |
+
└ <module 'xhtml2pdf.pisa' from 'C:\\Users\\athar\\Desktop\\minor2\\backend\\.venv\\Lib\\site-packages\\xhtml2pdf\\pisa.py'>
|
| 782 |
+
|
| 783 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\document.py", line 196, in pisaDocument
|
| 784 |
+
doc.build(context.story)
|
| 785 |
+
│ │ │ └ [PmlParagraph(
|
| 786 |
+
│ │ │ 'dir'
|
| 787 |
+
│ │ │ 'dir'
|
| 788 |
+
│ │ │ 'caseSensitive'
|
| 789 |
+
│ │ │ 'caseSensitive'
|
| 790 |
+
│ │ │ 'encoding'
|
| 791 |
+
│ │ │ 'encoding'
|
| 792 |
+
│ │ │ 'text'
|
| 793 |
+
│ │ │ 'text...
|
| 794 |
+
│ │ └ <xhtml2pdf.context.pisaContext object at 0x000001A703A22990>
|
| 795 |
+
│ └ <function BaseDocTemplate.build at 0x000001A77EFA8E00>
|
| 796 |
+
└ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
|
| 797 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 1083, in build
|
| 798 |
+
self.handle_flowable(flowables)
|
| 799 |
+
│ │ └ [PmlParagraph(
|
| 800 |
+
│ │ 'dir'
|
| 801 |
+
│ │ 'dir'
|
| 802 |
+
│ │ 'caseSensitive'
|
| 803 |
+
│ │ 'caseSensitive'
|
| 804 |
+
│ │ 'encoding'
|
| 805 |
+
│ │ 'encoding'
|
| 806 |
+
│ │ 'text'
|
| 807 |
+
│ │ 'text...
|
| 808 |
+
│ └ <function BaseDocTemplate.handle_flowable at 0x000001A77EFA8B80>
|
| 809 |
+
└ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
|
| 810 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\doctemplate.py", line 932, in handle_flowable
|
| 811 |
+
if frame.add(f, canv, trySplit=self.allowSplitting):
|
| 812 |
+
│ │ │ │ │ └ 1
|
| 813 |
+
│ │ │ │ └ <xhtml2pdf.xhtml2pdf_reportlab.PmlBaseDoc object at 0x000001A703756C10>
|
| 814 |
+
│ │ │ └ <reportlab.pdfgen.canvas.Canvas object at 0x000001A70D1DED50>
|
| 815 |
+
│ │ └ PmlTable(
|
| 816 |
+
│ │ rowHeights=[None],
|
| 817 |
+
│ │ colWidths=[4.93228346456693, 488.29606299212605],
|
| 818 |
+
│ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 819 |
+
│ └ <function Frame._add at 0x000001A77EECDF80>
|
| 820 |
+
└ <reportlab.platypus.frames.Frame object at 0x000001A70344D6D0>
|
| 821 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\frames.py", line 158, in _add
|
| 822 |
+
w, h = flowable.wrap(aW, h)
|
| 823 |
+
│ │ │ └ 751.1811023622049
|
| 824 |
+
│ │ └ 493.228346456693
|
| 825 |
+
│ └ <function PmlTable.wrap at 0x000001A7011719E0>
|
| 826 |
+
└ PmlTable(
|
| 827 |
+
rowHeights=[None],
|
| 828 |
+
colWidths=[4.93228346456693, 488.29606299212605],
|
| 829 |
+
[[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 830 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 858, in wrap
|
| 831 |
+
return Table.wrap(self, availWidth, availHeight)
|
| 832 |
+
│ │ │ │ └ 751.1811023622049
|
| 833 |
+
│ │ │ └ 493.228346456693
|
| 834 |
+
│ │ └ PmlTable(
|
| 835 |
+
│ │ rowHeights=[None],
|
| 836 |
+
│ │ colWidths=[4.93228346456693, 488.29606299212605],
|
| 837 |
+
│ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 838 |
+
│ └ <function Table.wrap at 0x000001A77EFAC400>
|
| 839 |
+
└ <class 'reportlab.platypus.tables.Table'>
|
| 840 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 1354, in wrap
|
| 841 |
+
self._calc(availWidth, availHeight)
|
| 842 |
+
│ │ │ └ 751.1811023622049
|
| 843 |
+
│ │ └ 493.228346456693
|
| 844 |
+
│ └ <function Table._calc at 0x000001A77EFAB600>
|
| 845 |
+
└ PmlTable(
|
| 846 |
+
rowHeights=[None],
|
| 847 |
+
colWidths=[4.93228346456693, 488.29606299212605],
|
| 848 |
+
[[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 849 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 740, in _calc
|
| 850 |
+
self._calc_height(availHeight,availWidth,W=W)
|
| 851 |
+
│ │ │ │ └ None
|
| 852 |
+
│ │ │ └ 493.228346456693
|
| 853 |
+
│ │ └ 751.1811023622049
|
| 854 |
+
│ └ <function Table._calc_height at 0x000001A77EFAB560>
|
| 855 |
+
└ PmlTable(
|
| 856 |
+
rowHeights=[None],
|
| 857 |
+
colWidths=[4.93228346456693, 488.29606299212605],
|
| 858 |
+
[[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 859 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 664, in _calc_height
|
| 860 |
+
dW,t = self._listCellGeom(v,w or self._listValueWidth(v),s)
|
| 861 |
+
│ │ │ │ │ │ │ └ <CellStyle '(0, 0)'>
|
| 862 |
+
│ │ │ │ │ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
|
| 863 |
+
│ │ │ │ │ └ <function Table._listValueWidth at 0x000001A77EFAB380>
|
| 864 |
+
│ │ │ │ └ PmlTable(
|
| 865 |
+
│ │ │ │ rowHeights=[None],
|
| 866 |
+
│ │ │ │ colWidths=[4.93228346456693, 488.29606299212605],
|
| 867 |
+
│ │ │ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 868 |
+
│ │ │ └ 4.93228346456693
|
| 869 |
+
│ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
|
| 870 |
+
│ └ <function PmlTable._listCellGeom at 0x000001A701171940>
|
| 871 |
+
└ PmlTable(
|
| 872 |
+
rowHeights=[None],
|
| 873 |
+
colWidths=[4.93228346456693, 488.29606299212605],
|
| 874 |
+
[[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 875 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\xhtml2pdf\xhtml2pdf_reportlab.py", line 810, in _listCellGeom
|
| 876 |
+
return Table._listCellGeom(self, V, w, s, W=W, H=H, aH=aH)
|
| 877 |
+
│ │ │ │ │ │ │ │ └ 751.1811023622049
|
| 878 |
+
│ │ │ │ │ │ │ └ None
|
| 879 |
+
│ │ │ │ │ │ └ None
|
| 880 |
+
│ │ │ │ │ └ <CellStyle '(0, 0)'>
|
| 881 |
+
│ │ │ │ └ 4.93228346456693
|
| 882 |
+
│ │ │ └ (<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInFrame object at 0x000001A70D1F4950>,)
|
| 883 |
+
│ │ └ PmlTable(
|
| 884 |
+
│ │ rowHeights=[None],
|
| 885 |
+
│ │ colWidths=[4.93228346456693, 488.29606299212605],
|
| 886 |
+
│ │ [[(<xhtml2pdf.xhtml2pdf_reportlab.PmlKeepInF...
|
| 887 |
+
│ └ <function Table._listCellGeom at 0x000001A77EFAB2E0>
|
| 888 |
+
└ <class 'reportlab.platypus.tables.Table'>
|
| 889 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 490, in _listCellGeom
|
| 890 |
+
raise ValueError(f'{self.identity()}: flowable given negative availWidth={aW} == width={w} - leftPadding={s.leftPadding} - rightPadding={s.rightPadding}')
|
| 891 |
+
File "C:\Users\athar\Desktop\minor2\backend\.venv\Lib\site-packages\reportlab\platypus\tables.py", line 440, in identity
|
| 892 |
+
tallest = '(tallest row %d)' % int(max(rh))
|
| 893 |
+
└ [None]
|
| 894 |
+
|
| 895 |
+
TypeError: int() argument must be a string, a bytes-like object or a real number, not 'NoneType'
|
| 896 |
+
2026-04-24 23:44:20.465 | INFO | api.v1.auth:register:33 - Registered user id=6 email=***@gmail.com
|
| 897 |
+
2026-04-24 23:45:54.152 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 898 |
+
2026-04-24 23:45:54.595 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.668 ffpp=n/a eff=0.00913542602211237 → 0.339
|
| 899 |
+
2026-04-24 23:45:55.772 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 900 |
+
2026-04-24 23:45:58.926 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
|
| 901 |
+
2026-04-24 23:46:00.276 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
|
| 902 |
+
2026-04-24 23:46:00.291 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=Google, model=Pixel 7 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
|
| 903 |
+
2026-04-24 23:46:00.379 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=40 score=14 verdict=Very Likely Fake
|
| 904 |
+
2026-04-24 23:46:00.382 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
|
| 905 |
+
2026-04-24 23:46:00.386 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
|
| 906 |
+
2026-04-24 23:47:37.291 | INFO | services.report_service:generate_report:120 - Report generated id=40 path=temp_reports\deepshield_40_3f0f8ff7.pdf size=14978B
|
| 907 |
+
2026-04-24 23:50:59.570 | INFO | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
|
| 908 |
+
2026-04-25 02:48:29.295 | INFO | services.report_service:cleanup_expired:149 - Cleaned up 2 expired reports
|
| 909 |
+
2026-04-25 02:48:29.419 | WARNING | services.report_service:cleanup_expired:149 - Cleanup failed for temp_reports\deepshield_40_3f0f8ff7.pdf: [WinError 2] The system cannot find the file specified: 'temp_reports\\deepshield_40_3f0f8ff7.pdf'
|
| 910 |
+
2026-04-25 21:48:15.075 | INFO | main:lifespan:108 - Starting DeepShield backend
|
| 911 |
+
2026-04-25 21:48:15.082 | INFO | main:lifespan:110 - Database initialized
|
| 912 |
+
2026-04-25 21:48:15.082 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 913 |
+
2026-04-25 21:48:18.709 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 914 |
+
2026-04-25 21:48:18.712 | INFO | main:lifespan:118 - Shutting down DeepShield backend
|
| 915 |
+
2026-04-25 21:52:02.663 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 916 |
+
2026-04-25 21:52:03.239 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.870 ffpp=n/a eff=0.0529196597635746 → 0.462
|
| 917 |
+
2026-04-25 21:52:04.390 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 918 |
+
2026-04-25 21:52:04.682 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (1223x640)
|
| 919 |
+
2026-04-25 21:52:05.863 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 5 regions
|
| 920 |
+
2026-04-25 21:52:05.883 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 921 |
+
2026-04-25 21:52:05.927 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=41 score=46 verdict=Possibly Manipulated
|
| 922 |
+
2026-04-25 22:02:22.021 | INFO | main:lifespan:108 - Starting DeepShield backend
|
| 923 |
+
2026-04-25 22:02:22.057 | INFO | main:lifespan:110 - Database initialized
|
| 924 |
+
2026-04-25 22:02:22.057 | INFO | models.model_loader:load_image_model:43 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 925 |
+
2026-04-25 22:02:30.014 | INFO | models.model_loader:load_image_model:51 - Image model loaded
|
| 926 |
+
2026-04-25 22:13:05.431 | INFO | api.v1.auth:login:42 - Login user id=6 email=***@gmail.com
|
| 927 |
+
2026-04-25 22:13:28.224 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 928 |
+
2026-04-25 22:13:28.471 | INFO | services.image_service:classify_image:152 - Image classify (vit_only) → Fake | vit=0.694 ffpp=n/a eff=n/a → 0.694
|
| 929 |
+
2026-04-25 22:13:28.859 | INFO | models.heatmap_generator:generate_heatmap_base64:176 - EfficientNet heatmap skipped — no face detected
|
| 930 |
+
2026-04-25 22:13:31.674 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (2268x4032)
|
| 931 |
+
2026-04-25 22:13:33.044 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 2 regions
|
| 932 |
+
2026-04-25 22:13:33.062 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=Apple, model=iPhone 16 Pro, adjustment=-20 (valid camera metadata (Make/Model/DateTime); GPS coordinates present)
|
| 933 |
+
2026-04-25 22:13:33.166 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=42 score=11 verdict=Very Likely Fake
|
| 934 |
+
2026-04-25 22:13:33.169 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
|
| 935 |
+
2026-04-25 22:13:33.171 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
|
| 936 |
+
2026-04-26 22:05:50.626 | INFO | main:lifespan:108 - Starting DeepShield backend
|
| 937 |
+
2026-04-26 22:05:50.640 | INFO | main:lifespan:110 - Database initialized
|
| 938 |
+
2026-04-26 22:05:50.641 | INFO | models.model_loader:load_image_model:44 - Loading image model: prithivMLmods/Deep-Fake-Detector-v2-Model
|
| 939 |
+
2026-04-26 22:05:58.170 | INFO | models.model_loader:load_image_model:52 - Image model loaded
|
| 940 |
+
2026-04-26 22:07:47.526 | WARNING | models.model_loader:load_ffpp_model:193 - FFPP ViT checkpoint not found at C:\Users\athar\Desktop\trained_models — skipping
|
| 941 |
+
2026-04-26 22:07:48.484 | INFO | services.image_service:classify_image:152 - Image classify (average_vit_eff) → Real | vit=0.834 ffpp=n/a eff=0.02755815163254738 → 0.431
|
| 942 |
+
2026-04-26 22:07:50.164 | INFO | models.heatmap_generator:generate_heatmap_base64:186 - Heatmap generated (224x224) source=gradcam++
|
| 943 |
+
2026-04-26 22:07:50.584 | INFO | services.ela_service:generate_ela_base64:59 - ELA map generated (1290x1290)
|
| 944 |
+
2026-04-26 22:07:52.661 | INFO | models.heatmap_generator:generate_boxes_base64:232 - Bounding boxes generated: 1 regions
|
| 945 |
+
2026-04-26 22:07:52.670 | INFO | services.exif_service:extract_exif:127 - EXIF extracted: make=None, model=None, adjustment=0 (no EXIF metadata found)
|
| 946 |
+
2026-04-26 22:07:52.747 | INFO | api.v1.analyze:analyze_image:230 - Saved AnalysisRecord id=43 score=43 verdict=Possibly Manipulated
|
| 947 |
+
2026-04-26 22:07:52.752 | ERROR | services.llm_explainer:generate_llm_summary:296 - LLM explainer failed: cannot import name 'genai' from 'google' (unknown location)
|
| 948 |
+
2026-04-26 22:07:52.756 | ERROR | services.vlm_breakdown:generate_vlm_breakdown:114 - VLM breakdown failed: cannot import name 'genai' from 'google' (unknown location)
|
| 949 |
+
2026-04-26 22:09:45.469 | INFO | services.report_service:generate_report:120 - Report generated id=43 path=temp_reports\deepshield_43_262befa5.pdf size=15602B
|
main.py
CHANGED
|
@@ -1,17 +1,98 @@
|
|
| 1 |
import asyncio
|
|
|
|
|
|
|
| 2 |
from contextlib import asynccontextmanager
|
| 3 |
|
| 4 |
from fastapi import FastAPI
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
|
| 6 |
from loguru import logger
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
from api.router import api_router
|
| 9 |
from config import settings
|
| 10 |
from db.database import init_db
|
| 11 |
from models.model_loader import get_model_loader
|
|
|
|
| 12 |
from services.report_service import cleanup_expired
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
async def _report_cleanup_loop():
|
| 16 |
while True:
|
| 17 |
try:
|
|
@@ -23,6 +104,7 @@ async def _report_cleanup_loop():
|
|
| 23 |
|
| 24 |
@asynccontextmanager
|
| 25 |
async def lifespan(app: FastAPI):
|
|
|
|
| 26 |
logger.info("Starting DeepShield backend")
|
| 27 |
init_db()
|
| 28 |
logger.info("Database initialized")
|
|
@@ -43,16 +125,32 @@ app = FastAPI(
|
|
| 43 |
lifespan=lifespan,
|
| 44 |
)
|
| 45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
app.add_middleware(
|
| 47 |
CORSMiddleware,
|
| 48 |
allow_origins=settings.CORS_ORIGINS,
|
| 49 |
allow_credentials=True,
|
| 50 |
-
allow_methods=["
|
| 51 |
-
allow_headers=["
|
| 52 |
)
|
| 53 |
|
| 54 |
app.include_router(api_router)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
@app.get("/")
|
| 58 |
def root():
|
|
|
|
| 1 |
import asyncio
|
| 2 |
+
import secrets
|
| 3 |
+
import sys
|
| 4 |
from contextlib import asynccontextmanager
|
| 5 |
|
| 6 |
from fastapi import FastAPI
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from fastapi.staticfiles import StaticFiles
|
| 9 |
from loguru import logger
|
| 10 |
+
from slowapi import _rate_limit_exceeded_handler
|
| 11 |
+
from slowapi.errors import RateLimitExceeded
|
| 12 |
+
|
| 13 |
+
from starlette.middleware.base import BaseHTTPMiddleware
|
| 14 |
+
from starlette.responses import JSONResponse
|
| 15 |
|
| 16 |
from api.router import api_router
|
| 17 |
from config import settings
|
| 18 |
from db.database import init_db
|
| 19 |
from models.model_loader import get_model_loader
|
| 20 |
+
from services.rate_limit import RateLimitContextMiddleware, limiter
|
| 21 |
from services.report_service import cleanup_expired
|
| 22 |
|
| 23 |
|
| 24 |
+
class ContentLengthLimitMiddleware(BaseHTTPMiddleware):
    """Reject oversized uploads via Content-Length header before reading body.

    Saves bandwidth + memory vs letting read_upload_bytes reject post-read.

    Only the *declared* length is checked. Requests that carry no
    Content-Length header, or a value that is not a plain ASCII decimal
    number, are passed through unchanged, so this middleware is an early
    filter and not the only size check.
    """

    def __init__(self, app, max_bytes: int) -> None:
        """Wrap *app* and remember the largest accepted body size in bytes."""
        super().__init__(app)
        self._max = max_bytes

    @staticmethod
    def _declared_length(raw):
        """Return the Content-Length value as an int, or None if unusable.

        ``str.isdigit()`` alone is not a safe guard for ``int()``: characters
        such as superscript two are "digits" for ``isdigit`` but make ``int``
        raise ``ValueError``. Requiring ASCII keeps a malformed header from
        turning into an unhandled server error.
        """
        if not raw or not (raw.isascii() and raw.isdigit()):
            return None
        return int(raw)

    async def dispatch(self, request, call_next):
        """Answer 413 when the declared body size exceeds the limit.

        Otherwise hand the request to the next handler untouched.
        """
        declared = self._declared_length(request.headers.get("content-length"))
        if declared is not None and declared > self._max:
            return JSONResponse(
                status_code=413,
                content={"detail": f"Upload exceeds {self._max // (1024 * 1024)} MB limit"},
            )
        return await call_next(request)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# === Phase 15.3 — JWT / CORS / logging hardening ===
|
| 43 |
+
|
| 44 |
+
_DEFAULT_JWT_SECRET = "change-me-in-production"

# Operators are pointed at a command instead of a generated key, so that no
# usable secret is ever written to stderr or the rotating log file.
_JWT_SECRET_HINT = 'python -c "import secrets; print(secrets.token_urlsafe(48))"'


def _enforce_production_hardening() -> None:
    """Refuse to start in production with unsafe defaults (Phase 15.3).

    Checks performed:

    * ``settings.JWT_SECRET_KEY`` must be set and must differ from the
      placeholder default. With ``settings.DEBUG`` on this only logs a
      warning; otherwise it logs an error and exits with status 1.
    * ``settings.CORS_ORIGINS`` must not contain ``"*"`` unless
      ``settings.DEBUG`` is on; otherwise it logs an error and exits with
      status 1.

    Raises:
        SystemExit: via ``sys.exit(1)`` when a non-debug configuration is unsafe.
    """
    jwt_secret = settings.JWT_SECRET_KEY
    if not jwt_secret or jwt_secret == _DEFAULT_JWT_SECRET:
        if settings.DEBUG:
            logger.warning(
                "JWT_SECRET_KEY is unset or default — safe in dev only. "
                f"Set it before deploying. Generate one with: {_JWT_SECRET_HINT}"
            )
        else:
            logger.error(
                "Refusing to start: JWT_SECRET_KEY is unset or default. "
                f"Set JWT_SECRET_KEY in your environment. Generate one with: {_JWT_SECRET_HINT}"
            )
            sys.exit(1)

    if "*" in settings.CORS_ORIGINS and not settings.DEBUG:
        logger.error(
            "Refusing to start: CORS_ORIGINS contains '*' while allow_credentials=True. "
            "Set an explicit origin list."
        )
        sys.exit(1)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _configure_logging() -> None:
    """Install the stderr and rotating-file log sinks with email scrubbing.

    All previously registered loguru handlers are removed first. Both new
    sinks log at INFO and above and share a filter that rewrites the local
    part of anything that looks like an email address to ``***``.
    """
    import re

    address_pattern = re.compile(r"([A-Za-z0-9._%+-]+)@([A-Za-z0-9.-]+\.[A-Za-z]{2,})")

    def _redact_emails(record):
        # The filter never drops a record; it only masks the message text
        # in place before the sink formats it.
        record["message"] = address_pattern.sub(r"***@\2", record["message"])
        return True

    shared_options = {"filter": _redact_emails, "level": "INFO"}

    logger.remove()
    logger.add(sys.stderr, **shared_options)
    logger.add(
        "logs/deepshield.log",
        rotation="10 MB",
        retention="7 days",
        enqueue=True,
        **shared_options,
    )
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
_configure_logging()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
async def _report_cleanup_loop():
|
| 97 |
while True:
|
| 98 |
try:
|
|
|
|
| 104 |
|
| 105 |
@asynccontextmanager
|
| 106 |
async def lifespan(app: FastAPI):
|
| 107 |
+
_enforce_production_hardening()
|
| 108 |
logger.info("Starting DeepShield backend")
|
| 109 |
init_db()
|
| 110 |
logger.info("Database initialized")
|
|
|
|
| 125 |
lifespan=lifespan,
|
| 126 |
)
|
| 127 |
|
| 128 |
+
# Phase 15.2 — slowapi rate limiter
# The limiter instance is published on app.state, and RateLimitExceeded is
# routed to slowapi's handler.
app.state.limiter = limiter


app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
app.add_middleware(RateLimitContextMiddleware)
# Phase 15.3 — reject oversized uploads before reading body
# MAX_UPLOAD_SIZE_MB is converted to bytes here; the middleware compares it
# against the request's declared Content-Length.
app.add_middleware(ContentLengthLimitMiddleware, max_bytes=settings.MAX_UPLOAD_SIZE_MB * 1024 * 1024)

# Phase 15.3 — explicit CORS methods/headers (no wildcards with credentials)
# NOTE(review): CORS is registered last; with Starlette this presumably makes
# it the outermost middleware, so 413 responses from the size limiter also
# get CORS headers — confirm before reordering any add_middleware call.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.CORS_ORIGINS,
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
    allow_headers=["Authorization", "Content-Type", "Accept", "Origin", "X-Requested-With"],
)

app.include_router(api_router)

# Phase 19.2 — serve stored thumbnails / media under /media/*
# The root directory comes from the MEDIA_ROOT environment variable and falls
# back to ./media. Creating "<root>/thumbs" up front also creates the root
# itself, so the directory exists before it is mounted.
# NOTE(review): the whole media root is mounted, not only thumbs/ — confirm
# that every file stored under it is meant to be publicly readable.
import os as _os
_media_root = _os.environ.get("MEDIA_ROOT", "./media")
_os.makedirs(_os.path.join(_media_root, "thumbs"), exist_ok=True)
app.mount("/media", StaticFiles(directory=_media_root), name="media")
|
| 153 |
+
|
| 154 |
|
| 155 |
@app.get("/")
|
| 156 |
def root():
|
media/03/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43.webp
ADDED
|
models/icpr2020dfdc/blazeface/blazeface.pth → media/2f/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06.jpg
RENAMED
|
File without changes
|
media/50/502e5d7120817956b7ed208987ecad441ef95a527ae8f975340f46669330a27c.jpg
ADDED
|
models/icpr2020dfdc/blazeface/anchors.npy → media/63/635f21138244fc1dcbff5d0525b3c0a8187b1b9cc0ad90b5bb297a76e7b3850c.jpg
RENAMED
|
File without changes
|
media/6d/6de55b9fc5bdc37898418b7c25d29080f32053a1825e3a7dc2a2ff9df1292015.jpg
ADDED
|
media/7b/7b626d0ddff59ca602e2e1eb02e62e21093aa647ab53c200ca5203f7fc17f6dd.jpg
ADDED
|
Git LFS Details
|
media/bf/bf7ec0c425d20a2161b6a55356a869aad486cf7c6a196420b75be117bf8a47cb.webp
ADDED
|
media/c0/c064c839c9469d7b616db135f08e09235abd3d73f0889d978d1f92243226a028.jpg
ADDED
|
Git LFS Details
|
media/f0/f0eec5199108c2a4476f9b44aa5454ee0506949b5480b11a6578f2bbcb1f954f.jpg
ADDED
|
media/f1/f1c22499ba7787be66a12c32ab2991df97fc4d25c88560207367214e75d7463c.jpg
ADDED
|
media/thumbs/037d518e19e841c0976352df8d390a7ac9508a4b0d689efd0661ae2db3a92c43_400.jpg
ADDED
|
|
media/thumbs/2f7d41a5b57702a9a238409e6a1b973b4398f94c51fdf447e11782ed07693f06_400.jpg
ADDED
|
|