Commit 60efa5a · Initial deployment without models
Files changed:
- .dockerignore +25 -0
- .env.example +1 -0
- .gitignore +16 -0
- Dockerfile +25 -0
- README.md +226 -0
- analysis/__init__.py +0 -0
- analysis/audio_analyser.py +165 -0
- analysis/llm_analyser.py +108 -0
- analysis/prompt.py +70 -0
- analysis/video_analyser.py +183 -0
- extraction/__init__.py +0 -0
- extraction/media_extractor.py +122 -0
- extraction/timeline_generator.py +38 -0
- main.py +438 -0
- models/.gitkeep +0 -0
- models/__init__.py +0 -0
- models/audio_model/.gitkeep +0 -0
- models/download_model.py +33 -0
- models/load_models.py +49 -0
- models/video_model/.gitkeep +0 -0
- pipeline.py +35 -0
- requirements.txt +35 -0
- uploads/.gitkeep +0 -0
.dockerignore
ADDED
__pycache__/
*.pyc
*.pyo
*.pyd
.Python
*.so
*.egg
*.egg-info/
dist/
build/
*.log
.git/
.gitignore
uploads/*
!uploads/.gitkeep
*.mp4
*.avi
*.mov
venv/
env/
.env
.vscode/
.idea/
*.pkl
*.pth
.env.example
ADDED
GOOGLE_API_KEY=your_gemini_api_key
.gitignore
ADDED
__pycache__/
*.pyc
.env
myenv/
models/video_model/*
models/audio_model/*
!models/.gitkeep
!models/video_model/.gitkeep
!models/audio_model/.gitkeep
uploads/*
!uploads/.gitkeep
*.bin
*.pt
*.pth
Dockerfile
ADDED
FROM python:3.10-slim

WORKDIR /app

RUN apt-get update && apt-get install -y \
    ffmpeg \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    git \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

RUN mkdir -p uploads models/video_model models/audio_model

RUN python models/download_model.py

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md
ADDED
# DeepDefend

> **Multi-Modal Deepfake Detection System**
> Detect AI-generated deepfakes in videos using computer vision and audio analysis

[Python 3.10+](https://www.python.org/downloads/)
[FastAPI](https://fastapi.tiangolo.com)

## Overview

DeepDefend is a comprehensive deepfake detection system that combines **video frame analysis** and **audio analysis** to identify AI-generated synthetic media. Using machine learning models and AI-powered evidence fusion, it provides detailed, interval-by-interval analysis with explainable results.

### Why DeepDefend?

- **Multi-Modal Analysis**: Combines video and audio detection for higher accuracy
- **AI-Powered Fusion**: Uses an LLM to generate human-readable reports
- **Interval Breakdown**: Shows exactly which parts of the video are suspicious
- **REST API**: Easy integration with any frontend or application

## Features

### Core Detection Capabilities

- **Video Analysis**
  - Frame-by-frame deepfake detection using pre-trained models
  - Face detection and region-specific analysis
  - Suspicious region identification (eyes, mouth, face boundaries)
  - Confidence scoring per frame

- **Audio Analysis**
  - Voice synthesis detection
  - Spectrogram analysis for audio artifacts
  - Frequency pattern recognition
  - Audio splicing detection

- **AI-Powered Reporting**
  - LLM-based evidence fusion (Google Gemini)
  - Natural language explanation of findings
  - Verdict with confidence percentage
  - Timestamped suspicious intervals

### Processing Pipeline

```
Video Input
     ↓
┌───────────────────┐
│ Media Extraction  │ → Extract frames (5 per interval)
│                   │ → Extract audio chunks
└────────┬──────────┘
         │
         ├──────────────────────┬──────────────────────┐
         ▼                      ▼                      ▼
┌─────────────────┐   ┌─────────────────┐   ┌────────────────┐
│ Video Analysis  │   │ Audio Analysis  │   │ Timeline Gen   │
│ • Face detect   │   │ • Spectrogram   │   │ • 2s intervals │
│ • Region scan   │   │ • Voice synth   │   │ • Metadata     │
│ • Fake score    │   │ • Artifacts     │   │                │
└────────┬────────┘   └────────┬────────┘   └────────┬───────┘
         │                     │                     │
         └──────────────┬──────┴─────────────────────┘
                        ▼
           ┌──────────────────────────┐
           │    LLM Fusion Engine     │
           │ • Combine evidence       │
           │ • Generate verdict       │
           │ • Natural language report│
           └────────────┬─────────────┘
                        ▼
                  Final Report
                 (JSON Response)
```

## Demo

### Live Demo
**API**: [https://deepdefend-api.hf.space](https://deepdefend-api.hf.space)
**Docs**: [https://deepdefend-api.hf.space/docs](https://deepdefend-api.hf.space/docs)

### Example Analysis

<details>
<summary>Click to see sample output</summary>

```json
{
  "verdict": "DEEPFAKE",
  "confidence": 87.5,
  "overall_scores": {
    "overall_video_score": 0.823,
    "overall_audio_score": 0.756,
    "overall_combined_score": 0.789
  },
  "detailed_analysis": "This video shows strong indicators of deepfake manipulation...",
  "suspicious_intervals": [
    {
      "interval": "4.0-6.0",
      "video_score": 0.891,
      "audio_score": 0.834,
      "video_regions": ["eyes", "mouth"],
      "audio_regions": ["voice_synthesis_artifacts"]
    }
  ],
  "total_intervals_analyzed": 15,
  "video_info": {
    "duration": 12.498711111111112,
    "fps": 29.923085402583734,
    "total_frames": 374,
    "file_size_mb": 31.36
  },
  "analysis_id": "4cd98ea5-8c14-4cae-8da4-689345b0aabc",
  "timestamp": "2025-10-10T23:34:35.724916"
}
```
</details>

## Installation

### Prerequisites

- Python 3.10 or higher
- FFmpeg installed on your system
- Google Gemini API key

### Local Setup

1. **Clone the repository**
```bash
git clone https://github.com/yourusername/deepdefend.git
cd deepdefend
```

2. **Create virtual environment**
```bash
python -m venv venv

# On Linux/Mac
source venv/bin/activate

# On Windows
venv\Scripts\activate
```

3. **Install dependencies**
```bash
pip install -r requirements.txt
```

4. **Download ML models**
```bash
python models/download_model.py
```
*This will download ~2GB of models from Hugging Face.*

5. **Configure environment**
```bash
cp .env.example .env
# Edit .env and add your GOOGLE_API_KEY
```

6. **Run the server**
```bash
uvicorn main:app --reload
```

The API will be available at `http://127.0.0.1:8000`.

### Docker Setup

```bash
# Build image
docker build -t deepdefend .

# Run container (the image listens on port 7860)
docker run -p 7860:7860 -e GOOGLE_API_KEY=your_key deepdefend
```

## Tech Stack

### Backend
- **Framework**: FastAPI 0.109.0
- **Server**: Uvicorn
- **ML Framework**: PyTorch 2.3.1
- **Transformers**: Hugging Face Transformers 4.36.2

### ML Models
- **Video Detection**: [dima806/deepfake_vs_real_image_detection](https://huggingface.co/dima806/deepfake_vs_real_image_detection)
- **Audio Detection**: [mo-thecreator/Deepfake-audio-detection](https://huggingface.co/mo-thecreator/Deepfake-audio-detection)
- **LLM Fusion**: Google Gemini 2.5 Flash

### Processing
- **Computer Vision**: OpenCV, Pillow
- **Audio Processing**: Librosa, SoundFile
- **Video Processing**: FFmpeg

### Deployment
- **Container**: Docker
- **Platforms**: Hugging Face Spaces

## Project Structure

```
deepdefend/
│
├── extraction/
│   ├── media_extractor.py        # Frame & audio extraction
│   └── timeline_generator.py     # Timeline creation
│
├── analysis/
│   ├── video_analyser.py         # Video deepfake detection
│   ├── audio_analyser.py         # Audio deepfake detection
│   ├── llm_analyser.py           # LLM-based fusion
│   └── prompt.py                 # LLM prompts
│
├── models/
│   ├── download_model.py         # Model downloader
│   ├── load_models.py            # Model loader
│   ├── video_model/              # (Downloaded)
│   └── audio_model/              # (Downloaded)
│
├── main.py                       # FastAPI application
├── pipeline.py                   # Main detection pipeline
├── requirements.txt              # Python dependencies
├── Dockerfile                    # Container configuration
├── .gitignore
└── README.md
```
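As a quick illustration of the API the README documents, here is a minimal client sketch. It assumes the server from the Local Setup section is running on port 8000 and that the `requests` package is installed (it is not shown in this commit), so treat it as a usage sketch rather than part of the repository:

```python
# Hypothetical client for POST /api/analyze (defined in main.py below).
# "sample.mp4" is a placeholder path; any supported video format works.
import requests

with open("sample.mp4", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8000/api/analyze",
        params={"interval_duration": 2.0},              # allowed range 1.0-5.0
        files={"file": ("sample.mp4", f, "video/mp4")},
        timeout=600,                                     # analysis can take minutes
    )

resp.raise_for_status()
report = resp.json()
print(report["verdict"], report["confidence"])
for interval in report["suspicious_intervals"]:
    print(interval["interval"], interval["video_score"], interval["audio_score"])
```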
analysis/__init__.py
ADDED
(empty file)
analysis/audio_analyser.py
ADDED
import torch
import librosa
import numpy as np
from typing import Dict, List
from models.load_models import model_loader

class AudioAnalyzer:
    """Analyzes audio chunks for deepfake detection"""

    def __init__(self):
        self.model, self.processor = model_loader.load_audio_model()
        self.device = model_loader.get_device()

    def predict_deepfake(self, audio: np.ndarray, sample_rate: int) -> Dict:
        """Predict if audio chunk is deepfake"""

        min_length = sample_rate * 1
        if len(audio) < min_length:
            audio = np.pad(audio, (0, min_length - len(audio)))

        inputs = self.processor(
            audio,
            sampling_rate=sample_rate,
            return_tensors="pt",
            padding=True
        )

        if self.device == "cuda":
            inputs = {k: v.cuda() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=-1)

        fake_prob = probs[0][1].item() if probs.shape[1] > 1 else probs[0][0].item()
        confidence = max(probs[0]).item()

        return {
            'fake_score': round(fake_prob, 3),
            'confidence': round(confidence, 3),
            'label': 'fake' if fake_prob > 0.5 else 'real'
        }

    def analyze_spectrogram(self, audio: np.ndarray, sample_rate: int, fake_score: float) -> Dict:
        """Analyze audio with adaptive thresholds based on fake_score"""
        try:
            spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sample_rate)[0]
            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sample_rate)[0]
            zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)[0]
            mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)

            suspicious_regions = self._identify_audio_anomalies(
                spectral_centroid, spectral_rolloff, zero_crossing_rate, mfcc, fake_score
            )

            return {
                'regions': suspicious_regions,
                'spectral_features': {
                    'avg_spectral_centroid': round(float(np.mean(spectral_centroid)), 2),
                    'avg_spectral_rolloff': round(float(np.mean(spectral_rolloff)), 2),
                    'avg_zero_crossing_rate': round(float(np.mean(zero_crossing_rate)), 3),
                    'mfcc_variance': round(float(np.var(mfcc)), 3)
                }
            }

        except Exception as e:
            if fake_score > 0.6:
                return {
                    'regions': ['voice_synthesis_detected', 'audio_artifacts'],
                    'spectral_features': {}
                }
            else:
                return {
                    'regions': ['no_suspicious_patterns'],
                    'spectral_features': {}
                }

    def _identify_audio_anomalies(self, spectral_centroid: np.ndarray, spectral_rolloff: np.ndarray, zero_crossing: np.ndarray, mfcc: np.ndarray, fake_score: float) -> List[str]:
        suspicious_regions = []

        if fake_score > 0.7:
            pitch_low, pitch_high = 200, 6000
            mfcc_threshold = 25
            zcr_low, zcr_high = 0.02, 0.25
            rolloff_threshold = 3000
            centroid_jump = 800
        elif fake_score > 0.5:
            pitch_low, pitch_high = 250, 5500
            mfcc_threshold = 28
            zcr_low, zcr_high = 0.025, 0.22
            rolloff_threshold = 2700
            centroid_jump = 900
        else:
            pitch_low, pitch_high = 300, 5000
            mfcc_threshold = 30
            zcr_low, zcr_high = 0.03, 0.20
            rolloff_threshold = 2500
            centroid_jump = 1000

        pitch_variance = np.var(spectral_centroid)
        if pitch_variance < pitch_low:
            suspicious_regions.append('monotone_voice')
        elif pitch_variance > pitch_high:
            suspicious_regions.append('erratic_pitch')

        mfcc_var = np.var(mfcc)
        if mfcc_var < mfcc_threshold:
            suspicious_regions.append('voice_synthesis_artifacts')

        zcr_mean = np.mean(zero_crossing)
        if zcr_mean > zcr_high:
            suspicious_regions.append('high_frequency_noise')
        elif zcr_mean < zcr_low:
            suspicious_regions.append('overly_smooth_audio')

        rolloff_std = np.std(spectral_rolloff)
        if rolloff_std > rolloff_threshold:
            suspicious_regions.append('spectral_artifacts')

        centroid_diff = np.diff(spectral_centroid)
        if len(centroid_diff) > 0 and np.max(np.abs(centroid_diff)) > centroid_jump:
            suspicious_regions.append('audio_splicing')

        if np.std(spectral_centroid) < 50:
            suspicious_regions.append('unnatural_consistency')

        if fake_score > 0.6 and len(suspicious_regions) == 0:
            suspicious_regions.append('general_audio_manipulation')

        return suspicious_regions if suspicious_regions else ['no_suspicious_patterns']

    def analyze_interval(self, interval_data: Dict) -> Dict:
        """Analyze audio for a single interval"""
        audio_data = interval_data['audio_data']

        if not audio_data or not audio_data.get('has_audio', False):
            return {
                'interval_id': interval_data['interval_id'],
                'interval': interval_data['interval'],
                'fake_score': 0.0,
                'confidence': 0.0,
                'suspicious_regions': ['no_audio'],
                'has_audio': False,
                'spectral_features': {}
            }

        audio = audio_data['audio']
        sample_rate = audio_data['sample_rate']

        prediction = self.predict_deepfake(audio, sample_rate)

        spectrogram_analysis = self.analyze_spectrogram(
            audio, sample_rate, prediction['fake_score']
        )

        return {
            'interval_id': interval_data['interval_id'],
            'interval': interval_data['interval'],
            'fake_score': prediction['fake_score'],
            'confidence': prediction['confidence'],
            'suspicious_regions': spectrogram_analysis['regions'],
            'has_audio': True,
            'spectral_features': spectrogram_analysis['spectral_features']
        }
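For reference, a minimal sketch of driving `AudioAnalyzer` on its own, outside the pipeline. It assumes the audio model has already been fetched with `models/download_model.py`, and `clip.wav` is a placeholder path:

```python
# Standalone use of AudioAnalyzer on a short clip (illustrative only).
import librosa

from analysis.audio_analyser import AudioAnalyzer

# Load at 16 kHz mono, matching what MediaExtractor feeds the analyzer.
audio, sr = librosa.load("clip.wav", sr=16000, mono=True)

analyzer = AudioAnalyzer()
prediction = analyzer.predict_deepfake(audio, sr)      # {'fake_score', 'confidence', 'label'}
spectro = analyzer.analyze_spectrogram(audio, sr, prediction["fake_score"])
print(prediction)
print(spectro["regions"])
```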
analysis/llm_analyser.py
ADDED
from typing import List, Dict
from langchain_google_genai import ChatGoogleGenerativeAI
from analysis.prompt import _create_analysis_prompt
from dotenv import load_dotenv
import re
load_dotenv()

class LLMFusion:
    """Fuses video and audio analysis results using LLM to generate human-readable report"""

    def __init__(self):
        self.llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

    def prepare_analysis_json(self, timeline: List[Dict]) -> Dict:
        analysis_data = {
            'total_intervals': len(timeline),
            'intervals': []
        }

        for interval in timeline:
            interval_summary = {
                'interval_id': interval['interval_id'],
                'time_range': interval['interval'],
                'video_analysis': interval.get('video_results', {}),
                'audio_analysis': interval.get('audio_results', {})
            }
            analysis_data['intervals'].append(interval_summary)

        return analysis_data

    def calculate_overall_scores(self, timeline: List[Dict]) -> Dict:
        """Calculate overall video and audio fake scores"""
        video_scores = []
        audio_scores = []

        for interval in timeline:
            if interval.get('video_results') and 'fake_score' in interval['video_results']:
                video_scores.append(interval['video_results']['fake_score'])

            if interval.get('audio_results') and 'fake_score' in interval['audio_results']:
                audio_scores.append(interval['audio_results']['fake_score'])

        overall_video = round(sum(video_scores) / len(video_scores), 3) if len(video_scores) > 0 else 0.0
        overall_audio = round(sum(audio_scores) / len(audio_scores), 3) if len(audio_scores) > 0 else 0.0

        if overall_video > 0 and overall_audio > 0:
            overall_combined = round((overall_video + overall_audio) / 2, 3)
        elif overall_video > 0:
            overall_combined = overall_video
        elif overall_audio > 0:
            overall_combined = overall_audio
        else:
            overall_combined = 0.0

        return {
            'overall_video_score': overall_video,
            'overall_audio_score': overall_audio,
            'overall_combined_score': overall_combined
        }

    def generate_report(self, timeline: List[Dict], video_info: Dict) -> Dict:
        analysis_json = self.prepare_analysis_json(timeline)
        overall_scores = self.calculate_overall_scores(timeline)

        prompt = _create_analysis_prompt(analysis_json, overall_scores, video_info)

        try:
            response = self.llm.invoke(prompt)
            llm_response = response.content
        except Exception as e:
            print(f"LLM failed: {e}")
            llm_response = "Analysis failed."

        report = self._structure_report(llm_response, overall_scores, analysis_json)
        return report

    def _structure_report(self, llm_response: str, overall_scores: Dict, analysis_json: Dict) -> Dict:
        """Extract structured information from LLM response"""

        verdict = "DEEPFAKE" if overall_scores['overall_combined_score'] > 0.5 else "REAL"

        confidence = 75.0
        conf_match = re.search(r'(\d+)\s*%', llm_response)
        if conf_match:
            confidence = float(conf_match.group(1))

        suspicious_intervals = []
        for interval_data in analysis_json['intervals']:
            video_score = interval_data.get('video_analysis', {}).get('fake_score', 0)
            audio_score = interval_data.get('audio_analysis', {}).get('fake_score', 0)

            if video_score > 0.6 or audio_score > 0.6:
                suspicious_intervals.append({
                    'interval': interval_data['time_range'],
                    'video_score': video_score,
                    'audio_score': audio_score,
                    'video_regions': interval_data.get('video_analysis', {}).get('suspicious_regions', []),
                    'audio_regions': interval_data.get('audio_analysis', {}).get('suspicious_regions', [])
                })

        return {
            'verdict': verdict,
            'confidence': confidence,
            'overall_scores': overall_scores,
            'detailed_analysis': llm_response,
            'suspicious_intervals': suspicious_intervals,
            'total_intervals_analyzed': analysis_json['total_intervals']
        }
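A toy illustration of how `calculate_overall_scores` averages the per-interval results it is given; the scores below are fabricated. Constructing `LLMFusion` assumes `GOOGLE_API_KEY` is configured, since `__init__` builds the Gemini client:

```python
# Fabricated two-interval timeline, just to show the averaging behaviour.
from analysis.llm_analyser import LLMFusion

timeline = [
    {"interval_id": 0, "interval": "0.0-2.0",
     "video_results": {"fake_score": 0.8}, "audio_results": {"fake_score": 0.7}},
    {"interval_id": 1, "interval": "2.0-4.0",
     "video_results": {"fake_score": 0.3}, "audio_results": {"fake_score": 0.3}},
]

fusion = LLMFusion()
print(fusion.calculate_overall_scores(timeline))
# {'overall_video_score': 0.55, 'overall_audio_score': 0.5, 'overall_combined_score': 0.525}
```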
analysis/prompt.py
ADDED
import json
from typing import List, Dict

def _create_analysis_prompt(analysis_json: Dict, overall_scores: Dict, video_info: Dict) -> str:
    """Create prompt for LLM with proper score interpretation"""

    prompt = f"""You are a deepfake detection expert. Analyze the following video analysis results and generate a human-readable report.

VIDEO INFORMATION:
- Duration: {video_info['duration']:.2f} seconds
- Total Intervals Analyzed: {analysis_json['total_intervals']}

OVERALL SCORES (CRITICAL - READ CAREFULLY):
- Video Deepfake Score: {overall_scores['overall_video_score']}
- Audio Deepfake Score: {overall_scores['overall_audio_score']}
- Averaged Combined Score: {overall_scores['overall_combined_score']}

SCORE INTERPRETATION GUIDE:
- Scores range from 0.0 to 1.0
- 0.0 - 0.3: LIKELY REAL (low probability of manipulation)
- 0.3 - 0.5: POSSIBLY REAL (some minor artifacts, but probably authentic)
- 0.5 - 0.7: POSSIBLY FAKE (suspicious patterns detected)
- 0.7 - 1.0: LIKELY FAKE (high probability of deepfake)

IMPORTANT: The numerical scores are the PRIMARY evidence. Suspicious regions are secondary indicators that provide detail about WHERE issues were detected, but should NOT override low scores.

INTERVAL-BY-INTERVAL ANALYSIS:
{json.dumps(analysis_json['intervals'], indent=2)}

ANALYSIS RULES:
1. If average score < 0.5, you should lean towards "REAL" verdict unless there is overwhelming contradictory evidence
2. If average score > 0.5, you should lean towards "DEEPFAKE" verdict
3. Suspicious regions (like "monotone_voice" or "eyes") only matter if the scores also indicate manipulation
4. A low score with suspicious regions = detection system being cautious, likely still REAL
5. Base your confidence on how far the scores are from 0.5 threshold

TASK:
Based on the analysis above, provide:

1. **VERDICT**: State clearly if this is "REAL" or "DEEPFAKE"
   - Must align with the overall scores
   - If avg score < 0.5, verdict should typically be REAL
   - If avg score > 0.5, verdict should typically be DEEPFAKE

2. **CONFIDENCE**: Your confidence level (0-100%)
   - Base this on how definitive the scores are
   - Score near 0.0 or 1.0 = high confidence
   - Score near 0.5 = low confidence

3. **KEY FINDINGS**: Summarize the most important patterns found
   - Focus on intervals with scores > 0.6 (those are actually suspicious)
   - Mention if scores are consistently low (indicates authentic content)

4. **SUSPICIOUS INTERVALS**: Only list intervals where fake_score > 0.6
   - If no intervals exceed 0.6, state "No highly suspicious intervals detected"

5. **EVIDENCE SUMMARY**:
   - Video evidence: Mention specific facial regions only if video score > 0.5
   - Audio evidence: Mention audio patterns only if audio score > 0.5
   - If scores are low, acknowledge the content appears authentic

6. **EXPLANATION**: In 2-3 sentences, explain your verdict
   - Reference the numerical scores explicitly
   - Explain in simple terms what the scores mean for this video

CRITICAL REMINDER: Your verdict MUST be consistent with the numerical scores. Do not declare something a deepfake if the scores indicate it's real (< 0.5).

Format your response as a clear, structured analysis that a non-technical person could understand."""

    return prompt
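The prompt builder is a pure function, so it can be sanity-checked with made-up inputs; none of the values below come from a real analysis:

```python
# Print the start of the prompt produced for a tiny fabricated result set.
from analysis.prompt import _create_analysis_prompt

analysis_json = {
    "total_intervals": 1,
    "intervals": [{
        "interval_id": 0, "time_range": "0.0-2.0",
        "video_analysis": {"fake_score": 0.12}, "audio_analysis": {"fake_score": 0.08},
    }],
}
overall_scores = {"overall_video_score": 0.12, "overall_audio_score": 0.08,
                  "overall_combined_score": 0.1}
video_info = {"duration": 2.0}

print(_create_analysis_prompt(analysis_json, overall_scores, video_info)[:400])
```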
analysis/video_analyser.py
ADDED
import cv2
import torch
import numpy as np
from PIL import Image
from typing import List, Dict
from collections import Counter
from models.load_models import model_loader

class VideoAnalyzer:
    """Simple, reliable video analyzer for hackathon demo"""

    def __init__(self):
        self.model, self.processor = model_loader.load_video_model()
        self.device = model_loader.get_device()

        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )

    def detect_face(self, frame: np.ndarray) -> Dict:
        """Detect face in frame"""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)

        if len(faces) > 0:
            x, y, w, h = max(faces, key=lambda f: f[2] * f[3])
            face_crop = frame[y:y+h, x:x+w]

            return {
                'detected': True,
                'bbox': {'x': int(x), 'y': int(y), 'w': int(w), 'h': int(h)},
                'face_crop': face_crop
            }

        return {'detected': False, 'bbox': None, 'face_crop': None}

    def predict_deepfake(self, frame: np.ndarray) -> Dict:
        """Predict if frame is deepfake"""
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        pil_img = Image.fromarray(frame_rgb)

        inputs = self.processor(images=pil_img, return_tensors="pt")

        if self.device == "cuda":
            inputs = {k: v.cuda() for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits
            probs = torch.nn.functional.softmax(logits, dim=-1)

        fake_prob = probs[0][1].item() if probs.shape[1] > 1 else probs[0][0].item()
        confidence = max(probs[0]).item()

        return {
            'fake_score': round(fake_prob, 3),
            'confidence': round(confidence, 3),
            'label': 'fake' if fake_prob > 0.5 else 'real'
        }

    def detect_suspicious_regions(self, face: np.ndarray, fake_score: float) -> List[str]:
        try:
            gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
            h, w = gray.shape

            suspicious_regions = []

            regions = {
                'eyes': (int(h*0.25), int(h*0.45), int(w*0.15), int(w*0.85)),
                'nose': (int(h*0.40), int(h*0.65), int(w*0.35), int(w*0.65)),
                'mouth': (int(h*0.60), int(h*0.80), int(w*0.30), int(w*0.70)),
                'forehead': (int(h*0.08), int(h*0.28), int(w*0.25), int(w*0.75)),
                'cheeks': (int(h*0.45), int(h*0.70), int(w*0.15), int(w*0.85)),
                'chin': (int(h*0.75), int(h*0.95), int(w*0.30), int(w*0.70))
            }

            for region_name, (y1, y2, x1, x2) in regions.items():
                region = gray[y1:y2, x1:x2]

                if region.size == 0:
                    continue

                suspicious = False

                variance = np.var(region)
                if variance < 200 or variance > 8000:
                    suspicious = True

                edges = cv2.Canny(region, 50, 150)
                edge_density = np.sum(edges > 0) / edges.size
                if edge_density < 0.05:
                    suspicious = True

                if fake_score > 0.7 and variance < 400:
                    suspicious = True

                if suspicious:
                    suspicious_regions.append(region_name)

            left_half = gray[:, :w//2]
            right_half = np.fliplr(gray[:, w//2:])

            min_width = min(left_half.shape[1], right_half.shape[1])
            left_half = left_half[:, :min_width]
            right_half = right_half[:, :min_width]

            symmetry_diff = np.mean(np.abs(left_half.astype(float) - right_half.astype(float)))

            if symmetry_diff < 10:
                suspicious_regions.append('unnatural_symmetry')

            return suspicious_regions if suspicious_regions else ['none']

        except Exception as e:
            print(f"Region detection error: {e}")
            return ['analysis_error']

    def analyze_interval(self, interval_data: Dict) -> Dict:
        """Analyze all frames in an interval"""
        frames_data = interval_data['video_data']

        if not frames_data:
            return {
                'interval_id': interval_data['interval_id'],
                'interval': interval_data['interval'],
                'fake_score': 0.0,
                'confidence': 0.0,
                'suspicious_regions': [],
                'face_detected': False,
                'frame_results': []
            }

        frame_results = []
        total_fake_score = 0
        faces_detected = 0
        all_regions = []

        for frame_data in frames_data:
            frame = frame_data['frame']
            timestamp = frame_data['timestamp']

            face_info = self.detect_face(frame)

            if face_info['detected']:
                faces_detected += 1
                pred = self.predict_deepfake(face_info['face_crop'])
                regions = self.detect_suspicious_regions(face_info['face_crop'], pred['fake_score'])
            else:
                pred = self.predict_deepfake(frame)
                regions = ['no_face_detected']

            total_fake_score += pred['fake_score']
            all_regions.extend(regions)

            frame_results.append({
                'timestamp': timestamp,
                'fake_score': pred['fake_score'],
                'confidence': pred['confidence'],
                'face_detected': face_info['detected'],
                'regions': regions
            })

        avg_fake_score = total_fake_score / len(frames_data)

        region_counts = Counter(all_regions)
        threshold = len(frames_data) * 0.5

        consistent_regions = [
            region for region, count in region_counts.items()
            if count >= threshold and region not in ['none', 'no_face_detected', 'analysis_error']
        ]

        return {
            'interval_id': interval_data['interval_id'],
            'interval': interval_data['interval'],
            'fake_score': round(avg_fake_score, 3),
            'confidence': round(np.mean([f['confidence'] for f in frame_results]), 3),
            'suspicious_regions': consistent_regions if consistent_regions else list(set(all_regions)),
            'face_detected': faces_detected > 0,
            'frame_count': len(frames_data),
            'frames_with_faces': faces_detected,
            'frame_results': frame_results
        }
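Similarly, a minimal sketch of scoring a single image with `VideoAnalyzer` outside the pipeline. It assumes the video model has been downloaded and `frame.jpg` is a placeholder for any image OpenCV can read:

```python
# Standalone single-frame scoring (illustrative only).
import cv2

from analysis.video_analyser import VideoAnalyzer

frame = cv2.imread("frame.jpg")                        # BGR array, as the class expects

analyzer = VideoAnalyzer()
face = analyzer.detect_face(frame)
target = face["face_crop"] if face["detected"] else frame

pred = analyzer.predict_deepfake(target)               # {'fake_score', 'confidence', 'label'}
print(pred)
if face["detected"]:
    print(analyzer.detect_suspicious_regions(face["face_crop"], pred["fake_score"]))
```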
extraction/__init__.py
ADDED
(empty file)
extraction/media_extractor.py
ADDED
import os
import cv2
import librosa
import subprocess
import numpy as np
from typing import List, Dict, Tuple
from extraction.timeline_generator import TimelineGenerator

class MediaExtractor:

    def __init__(self, frames_per_interval: int = 5):
        self.frames_per_interval = frames_per_interval

    def get_video_info(self, video_path: str) -> Dict:

        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = total_frames / fps if fps > 0 else 0

        cap.release()

        return {
            'fps': fps,
            'total_frames': total_frames,
            'duration': duration
        }

    def extract_frames(self, video_path: str, timeline: List[Dict]) -> List[Dict]:

        cap = cv2.VideoCapture(video_path)

        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        for interval in timeline:

            sample_times = np.linspace(
                interval['start'],
                interval['end'],
                self.frames_per_interval,
                endpoint=False
            )

            for sample_time in sample_times:
                cap.set(cv2.CAP_PROP_POS_MSEC, sample_time * 1000)
                ret, frame = cap.read()

                if ret:
                    interval['video_data'].append({
                        'frame': frame,
                        'timestamp': round(sample_time, 2)
                    })

        cap.release()

        return timeline

    def extract_audio(self, video_path: str, timeline: List[Dict]) -> List[Dict]:

        temp_audio = "temp_audio.wav"
        command = [
            'ffmpeg', '-i', video_path,
            '-vn', '-acodec', 'pcm_s16le',
            '-ar', '16000', '-ac', '1',
            '-y', temp_audio
        ]

        try:
            subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
            has_audio = os.path.exists(temp_audio) and os.path.getsize(temp_audio) > 0
        except subprocess.CalledProcessError:
            has_audio = False

        if not has_audio:
            print("Warning: No audio track detected in video")
            for interval in timeline:
                interval['audio_data'] = {
                    'audio': np.zeros(16000 * 2),
                    'sample_rate': 16000,
                    'has_audio': False
                }
            return timeline

        audio, sr = librosa.load(temp_audio, sr=16000, mono=True)

        for interval in timeline:
            start_sample = int(interval['start'] * sr)
            end_sample = int(interval['end'] * sr)
            end_sample = min(end_sample, len(audio))
            audio_chunk = audio[start_sample:end_sample]

            if len(audio_chunk) < sr * 0.5:
                audio_chunk = np.pad(audio_chunk, (0, int(sr * 0.5) - len(audio_chunk)))

            interval['audio_data'] = {
                'audio': audio_chunk,
                'sample_rate': sr,
                'has_audio': True
            }

        if os.path.exists(temp_audio):
            os.remove(temp_audio)

        return timeline


    def extract_all(self, video_path: str, interval_duration: float = 2.0) -> Tuple[List[Dict], Dict]:

        video_info = self.get_video_info(video_path)

        timeline_gen = TimelineGenerator(interval_duration)
        timeline = timeline_gen.create_timeline(video_info['duration'])

        timeline = self.extract_frames(video_path, timeline)
        timeline = self.extract_audio(video_path, timeline)

        return timeline, video_info
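A sketch of running the extractor standalone, which mirrors the first stage of the pipeline. It assumes FFmpeg is on the PATH and `sample.mp4` is a placeholder path:

```python
# Extract frames and audio chunks for every 2-second interval of a video.
from extraction.media_extractor import MediaExtractor

extractor = MediaExtractor(frames_per_interval=5)
timeline, video_info = extractor.extract_all("sample.mp4", interval_duration=2.0)

print(video_info)        # {'fps': ..., 'total_frames': ..., 'duration': ...}
first = timeline[0]
print(first["interval"], len(first["video_data"]), first["audio_data"]["has_audio"])
```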
extraction/timeline_generator.py
ADDED
import numpy as np
from typing import List, Dict

class TimelineGenerator:

    def __init__(self, interval_duration: float = 2.0):
        self.interval_duration = interval_duration

    def create_timeline(self, video_duration: float) -> List[Dict]:

        num_intervals = int(np.ceil(video_duration / self.interval_duration))

        timeline = []
        for i in range(num_intervals):
            start_time = i * self.interval_duration
            end_time = min((i + 1) * self.interval_duration, video_duration)

            timeline.append({
                'interval_id': i,
                'start': round(start_time, 2),
                'end': round(end_time, 2),
                'interval': f"{start_time:.1f}-{end_time:.1f}",
                'duration': round(end_time - start_time, 2),
                'video_data': [],
                'audio_data': None,
                'video_results': None,
                'audio_results': None
            })

        return timeline

    def get_interval_for_timestamp(self, timeline: List[Dict], timestamp: float) -> Dict:

        for interval in timeline:
            if interval['start'] <= timestamp < interval['end']:
                return interval

        return timeline[-1]
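For a concrete sense of the timeline structure the rest of the system fills in, this is what `create_timeline` produces for a 5-second video at the default 2-second interval:

```python
from extraction.timeline_generator import TimelineGenerator

gen = TimelineGenerator(interval_duration=2.0)
for item in gen.create_timeline(5.0):
    print(item["interval_id"], item["interval"], item["duration"])
# 0 0.0-2.0 2.0
# 1 2.0-4.0 2.0
# 2 4.0-5.0 1.0
```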
main.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException, Query
|
| 2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import JSONResponse
|
| 4 |
+
from pydantic import BaseModel
|
| 5 |
+
from typing import List
|
| 6 |
+
from contextlib import asynccontextmanager
|
| 7 |
+
import os
|
| 8 |
+
import uuid
|
| 9 |
+
import shutil
|
| 10 |
+
import json
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
from pipeline import DeepfakeDetectionPipeline
|
| 14 |
+
|
| 15 |
+
analysis_history = []
|
| 16 |
+
MAX_HISTORY = 10
|
| 17 |
+
|
| 18 |
+
@asynccontextmanager
|
| 19 |
+
async def lifespan(app: FastAPI):
|
| 20 |
+
yield
|
| 21 |
+
|
| 22 |
+
app = FastAPI(
|
| 23 |
+
title="DeepDefend API",
|
| 24 |
+
description="Advanced Deepfake Detection System with Multi-Modal Analysis",
|
| 25 |
+
version="1.0.0",
|
| 26 |
+
lifespan=lifespan
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
app.add_middleware(
|
| 30 |
+
CORSMiddleware,
|
| 31 |
+
allow_origins=["*"],
|
| 32 |
+
allow_credentials=True,
|
| 33 |
+
allow_methods=["*"],
|
| 34 |
+
allow_headers=["*"],
|
| 35 |
+
)
|
| 36 |
+
|
| 37 |
+
UPLOAD_DIR = Path("uploads")
|
| 38 |
+
UPLOAD_DIR.mkdir(exist_ok=True)
|
| 39 |
+
|
| 40 |
+
pipeline = None
|
| 41 |
+
|
| 42 |
+
def get_pipeline():
|
| 43 |
+
global pipeline
|
| 44 |
+
if pipeline is None:
|
| 45 |
+
print("Loading DeepDefend Pipeline...")
|
| 46 |
+
pipeline = DeepfakeDetectionPipeline()
|
| 47 |
+
return pipeline
|
| 48 |
+
|
| 49 |
+
class AnalysisResult(BaseModel):
|
| 50 |
+
verdict: str
|
| 51 |
+
confidence: float
|
| 52 |
+
overall_scores: dict
|
| 53 |
+
detailed_analysis: str
|
| 54 |
+
suspicious_intervals: list
|
| 55 |
+
total_intervals_analyzed: int
|
| 56 |
+
video_info: dict
|
| 57 |
+
analysis_id: str
|
| 58 |
+
timestamp: str
|
| 59 |
+
|
| 60 |
+
class HistoryItem(BaseModel):
|
| 61 |
+
analysis_id: str
|
| 62 |
+
filename: str
|
| 63 |
+
verdict: str
|
| 64 |
+
confidence: float
|
| 65 |
+
timestamp: str
|
| 66 |
+
video_duration: float
|
| 67 |
+
|
| 68 |
+
class StatsResponse(BaseModel):
|
| 69 |
+
total_analyses: int
|
| 70 |
+
deepfakes_detected: int
|
| 71 |
+
real_videos: int
|
| 72 |
+
avg_confidence: float
|
| 73 |
+
avg_video_score: float
|
| 74 |
+
avg_audio_score: float
|
| 75 |
+
|
| 76 |
+
class IntervalDetail(BaseModel):
|
| 77 |
+
interval_id: int
|
| 78 |
+
time_range: str
|
| 79 |
+
video_score: float
|
| 80 |
+
audio_score: float
|
| 81 |
+
verdict: str
|
| 82 |
+
suspicious_regions: dict
|
| 83 |
+
|
| 84 |
+
def add_to_history(analysis_data: dict):
|
| 85 |
+
"""Add analysis to history"""
|
| 86 |
+
history_item = {
|
| 87 |
+
"analysis_id": analysis_data["analysis_id"],
|
| 88 |
+
"filename": analysis_data["filename"],
|
| 89 |
+
"verdict": analysis_data["verdict"],
|
| 90 |
+
"confidence": analysis_data["confidence"],
|
| 91 |
+
"timestamp": analysis_data["timestamp"],
|
| 92 |
+
"video_duration": analysis_data["video_info"]["duration"],
|
| 93 |
+
"overall_scores": analysis_data["overall_scores"]
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
analysis_history.insert(0, history_item)
|
| 97 |
+
|
| 98 |
+
if len(analysis_history) > MAX_HISTORY:
|
| 99 |
+
analysis_history.pop()
|
| 100 |
+
|
| 101 |
+
@app.get("/")
|
| 102 |
+
async def root():
|
| 103 |
+
return {
|
| 104 |
+
"service": "DeepDefend API",
|
| 105 |
+
"version": "1.0.0",
|
| 106 |
+
"status": "online",
|
| 107 |
+
"description": "Advanced Multi-Modal Deepfake Detection",
|
| 108 |
+
"features": [
|
| 109 |
+
"Video frame-by-frame analysis",
|
| 110 |
+
"Audio deepfake detection",
|
| 111 |
+
"AI-powered evidence fusion",
|
| 112 |
+
"Frame-level heatmap generation",
|
| 113 |
+
"Interval breakdown analysis",
|
| 114 |
+
"Analysis history tracking"
|
| 115 |
+
],
|
| 116 |
+
"endpoints": {
|
| 117 |
+
"analyze": "POST /api/analyze",
|
| 118 |
+
"history": "GET /api/history",
|
| 119 |
+
"stats": "GET /api/stats",
|
| 120 |
+
"intervals": "GET /api/intervals/{analysis_id}",
|
| 121 |
+
"compare": "GET /api/compare",
|
| 122 |
+
"health": "GET /api/health"
|
| 123 |
+
}
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
@app.get("/api/health")
|
| 127 |
+
async def health():
|
| 128 |
+
"""Health check with system info"""
|
| 129 |
+
return {
|
| 130 |
+
"status": "healthy",
|
| 131 |
+
"pipeline_loaded": pipeline is not None,
|
| 132 |
+
"total_analyses": len(analysis_history),
|
| 133 |
+
"storage_used_mb": sum(
|
| 134 |
+
f.stat().st_size for f in UPLOAD_DIR.glob('*') if f.is_file()
|
| 135 |
+
) / (1024 * 1024) if UPLOAD_DIR.exists() else 0,
|
| 136 |
+
"timestamp": datetime.now().isoformat()
|
| 137 |
+
}
|
| 138 |
+
|
| 139 |
+
@app.post("/api/analyze", response_model=AnalysisResult)
|
| 140 |
+
async def analyze_video(
|
| 141 |
+
file: UploadFile = File(...),
|
| 142 |
+
interval_duration: float = Query(default=2.0, ge=1.0, le=5.0)
|
| 143 |
+
):
|
| 144 |
+
"""
|
| 145 |
+
Upload and analyze video for deepfakes
|
| 146 |
+
|
| 147 |
+
Returns complete analysis with:
|
| 148 |
+
- Overall verdict and confidence
|
| 149 |
+
- Video/audio scores
|
| 150 |
+
- Suspicious intervals
|
| 151 |
+
- AI-generated detailed analysis
|
| 152 |
+
"""
|
| 153 |
+
|
| 154 |
+
allowed_extensions = ['.mp4', '.avi', '.mov', '.mkv', '.webm']
|
| 155 |
+
file_ext = os.path.splitext(file.filename)[1].lower()
|
| 156 |
+
|
| 157 |
+
if file_ext not in allowed_extensions:
|
| 158 |
+
raise HTTPException(
|
| 159 |
+
status_code=400,
|
| 160 |
+
detail=f"Invalid file type. Allowed: {', '.join(allowed_extensions)}"
|
| 161 |
+
)
|
| 162 |
+
|
| 163 |
+
file.file.seek(0, 2)
|
| 164 |
+
file_size = file.file.tell()
|
| 165 |
+
file.file.seek(0)
|
| 166 |
+
|
| 167 |
+
if file_size > 250 * 1024 * 1024:
|
| 168 |
+
raise HTTPException(status_code=400, detail="File too large. Max: 250MB")
|
| 169 |
+
|
| 170 |
+
if file_size < 100 * 1024:
|
| 171 |
+
raise HTTPException(status_code=400, detail="File too small. Min: 100KB")
|
| 172 |
+
|
| 173 |
+
analysis_id = str(uuid.uuid4())
|
| 174 |
+
video_path = UPLOAD_DIR / f"{analysis_id}{file_ext}"
|
| 175 |
+
|
| 176 |
+
try:
|
| 177 |
+
with open(video_path, "wb") as buffer:
|
| 178 |
+
shutil.copyfileobj(file.file, buffer)
|
| 179 |
+
|
| 180 |
+
pipe = get_pipeline()
|
| 181 |
+
|
| 182 |
+
print(f"\nAnalyzing: {file.filename}")
|
| 183 |
+
results = pipe.analyze_video(str(video_path), interval_duration)
|
| 184 |
+
|
| 185 |
+
final_report = results['final_report']
|
| 186 |
+
video_info = results['video_info']
|
| 187 |
+
|
| 188 |
+
analysis_data = {
|
| 189 |
+
"analysis_id": analysis_id,
|
| 190 |
+
"filename": file.filename,
|
| 191 |
+
"verdict": final_report['verdict'],
|
| 192 |
+
"confidence": final_report['confidence'],
|
| 193 |
+
"overall_scores": final_report['overall_scores'],
|
| 194 |
+
"detailed_analysis": final_report['detailed_analysis'],
|
| 195 |
+
"suspicious_intervals": final_report['suspicious_intervals'],
|
| 196 |
+
"total_intervals_analyzed": final_report['total_intervals_analyzed'],
|
| 197 |
+
"video_info": {
|
| 198 |
+
"duration": video_info['duration'],
|
| 199 |
+
"fps": video_info['fps'],
|
| 200 |
+
"total_frames": video_info['total_frames'],
|
| 201 |
+
"file_size_mb": round(file_size / (1024 * 1024), 2)
|
| 202 |
+
},
|
| 203 |
+
"timestamp": datetime.now().isoformat()
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
add_to_history(analysis_data)
|
| 207 |
+
|
| 208 |
+
interval_data = {
|
| 209 |
+
'analysis_id': analysis_id,
|
| 210 |
+
'timeline': [
|
| 211 |
+
{
|
| 212 |
+
'interval_id': interval['interval_id'],
|
| 213 |
+
'interval': interval['interval'],
|
| 214 |
+
'start': interval['start'],
|
| 215 |
+
'end': interval['end'],
|
| 216 |
+
'video_results': interval.get('video_results'),
|
| 217 |
+
'audio_results': interval.get('audio_results')
|
| 218 |
+
}
|
| 219 |
+
for interval in results.get('timeline', [])
|
| 220 |
+
]
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
results_path = UPLOAD_DIR / f"{analysis_id}_results.json"
|
| 224 |
+
with open(results_path, 'w') as f:
|
| 225 |
+
json.dump(interval_data, f, indent=2)
|
| 226 |
+
|
| 227 |
+
return AnalysisResult(**analysis_data)
|
| 228 |
+
|
| 229 |
+
except Exception as e:
|
| 230 |
+
print(f"Error: {e}")
|
| 231 |
+
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
| 232 |
+
|
| 233 |
+
finally:
|
| 234 |
+
if video_path.exists():
|
| 235 |
+
os.remove(video_path)
|
| 236 |
+
|
@app.get("/api/history", response_model=List[HistoryItem])
async def get_history(limit: int = Query(default=10, ge=1, le=50)):
    """Get recent analysis history"""
    return [
        HistoryItem(
            analysis_id=item["analysis_id"],
            filename=item["filename"],
            verdict=item["verdict"],
            confidence=item["confidence"],
            timestamp=item["timestamp"],
            video_duration=item["video_duration"]
        )
        for item in analysis_history[:limit]
    ]

@app.get("/api/stats", response_model=StatsResponse)
async def get_stats():
    """Get overall statistics"""

    if not analysis_history:
        return StatsResponse(
            total_analyses=0,
            deepfakes_detected=0,
            real_videos=0,
            avg_confidence=0.0,
            avg_video_score=0.0,
            avg_audio_score=0.0
        )

    deepfakes = sum(1 for item in analysis_history if item["verdict"] == "DEEPFAKE")
    real = len(analysis_history) - deepfakes

    avg_confidence = sum(item["confidence"] for item in analysis_history) / len(analysis_history)
    avg_video = sum(item["overall_scores"]["overall_video_score"] for item in analysis_history) / len(analysis_history)
    avg_audio = sum(item["overall_scores"]["overall_audio_score"] for item in analysis_history) / len(analysis_history)

    return StatsResponse(
        total_analyses=len(analysis_history),
        deepfakes_detected=deepfakes,
        real_videos=real,
        avg_confidence=round(avg_confidence, 2),
        avg_video_score=round(avg_video, 3),
        avg_audio_score=round(avg_audio, 3)
    )

@app.get("/api/intervals/{analysis_id}")
async def get_interval_details(analysis_id: str):
    """Get detailed interval-by-interval breakdown"""

    results_path = UPLOAD_DIR / f"{analysis_id}_results.json"

    if not results_path.exists():
        raise HTTPException(status_code=404, detail="Analysis not found")

    with open(results_path, 'r') as f:
        interval_data = json.load(f)

    timeline = interval_data.get('timeline', [])

    intervals = []
    for interval in timeline:
        # video_results / audio_results may have been stored as null; fall back to {}
        video_res = interval.get('video_results') or {}
        audio_res = interval.get('audio_results') or {}

        avg_score = (video_res.get('fake_score', 0) + audio_res.get('fake_score', 0)) / 2

        intervals.append({
            "interval_id": interval['interval_id'],
            "time_range": interval['interval'],
            "start": interval['start'],
            "end": interval['end'],
            "video_score": video_res.get('fake_score', 0),
            "audio_score": audio_res.get('fake_score', 0),
            "combined_score": round(avg_score, 3),
            "verdict": "SUSPICIOUS" if avg_score > 0.6 else "NORMAL",
            "suspicious_regions": {
                "video": video_res.get('suspicious_regions', []),
                "audio": audio_res.get('suspicious_regions', [])
            },
            "has_face": video_res.get('face_detected', False),
            "has_audio": audio_res.get('has_audio', False)
        })

    return {
        "analysis_id": analysis_id,
        "total_intervals": len(intervals),
        "intervals": intervals
    }

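# Worked example of the scoring rule above (illustrative numbers, not real output):
#   video fake_score = 0.80, audio fake_score = 0.50
#   combined_score   = (0.80 + 0.50) / 2 = 0.65  ->  0.65 > 0.6, verdict "SUSPICIOUS"
# If either channel's results lack a fake_score (e.g. no audio track), that channel
# contributes 0, which pulls the combined score toward the "NORMAL" side.
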
@app.get("/api/compare")
async def compare_scores():
    """Compare video vs audio detection rates"""

    if not analysis_history:
        return {
            "message": "No analysis data available",
            "comparison": None
        }

    video_higher = 0
    audio_higher = 0
    equal = 0

    for item in analysis_history:
        scores = item["overall_scores"]
        v_score = scores["overall_video_score"]
        a_score = scores["overall_audio_score"]

        if v_score > a_score:
            video_higher += 1
        elif a_score > v_score:
            audio_higher += 1
        else:
            equal += 1

    return {
        "total_analyses": len(analysis_history),
        "comparison": {
            "video_better_detection": video_higher,
            "audio_better_detection": audio_higher,
            "equal_detection": equal
        },
        "percentages": {
            "video_dominant": round((video_higher / len(analysis_history)) * 100, 1),
            "audio_dominant": round((audio_higher / len(analysis_history)) * 100, 1),
            "balanced": round((equal / len(analysis_history)) * 100, 1)
        }
    }

@app.get("/api/recent-verdict")
async def get_recent_verdict_distribution(limit: int = Query(default=20, ge=5, le=50)):
    """Get verdict distribution for recent analyses"""

    recent = analysis_history[:limit]

    if not recent:
        # keep the same keys as the non-empty response so clients see one shape
        return {
            "total": 0,
            "deepfakes": 0,
            "real": 0,
            "deepfake_rate": 0.0,
            "confidence_distribution": {}
        }

    deepfakes = sum(1 for item in recent if item["verdict"] == "DEEPFAKE")
    real = len(recent) - deepfakes

    distribution = {
        "very_confident": 0,
        "confident": 0,
        "moderate": 0,
        "low": 0
    }

    for item in recent:
        conf = item["confidence"]
        if conf >= 80:
            distribution["very_confident"] += 1
        elif conf >= 60:
            distribution["confident"] += 1
        elif conf >= 40:
            distribution["moderate"] += 1
        else:
            distribution["low"] += 1

    return {
        "total": len(recent),
        "deepfakes": deepfakes,
        "real": real,
        "deepfake_rate": round((deepfakes / len(recent)) * 100, 1),
        "confidence_distribution": distribution
    }

@app.delete("/api/clear-history")
async def clear_history():
    """Clear analysis history (for demo reset)"""
    global analysis_history

    count = len(analysis_history)
    analysis_history.clear()

    for file in UPLOAD_DIR.glob("*_results.json"):
        os.remove(file)

    return {
        "message": "History cleared",
        "items_removed": count
    }

@app.exception_handler(HTTPException)
async def http_exception_handler(request, exc):
    return JSONResponse(
        status_code=exc.status_code,
        content={"error": exc.detail, "status_code": exc.status_code}
    )

@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    print(f"Error: {exc}")
    return JSONResponse(
        status_code=500,
        content={"error": "Internal server error", "detail": str(exc)}
    )
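For reference, a minimal client sketch that exercises the endpoints above (not part of the commit). It assumes the service is reachable on port 7860, that the upload route declared earlier in main.py is /api/analyze, and that interval_duration is accepted as a query parameter; adjust those assumptions and the placeholder file name to your setup.

# Hypothetical client sketch -- route path and parameter names for the upload
# endpoint are assumptions; the /api/intervals and /api/stats routes are as above.
import requests

BASE = "http://localhost:7860"

# Upload a video for analysis.
with open("sample.mp4", "rb") as f:  # placeholder file name
    resp = requests.post(
        f"{BASE}/api/analyze",
        files={"file": ("sample.mp4", f, "video/mp4")},
        params={"interval_duration": 2.0},
        timeout=600,
    )
resp.raise_for_status()
report = resp.json()
print(report["verdict"], report["confidence"])

# Fetch the interval-by-interval breakdown saved by the analyze endpoint.
intervals = requests.get(f"{BASE}/api/intervals/{report['analysis_id']}").json()
for item in intervals["intervals"]:
    print(item["time_range"], item["combined_score"], item["verdict"])

# Aggregate statistics over the in-memory history.
print(requests.get(f"{BASE}/api/stats").json())
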
models/.gitkeep
ADDED
File without changes

models/__init__.py
ADDED
File without changes

models/audio_model/.gitkeep
ADDED
File without changes

models/download_model.py
ADDED
@@ -0,0 +1,33 @@
from transformers import AutoModelForImageClassification, AutoImageProcessor
from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
import os

def download_models():

    os.makedirs("./models/video_model", exist_ok=True)
    os.makedirs("./models/audio_model", exist_ok=True)

    print("Downloading video deepfake detection model...")
    video_model_name = "dima806/deepfake_vs_real_image_detection"

    video_model = AutoModelForImageClassification.from_pretrained(video_model_name)
    video_processor = AutoImageProcessor.from_pretrained(video_model_name)

    video_model.save_pretrained("./models/video_model")
    video_processor.save_pretrained("./models/video_model")
    print("Video model saved to ./models/video_model")

    print("\nDownloading audio deepfake detection model...")
    audio_model_name = "mo-thecreator/Deepfake-audio-detection"

    audio_model = AutoModelForAudioClassification.from_pretrained(audio_model_name)
    audio_processor = AutoFeatureExtractor.from_pretrained(audio_model_name)

    audio_model.save_pretrained("./models/audio_model")
    audio_processor.save_pretrained("./models/audio_model")
    print("Audio model saved to ./models/audio_model")

    print("\nAll models downloaded successfully!")

if __name__ == "__main__":
    download_models()
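A quick sanity check for the download step (a sketch, not part of the commit): run the script from the repository root so the relative ./models/... paths resolve, then reload the snapshots with local_files_only=True to confirm nothing needs to be fetched from the Hub.

# Hypothetical verification sketch -- run after `python models/download_model.py`
# from the repository root so the relative paths match.
from transformers import AutoModelForImageClassification, AutoModelForAudioClassification

# local_files_only=True fails loudly if a snapshot is incomplete,
# instead of silently re-downloading from the Hugging Face Hub.
video_model = AutoModelForImageClassification.from_pretrained(
    "./models/video_model", local_files_only=True
)
audio_model = AutoModelForAudioClassification.from_pretrained(
    "./models/audio_model", local_files_only=True
)

print("video labels:", video_model.config.id2label)
print("audio labels:", audio_model.config.id2label)
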
models/load_models.py
ADDED
@@ -0,0 +1,49 @@
from transformers import AutoModelForImageClassification, AutoImageProcessor
from transformers import AutoModelForAudioClassification, AutoFeatureExtractor
import torch

class ModelLoader:

    _instance = None
    _video_model = None
    _video_processor = None
    _audio_model = None
    _audio_processor = None

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super(ModelLoader, cls).__new__(cls)
        return cls._instance

    def load_video_model(self):
        if self._video_model is None:
            self._video_model = AutoModelForImageClassification.from_pretrained("./models/video_model")
            self._video_processor = AutoImageProcessor.from_pretrained("./models/video_model")

            self._video_model.eval()

            if torch.cuda.is_available():
                self._video_model = self._video_model.cuda()

            print("Video model loaded!")

        return self._video_model, self._video_processor

    def load_audio_model(self):
        if self._audio_model is None:
            self._audio_model = AutoModelForAudioClassification.from_pretrained("./models/audio_model")
            self._audio_processor = AutoFeatureExtractor.from_pretrained("./models/audio_model")

            self._audio_model.eval()

            if torch.cuda.is_available():
                self._audio_model = self._audio_model.cuda()

            print("Audio model loaded!")

        return self._audio_model, self._audio_processor

    def get_device(self):
        return "cuda" if torch.cuda.is_available() else "cpu"

model_loader = ModelLoader()
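Usage sketch for the singleton loader (illustrative only, not part of the commit): the first call loads and caches the model and processor, later calls return the cached objects. The frame path below is a placeholder.

# Hypothetical usage sketch for ModelLoader -- "frame_000.jpg" is a placeholder.
import torch
from PIL import Image
from models.load_models import model_loader

model, processor = model_loader.load_video_model()  # loaded once, cached afterwards
device = model_loader.get_device()

image = Image.open("frame_000.jpg").convert("RGB")
inputs = processor(images=image, return_tensors="pt").to(device)

with torch.no_grad():
    logits = model(**inputs).logits
probs = torch.softmax(logits, dim=-1)[0]

for idx, p in enumerate(probs.tolist()):
    print(model.config.id2label[idx], round(p, 3))
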
models/video_model/.gitkeep
ADDED
File without changes

pipeline.py
ADDED
@@ -0,0 +1,35 @@
from typing import Dict
from extraction.media_extractor import MediaExtractor
from analysis.video_analyser import VideoAnalyzer
from analysis.audio_analyser import AudioAnalyzer
from analysis.llm_analyser import LLMFusion

class DeepfakeDetectionPipeline:
    """Complete deepfake detection pipeline"""

    def __init__(self):
        self.media_extractor = MediaExtractor(frames_per_interval=5)
        self.video_analyzer = VideoAnalyzer()
        self.audio_analyzer = AudioAnalyzer()
        self.llm_fusion = LLMFusion()

    def analyze_video(self, video_path: str, interval_duration: float = 2.0) -> Dict:

        timeline, video_info = self.media_extractor.extract_all(video_path, interval_duration)

        for interval in timeline:
            video_results = self.video_analyzer.analyze_interval(interval)
            interval['video_results'] = video_results

        for interval in timeline:
            audio_results = self.audio_analyzer.analyze_interval(interval)
            interval['audio_results'] = audio_results

        final_report = self.llm_fusion.generate_report(timeline, video_info)

        return {
            'video_info': video_info,
            'timeline': timeline,
            'final_report': final_report,
            'summary': final_report  # reuse the report already generated instead of invoking the LLM a second time
        }
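Minimal end-to-end sketch (assuming the models have been downloaded to ./models and the Gemini API key required by LLMFusion is configured in the environment; "sample.mp4" is a placeholder path):

# Hypothetical usage sketch -- "sample.mp4" is a placeholder.
from pipeline import DeepfakeDetectionPipeline

pipe = DeepfakeDetectionPipeline()
results = pipe.analyze_video("sample.mp4", interval_duration=2.0)

report = results["final_report"]
print("Verdict:", report["verdict"])
print("Confidence:", report["confidence"])
print("Intervals analysed:", report["total_intervals_analyzed"])

for interval in results["timeline"]:
    print(interval["interval"], interval["video_results"], interval["audio_results"])
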
requirements.txt
ADDED
@@ -0,0 +1,35 @@
# FastAPI and Server
fastapi==0.109.0
uvicorn[standard]==0.27.0
python-multipart==0.0.6
pydantic==2.5.3

# Core ML Framework
torch==2.3.1
torchvision==0.18.1
torchaudio==2.3.1
transformers==4.36.2

# Computer Vision
opencv-python-headless==4.9.0.80  # Changed: headless for Docker
Pillow==10.2.0

# Audio Processing
librosa==0.10.1
soundfile==0.12.1
ffmpeg-python==0.2.0
audioread==3.0.1  # Added: required for librosa

# LLM Integration
langchain==0.1.0
langchain-google-genai==0.0.6
google-generativeai==0.3.2

# Data Processing
numpy==1.24.3  # Compatible with librosa
pandas==2.0.3
scipy==1.11.4

# Utilities
requests==2.31.0
python-dotenv==1.0.0
uploads/.gitkeep
ADDED
File without changes