Zakha123-cyber
committed on
Commit
Β·
8e73bed
1
Parent(s):
53ea4a4
Initial deployment: SWARA API with eye tracking, facial expression, and gesture detection
Browse files- .env +27 -0
- Dockerfile +36 -0
- README.md +447 -7
- app.py +18 -0
- app/__init__.py +7 -0
- app/__pycache__/__init__.cpython-312.pyc +0 -0
- app/__pycache__/config.cpython-312.pyc +0 -0
- app/api/__init__.py +3 -0
- app/api/routes.py +288 -0
- app/config.py +78 -0
- app/core/__init__.py +3 -0
- app/core/redis_client.py +107 -0
- app/core/storage.py +113 -0
- app/main.py +131 -0
- app/models.py +115 -0
- app/services/__init__.py +3 -0
- app/services/__pycache__/__init__.cpython-312.pyc +0 -0
- app/services/__pycache__/eye_tracking.cpython-312.pyc +0 -0
- app/services/__pycache__/facial_expression.cpython-312.pyc +0 -0
- app/services/__pycache__/gesture_detection.cpython-312.pyc +0 -0
- app/services/eye_tracking.py +894 -0
- app/services/eye_tracking_production.py +873 -0
- app/services/facial_expression.py +206 -0
- app/services/gesture_detection.py +569 -0
- app/services/struktur_berbicara_nlp.py +578 -0
- app/services/video_processor.py +319 -0
- app/tasks.py +171 -0
- app/worker.py +65 -0
- models/.gitkeep +3 -0
- models/best.onnx +3 -0
- requirements.txt +29 -0
.env
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Environment Configuration
|
| 2 |
+
ENV=production
|
| 3 |
+
|
| 4 |
+
# Redis Configuration (Upstash)
# SECURITY WARNING: the URL below embeds a live credential committed to version
# control — rotate this token and load REDIS_URL from deployment secrets instead.
|
| 5 |
+
REDIS_URL=rediss://default:ASjMAAIncDJkMmEyMzAxMDdhOWI0YzQyOThmNDg3ZjkxMDZkYmQ3ZXAyMTA0NDQ@profound-catfish-10444.upstash.io:6379
|
| 6 |
+
|
| 7 |
+
# API Configuration
|
| 8 |
+
API_HOST=0.0.0.0
|
| 9 |
+
API_PORT=7860
|
| 10 |
+
API_WORKERS=1
|
| 11 |
+
|
| 12 |
+
# Processing Configuration
|
| 13 |
+
MAX_VIDEO_SIZE_MB=50
|
| 14 |
+
MAX_VIDEO_DURATION_SECONDS=60
|
| 15 |
+
TEMP_DIR=./temp
|
| 16 |
+
MODELS_DIR=./models
|
| 17 |
+
|
| 18 |
+
# Task Configuration
|
| 19 |
+
TASK_TIMEOUT_SECONDS=300
|
| 20 |
+
TASK_RESULT_TTL_SECONDS=3600
|
| 21 |
+
|
| 22 |
+
# Rate Limiting
|
| 23 |
+
RATE_LIMIT_REQUESTS=10
|
| 24 |
+
RATE_LIMIT_PERIOD_SECONDS=3600
|
| 25 |
+
|
| 26 |
+
# Logging
|
| 27 |
+
LOG_LEVEL=INFO
|
Dockerfile
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies for OpenCV and MediaPipe
RUN apt-get update && apt-get install -y \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    libgstreamer1.0-0 \
    libgstreamer-plugins-base1.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first (for better caching)
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create necessary directories
RUN mkdir -p temp models logs

# Expose port
EXPOSE 7860

# Health check.
# Fix: probe with the stdlib instead of `requests`, so the check does not
# depend on requests being present in requirements.txt, and so a non-2xx
# /health response raises (urlopen raises HTTPError) and correctly marks
# the container unhealthy. requests.get() would have succeeded even on 500.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/health')"

# Run application
CMD ["python", "-m", "app.main"]
|
README.md
CHANGED
|
@@ -1,11 +1,451 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
|
| 8 |
-
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: SWARA - AI Public Speaking Evaluation
|
| 3 |
+
emoji: π€
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# π€ SWARA API - AI-Powered Public Speaking Evaluation
|
| 11 |
+
|
| 12 |
+
API backend untuk sistem evaluasi public speaking berbasis AI.
|
| 13 |
+
|
| 14 |
+
## π Fitur
|
| 15 |
+
|
| 16 |
+
- β
**Async Video Processing** - Non-blocking video analysis dengan RQ (Redis Queue)
|
| 17 |
+
- β
**Multi-Model AI** - Eye tracking, facial expression, gesture detection
|
| 18 |
+
- β
**Level-based Evaluation** - 5 level kesulitan dengan indikator berbeda
|
| 19 |
+
- β
**RESTful API** - FastAPI dengan OpenAPI documentation
|
| 20 |
+
- β
**Cloud Redis** - Upstash Redis untuk production
|
| 21 |
+
- β
**Progress Tracking** - Real-time progress updates untuk analysis
|
| 22 |
+
|
| 23 |
+
## ποΈ Arsitektur
|
| 24 |
+
|
| 25 |
+
```
|
| 26 |
+
βββββββββββββββββββββββββββββββββββββββββββ
|
| 27 |
+
β Docker Container β
|
| 28 |
+
β β
|
| 29 |
+
β ββββββββββββββββ ββββββββββββββββ β
|
| 30 |
+
β β FastAPI βββββΆβ Redis β β
|
| 31 |
+
β β (Port 7860) β β (Queue & KV) β β
|
| 32 |
+
β ββββββββββββββββ ββββββββββββββββ β
|
| 33 |
+
β β β β
|
| 34 |
+
β β POST /analyze β β
|
| 35 |
+
β β return task_id β β
|
| 36 |
+
β β βΌ β
|
| 37 |
+
β β ββββββββββββββββ β
|
| 38 |
+
β β β RQ Worker β β
|
| 39 |
+
β β β (Background) β β
|
| 40 |
+
β β ββββββββββββββββ β
|
| 41 |
+
β β β β
|
| 42 |
+
β β GET /task/{id} β β
|
| 43 |
+
β ββββββββββββββββββββββ β
|
| 44 |
+
βββββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
+
```
|
| 46 |
+
|
| 47 |
+
## π Quick Start
|
| 48 |
+
|
| 49 |
+
### Prerequisites
|
| 50 |
+
|
| 51 |
+
- Docker & Docker Compose
|
| 52 |
+
- Python 3.10+ (untuk development tanpa Docker)
|
| 53 |
+
|
| 54 |
+
### 1. Clone & Setup
|
| 55 |
+
|
| 56 |
+
```powershell
|
| 57 |
+
# Clone repository (if applicable)
|
| 58 |
+
cd API-MODEL
|
| 59 |
+
|
| 60 |
+
# Copy environment file
|
| 61 |
+
cp .env.example .env
|
| 62 |
+
|
| 63 |
+
# Edit .env jika perlu (optional untuk local development)
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### 2. Run dengan Docker Compose
|
| 67 |
+
|
| 68 |
+
```powershell
|
| 69 |
+
# Build dan start semua services
|
| 70 |
+
docker-compose up --build
|
| 71 |
+
|
| 72 |
+
# Atau run di background
|
| 73 |
+
docker-compose up -d --build
|
| 74 |
+
|
| 75 |
+
# Lihat logs
|
| 76 |
+
docker-compose logs -f
|
| 77 |
+
|
| 78 |
+
# Stop services
|
| 79 |
+
docker-compose down
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
API akan tersedia di: `http://localhost:7860`
|
| 83 |
+
|
| 84 |
+
### 3. Akses Documentation
|
| 85 |
+
|
| 86 |
+
- **Swagger UI**: http://localhost:7860/docs
|
| 87 |
+
- **ReDoc**: http://localhost:7860/redoc
|
| 88 |
+
- **OpenAPI JSON**: http://localhost:7860/openapi.json
|
| 89 |
+
|
| 90 |
+
## π API Endpoints
|
| 91 |
+
|
| 92 |
+
### 1. Health Check
|
| 93 |
+
|
| 94 |
+
```bash
|
| 95 |
+
GET /health
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
**Response:**
|
| 99 |
+
|
| 100 |
+
```json
|
| 101 |
+
{
|
| 102 |
+
"status": "healthy",
|
| 103 |
+
"version": "1.0.0",
|
| 104 |
+
"redis_connected": true,
|
| 105 |
+
"timestamp": "2025-11-10T10:00:00"
|
| 106 |
+
}
|
| 107 |
+
```
|
| 108 |
+
|
| 109 |
+
### 2. Upload Video untuk Analysis
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
POST /api/v1/analyze
|
| 113 |
+
Content-Type: multipart/form-data
|
| 114 |
+
```
|
| 115 |
+
|
| 116 |
+
**Parameters:**
|
| 117 |
+
|
| 118 |
+
- `video` (file): Video file (max 50MB, max 1 minute)
|
| 119 |
+
- `level` (int): Level 1-5
|
| 120 |
+
- `user_id` (string, optional): User identifier
|
| 121 |
+
|
| 122 |
+
**Response:**
|
| 123 |
+
|
| 124 |
+
```json
|
| 125 |
+
{
|
| 126 |
+
"task_id": "abc123def456",
|
| 127 |
+
"status": "pending",
|
| 128 |
+
"message": "Video uploaded successfully. Processing has been queued.",
|
| 129 |
+
"created_at": "2025-11-10T10:00:00"
|
| 130 |
+
}
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### 3. Get Task Status
|
| 134 |
+
|
| 135 |
+
```bash
|
| 136 |
+
GET /api/v1/task/{task_id}
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
**Response (Processing):**
|
| 140 |
+
|
| 141 |
+
```json
|
| 142 |
+
{
|
| 143 |
+
"task_id": "abc123def456",
|
| 144 |
+
"status": "processing",
|
| 145 |
+
"progress": {
|
| 146 |
+
"current_step": "processing",
|
| 147 |
+
"percentage": 45.5,
|
| 148 |
+
"message": "Analyzing facial expressions..."
|
| 149 |
+
},
|
| 150 |
+
"created_at": "2025-11-10T10:00:00"
|
| 151 |
+
}
|
| 152 |
+
```
|
| 153 |
+
|
| 154 |
+
**Response (Completed):**
|
| 155 |
+
|
| 156 |
+
```json
|
| 157 |
+
{
|
| 158 |
+
"task_id": "abc123def456",
|
| 159 |
+
"status": "completed",
|
| 160 |
+
"result": {
|
| 161 |
+
"level": 2,
|
| 162 |
+
"video_metadata": {
|
| 163 |
+
"duration": 58.5,
|
| 164 |
+
"fps": 30,
|
| 165 |
+
"resolution": "1920x1080",
|
| 166 |
+
"file_size": 15728640
|
| 167 |
+
},
|
| 168 |
+
"main_indicators": {
|
| 169 |
+
"kontak_mata": {
|
| 170 |
+
"score": 4,
|
| 171 |
+
"raw_data": {...}
|
| 172 |
+
}
|
| 173 |
+
},
|
| 174 |
+
"bonus_indicators": {
|
| 175 |
+
"first_impression": {
|
| 176 |
+
"detected": true,
|
| 177 |
+
"raw_data": {...}
|
| 178 |
+
},
|
| 179 |
+
"face_expression": {...},
|
| 180 |
+
"gesture": {...}
|
| 181 |
+
},
|
| 182 |
+
"processing_time": 42.3
|
| 183 |
+
},
|
| 184 |
+
"created_at": "2025-11-10T10:00:00",
|
| 185 |
+
"completed_at": "2025-11-10T10:01:00"
|
| 186 |
+
}
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
### 4. Delete Task
|
| 190 |
+
|
| 191 |
+
```bash
|
| 192 |
+
DELETE /api/v1/task/{task_id}
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
## π§ͺ Testing dengan cURL
|
| 196 |
+
|
| 197 |
+
### Upload Video
|
| 198 |
+
|
| 199 |
+
```powershell
|
| 200 |
+
curl -X POST "http://localhost:7860/api/v1/analyze" `
|
| 201 |
+
-F "video=@test_video.mp4" `
|
| 202 |
+
-F "level=2" `
|
| 203 |
+
-F "user_id=user123"
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
### Check Status
|
| 207 |
+
|
| 208 |
+
```powershell
|
| 209 |
+
curl "http://localhost:7860/api/v1/task/abc123def456"
|
| 210 |
+
```
|
| 211 |
+
|
| 212 |
+
## π οΈ Development Setup (Tanpa Docker)
|
| 213 |
+
|
| 214 |
+
### 1. Install Dependencies
|
| 215 |
+
|
| 216 |
+
```powershell
|
| 217 |
+
# Create virtual environment
|
| 218 |
+
python -m venv venv
|
| 219 |
+
.\venv\Scripts\activate
|
| 220 |
+
|
| 221 |
+
# Install dependencies
|
| 222 |
+
pip install -r requirements.txt
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
### 2. Setup Redis (Local atau Upstash)
|
| 226 |
+
|
| 227 |
+
**Option A: Local Redis (dengan Docker)**
|
| 228 |
+
|
| 229 |
+
```powershell
|
| 230 |
+
docker run -d -p 6379:6379 redis:7-alpine
|
| 231 |
+
```
|
| 232 |
+
|
| 233 |
+
**Option B: Upstash Redis (Gratis)**
|
| 234 |
+
|
| 235 |
+
1. Daftar di https://upstash.com
|
| 236 |
+
2. Create Redis database
|
| 237 |
+
3. Copy connection string ke `.env`:
|
| 238 |
+
|
| 239 |
+
```
|
| 240 |
+
REDIS_URL=redis://default:YOUR_PASSWORD@YOUR_ENDPOINT:6379
|
| 241 |
+
```
|
| 242 |
+
|
| 243 |
+
### 3. Run API Server
|
| 244 |
+
|
| 245 |
+
```powershell
|
| 246 |
+
python -m app.main
|
| 247 |
+
```
|
| 248 |
+
|
| 249 |
+
### 4. Run Worker (Terminal terpisah)
|
| 250 |
+
|
| 251 |
+
```powershell
|
| 252 |
+
python -m app.worker
|
| 253 |
+
```
|
| 254 |
+
|
| 255 |
+
## π Project Structure
|
| 256 |
+
|
| 257 |
+
```
|
| 258 |
+
API-MODEL/
|
| 259 |
+
βββ app/
|
| 260 |
+
β βββ __init__.py
|
| 261 |
+
β βββ main.py # FastAPI app
|
| 262 |
+
β βββ config.py # Configuration
|
| 263 |
+
β βββ models.py # Pydantic models
|
| 264 |
+
β βββ tasks.py # Background tasks
|
| 265 |
+
β βββ worker.py # RQ worker
|
| 266 |
+
β βββ api/
|
| 267 |
+
β β βββ routes.py # API endpoints
|
| 268 |
+
β βββ core/
|
| 269 |
+
β β βββ redis_client.py # Redis connection
|
| 270 |
+
β β βββ storage.py # File storage
|
| 271 |
+
β βββ services/
|
| 272 |
+
β βββ video_processor.py # Main orchestrator
|
| 273 |
+
β βββ eye_tracking.py # Eye tracking service
|
| 274 |
+
β βββ facial_expression.py # Facial expression service
|
| 275 |
+
β βββ gesture_detection.py # Gesture detection service
|
| 276 |
+
βββ models/ # AI model files
|
| 277 |
+
βββ temp/ # Temporary video storage
|
| 278 |
+
βββ logs/ # Application logs
|
| 279 |
+
βββ docker-compose.yml
|
| 280 |
+
βββ Dockerfile
|
| 281 |
+
βββ requirements.txt
|
| 282 |
+
βββ README.md
|
| 283 |
+
```
|
| 284 |
+
|
| 285 |
+
## βοΈ Configuration
|
| 286 |
+
|
| 287 |
+
Edit `.env` file untuk konfigurasi:
|
| 288 |
+
|
| 289 |
+
```env
|
| 290 |
+
# Environment
|
| 291 |
+
ENV=development
|
| 292 |
+
|
| 293 |
+
# Redis (Upstash atau local)
|
| 294 |
+
REDIS_URL=redis://localhost:6379
|
| 295 |
+
|
| 296 |
+
# API
|
| 297 |
+
API_HOST=0.0.0.0
|
| 298 |
+
API_PORT=7860
|
| 299 |
+
|
| 300 |
+
# Processing
|
| 301 |
+
MAX_VIDEO_SIZE_MB=50
|
| 302 |
+
MAX_VIDEO_DURATION_SECONDS=60
|
| 303 |
+
TASK_TIMEOUT_SECONDS=300
|
| 304 |
+
|
| 305 |
+
# Logging
|
| 306 |
+
LOG_LEVEL=INFO
|
| 307 |
+
```
|
| 308 |
+
|
| 309 |
+
## π Monitoring
|
| 310 |
+
|
| 311 |
+
### View Logs
|
| 312 |
+
|
| 313 |
+
```powershell
|
| 314 |
+
# API logs
|
| 315 |
+
docker-compose logs -f api
|
| 316 |
+
|
| 317 |
+
# Worker logs
|
| 318 |
+
docker-compose logs -f worker
|
| 319 |
+
|
| 320 |
+
# Redis logs
|
| 321 |
+
docker-compose logs -f redis
|
| 322 |
+
|
| 323 |
+
# All logs
|
| 324 |
+
docker-compose logs -f
|
| 325 |
+
```
|
| 326 |
+
|
| 327 |
+
### Check Redis Queue
|
| 328 |
+
|
| 329 |
+
```powershell
|
| 330 |
+
# Connect to Redis container
|
| 331 |
+
docker exec -it swara-redis redis-cli
|
| 332 |
+
|
| 333 |
+
# Check queue length
|
| 334 |
+
LLEN swara:tasks
|
| 335 |
+
|
| 336 |
+
# View all keys
|
| 337 |
+
KEYS *
|
| 338 |
+
|
| 339 |
+
# Get task data
|
| 340 |
+
GET task:abc123def456
|
| 341 |
+
```
|
| 342 |
+
|
| 343 |
+
## π§ Troubleshooting
|
| 344 |
+
|
| 345 |
+
### Problem: Redis connection failed
|
| 346 |
+
|
| 347 |
+
**Solution:**
|
| 348 |
+
|
| 349 |
+
```powershell
|
| 350 |
+
# Check Redis is running
|
| 351 |
+
docker-compose ps
|
| 352 |
+
|
| 353 |
+
# Restart Redis
|
| 354 |
+
docker-compose restart redis
|
| 355 |
+
|
| 356 |
+
# Check Redis logs
|
| 357 |
+
docker-compose logs redis
|
| 358 |
+
```
|
| 359 |
+
|
| 360 |
+
### Problem: Worker not processing tasks
|
| 361 |
+
|
| 362 |
+
**Solution:**
|
| 363 |
+
|
| 364 |
+
```powershell
|
| 365 |
+
# Check worker logs
|
| 366 |
+
docker-compose logs worker
|
| 367 |
+
|
| 368 |
+
# Restart worker
|
| 369 |
+
docker-compose restart worker
|
| 370 |
+
|
| 371 |
+
# Check if worker is running
|
| 372 |
+
docker-compose ps worker
|
| 373 |
+
```
|
| 374 |
+
|
| 375 |
+
### Problem: Video file too large
|
| 376 |
+
|
| 377 |
+
**Solution:**
|
| 378 |
+
|
| 379 |
+
- Compress video atau gunakan format yang lebih efisien
|
| 380 |
+
- Increase `MAX_VIDEO_SIZE_MB` di `.env`
|
| 381 |
+
|
| 382 |
+
## π Next Steps (TODO)
|
| 383 |
+
|
| 384 |
+
Untuk complete implementation, Anda perlu:
|
| 385 |
+
|
| 386 |
+
1. **Refactor existing code** ke services:
|
| 387 |
+
|
| 388 |
+
- [ ] `app/services/eye_tracking.py` - dari `eye_tracking_production.py`
|
| 389 |
+
- [ ] `app/services/facial_expression.py` - dari `facial_expression.py`
|
| 390 |
+
- [ ] `app/services/gesture_detection.py` - dari `gesture_detection.py`
|
| 391 |
+
|
| 392 |
+
2. **Add audio processing**:
|
| 393 |
+
|
| 394 |
+
- [ ] `app/services/audio_processor.py` - untuk tempo, artikulasi, jeda
|
| 395 |
+
- [ ] Speech-to-text integration
|
| 396 |
+
- [ ] Kata pengisi & kata tidak senonoh detection
|
| 397 |
+
|
| 398 |
+
3. **Add NLP processing**:
|
| 399 |
+
|
| 400 |
+
- [ ] `app/services/nlp_processor.py` - untuk kesesuaian topik, struktur kalimat
|
| 401 |
+
- [ ] Topic matching
|
| 402 |
+
- [ ] Sentence structure analysis
|
| 403 |
+
|
| 404 |
+
4. **Optimization**:
|
| 405 |
+
|
| 406 |
+
- [ ] Implement frame sampling (5 fps instead of 30 fps)
|
| 407 |
+
- [ ] Model loading optimization
|
| 408 |
+
- [ ] Memory management
|
| 409 |
+
|
| 410 |
+
5. **Testing**:
|
| 411 |
+
- [ ] Unit tests
|
| 412 |
+
- [ ] Integration tests
|
| 413 |
+
- [ ] Load testing
|
| 414 |
+
|
| 415 |
+
## π’ Deployment ke HuggingFace Spaces
|
| 416 |
+
|
| 417 |
+
### 1. Create Space
|
| 418 |
+
|
| 419 |
+
1. Go to https://huggingface.co/spaces
|
| 420 |
+
2. Create new Space (Docker type)
|
| 421 |
+
3. Clone repository
|
| 422 |
+
|
| 423 |
+
### 2. Prepare Files
|
| 424 |
+
|
| 425 |
+
```bash
|
| 426 |
+
# Add Dockerfile for HF Spaces
|
| 427 |
+
# (sudah ada di repository)
|
| 428 |
+
```
|
| 429 |
+
|
| 430 |
+
### 3. Setup Secrets
|
| 431 |
+
|
| 432 |
+
Di HuggingFace Space settings, add secrets:
|
| 433 |
+
|
| 434 |
+
- `REDIS_URL` - Upstash Redis URL
|
| 435 |
+
- `ENV` - production
|
| 436 |
+
|
| 437 |
+
### 4. Push & Deploy
|
| 438 |
+
|
| 439 |
+
```bash
|
| 440 |
+
git push origin main
|
| 441 |
+
```
|
| 442 |
+
|
| 443 |
+
HuggingFace akan auto-deploy!
|
| 444 |
+
|
| 445 |
+
## π Support
|
| 446 |
+
|
| 447 |
+
Untuk pertanyaan atau issues, contact SWARA team.
|
| 448 |
+
|
| 449 |
+
---
|
| 450 |
+
|
| 451 |
+
**Built with β€οΈ by SWARA Team for LIDM Competition 2025**
|
app.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
SWARA API - entry point for HuggingFace Spaces (non-Docker deployments).

Boots the FastAPI application defined in ``app.main`` with uvicorn.
"""
import os

import uvicorn

from app.main import app


def _resolve_port() -> int:
    """Return the port to bind; HuggingFace Spaces injects PORT (default 7860)."""
    return int(os.environ.get("PORT", 7860))


if __name__ == "__main__":
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=_resolve_port(),
        log_level="info",
    )
|
app/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
SWARA - AI-Powered Public Speaking Evaluation System.

Top-level application package; exposes package metadata only.
"""

__version__ = "1.0.0"
__author__ = "SWARA Team"
|
app/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (301 Bytes). View file
|
|
|
app/__pycache__/config.cpython-312.pyc
ADDED
|
Binary file (3.08 kB). View file
|
|
|
app/api/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API module initialization
|
| 3 |
+
"""
|
app/api/routes.py
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
API Route Handlers
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import uuid
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from typing import Optional
|
| 8 |
+
from fastapi import APIRouter, UploadFile, File, Form, HTTPException, status
|
| 9 |
+
from loguru import logger
|
| 10 |
+
from rq import Queue
|
| 11 |
+
from rq.job import Job
|
| 12 |
+
|
| 13 |
+
from app.models import (
|
| 14 |
+
TaskCreateResponse,
|
| 15 |
+
TaskStatusResponse,
|
| 16 |
+
TaskStatus,
|
| 17 |
+
HealthResponse,
|
| 18 |
+
ErrorResponse,
|
| 19 |
+
Level
|
| 20 |
+
)
|
| 21 |
+
from app.core.redis_client import get_redis_client
|
| 22 |
+
from app.core.storage import get_storage_manager
|
| 23 |
+
from app.config import settings
|
| 24 |
+
|
| 25 |
+
# Create router
|
| 26 |
+
router = APIRouter()
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@router.get("/", tags=["Root"])
async def root():
    """
    Root endpoint.

    Returns basic service metadata plus a pointer to the interactive docs.
    """
    return {
        "message": "SWARA API - AI-Powered Public Speaking Evaluation",
        # Consistency fix: read the version from settings like /health does,
        # instead of hard-coding "1.0.0" here and letting the two drift.
        "version": settings.API_VERSION,
        "docs": "/docs"
    }
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@router.get("/health", response_model=HealthResponse, tags=["Health"])
async def health_check():
    """
    Health check endpoint.

    Checks:
    - API status
    - Redis connection
    """
    connected = get_redis_client().is_connected()
    # Report "degraded" rather than failing outright when Redis is down,
    # so the endpoint itself stays reachable.
    overall = "healthy" if connected else "degraded"

    return HealthResponse(
        status=overall,
        version=settings.API_VERSION,
        redis_connected=connected,
    )
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@router.post(
    "/api/v1/analyze",
    response_model=TaskCreateResponse,
    status_code=status.HTTP_202_ACCEPTED,
    tags=["Analysis"]
)
async def analyze_video(
    video: UploadFile = File(..., description="Video file to analyze"),
    level: int = Form(..., ge=1, le=5, description="Public speaking level (1-5)"),
    user_id: Optional[str] = Form(None, description="Optional user ID")
):
    """
    Upload video for analysis.

    This endpoint accepts a video file and queues it for processing.
    Returns a task_id that can be used to check the analysis status.

    **Parameters:**
    - **video**: Video file (MP4 format recommended, max 50MB, max 1 minute)
    - **level**: Public speaking level (1-5)
    - **user_id**: Optional user identifier for tracking

    **Returns:**
    - task_id: Unique identifier to check task status
    - status: Task status (pending)
    - created_at: Task creation timestamp
    """
    try:
        # Validate the declared content type before reading the body.
        if not video.content_type or not video.content_type.startswith("video/"):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail="Invalid file type. Please upload a video file."
            )

        # Read the whole upload into memory; the size cap below bounds this.
        video_content = await video.read()
        video_size = len(video_content)

        if video_size > settings.max_video_size_bytes:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"Video size exceeds maximum allowed size of {settings.MAX_VIDEO_SIZE_MB}MB"
            )

        # Persist the upload to temporary storage.
        storage = get_storage_manager()
        video_path = await storage.save_video(video_content)

        task_id = uuid.uuid4().hex

        try:
            # Task metadata stored alongside the queued job.
            # NOTE: datetime.utcnow() is naive; kept as-is because existing
            # consumers parse created_at without a timezone offset.
            task_data = {
                "task_id": task_id,
                "status": TaskStatus.PENDING.value,
                "video_path": video_path,
                "level": level,
                "user_id": user_id,
                "video_size": video_size,
                "original_filename": video.filename,
                "created_at": datetime.utcnow().isoformat()
            }

            redis_client = get_redis_client().get_client()
            task_key = f"task:{task_id}"
            redis_client.setex(
                task_key,
                settings.TASK_RESULT_TTL_SECONDS,
                json.dumps(task_data)
            )

            # Queue the background job; job_id == task_id so the status
            # endpoint can fetch the RQ job directly by task_id.
            queue = Queue(settings.TASK_QUEUE_NAME, connection=redis_client)
            queue.enqueue(
                'app.tasks.process_video_task',
                task_id,
                video_path,
                level,
                job_timeout=settings.TASK_TIMEOUT_SECONDS,
                result_ttl=settings.TASK_RESULT_TTL_SECONDS,
                job_id=task_id
            )
        except Exception:
            # Fix: don't leak the saved upload on disk when Redis or queueing
            # fails after the file has already been written.
            storage.delete_video(video_path)
            raise

        logger.info(f"Task created: {task_id} (Level {level}, Size: {video_size} bytes)")

        return TaskCreateResponse(
            task_id=task_id,
            status=TaskStatus.PENDING,
            message="Video uploaded successfully. Processing has been queued.",
            created_at=datetime.utcnow()
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error creating task: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to create analysis task: {str(e)}"
        )
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
@router.get(
    "/api/v1/task/{task_id}",
    response_model=TaskStatusResponse,
    tags=["Analysis"]
)
async def get_task_status(task_id: str):
    """
    Get task status and results.

    Check the status of a video analysis task. If the task is completed,
    this endpoint returns the full analysis results.

    **Parameters:**
    - **task_id**: Task identifier returned from the analyze endpoint

    **Returns:**
    - Task status (pending, processing, completed, failed)
    - Progress information (if processing)
    - Analysis results (if completed)
    """
    try:
        redis_client = get_redis_client().get_client()

        # Get task data from Redis
        task_key = f"task:{task_id}"
        task_data_str = redis_client.get(task_key)

        if not task_data_str:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Task {task_id} not found or has expired"
            )

        task_data = json.loads(task_data_str)

        # Overlay live RQ job state onto the stored record.
        try:
            job = Job.fetch(task_id, connection=redis_client)

            if job.is_finished:
                task_data["status"] = TaskStatus.COMPLETED.value
                task_data["completed_at"] = datetime.utcnow().isoformat()
                if job.result:
                    task_data["result"] = job.result
            elif job.is_failed:
                task_data["status"] = TaskStatus.FAILED.value
                task_data["error"] = str(job.exc_info) if job.exc_info else "Unknown error"
                task_data["completed_at"] = datetime.utcnow().isoformat()
            elif job.is_started:
                task_data["status"] = TaskStatus.PROCESSING.value
        except Exception:
            # Fix: was a bare `except:`, which also swallows SystemExit and
            # KeyboardInterrupt. The job may legitimately be gone from RQ
            # (expired result); fall back to the status stored in Redis.
            pass

        # NOTE(review): the refreshed status/result is returned but never
        # written back to Redis, so every poll re-derives it from RQ —
        # confirm this is intentional.
        return TaskStatusResponse(
            task_id=task_data["task_id"],
            status=TaskStatus(task_data["status"]),
            progress=task_data.get("progress"),
            result=task_data.get("result"),
            error=task_data.get("error"),
            created_at=datetime.fromisoformat(task_data["created_at"]),
            completed_at=datetime.fromisoformat(task_data["completed_at"]) if task_data.get("completed_at") else None
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error getting task status: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to get task status: {str(e)}"
        )
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
@router.delete(
    "/api/v1/task/{task_id}",
    tags=["Analysis"]
)
async def delete_task(task_id: str):
    """
    Delete a task and clean up its associated files.

    **Parameters:**
    - **task_id**: Task identifier to delete

    **Returns:**
    - Success message
    """
    try:
        client = get_redis_client().get_client()
        storage = get_storage_manager()

        key = f"task:{task_id}"
        raw = client.get(key)

        # Unknown / expired task -> 404.
        if not raw:
            raise HTTPException(
                status_code=status.HTTP_404_NOT_FOUND,
                detail=f"Task {task_id} not found"
            )

        record = json.loads(raw)

        # Remove the uploaded video before dropping the Redis record.
        if "video_path" in record:
            storage.delete_video(record["video_path"])

        client.delete(key)

        logger.info(f"β Task deleted: {task_id}")
        return {"message": f"Task {task_id} deleted successfully"}

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"β Error deleting task: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Failed to delete task: {str(e)}"
        )
|
app/config.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Configuration Management
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Optional
|
| 7 |
+
from pydantic_settings import BaseSettings
|
| 8 |
+
from functools import lru_cache
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Settings(BaseSettings):
    """Application Settings

    Values are read from environment variables (and a local ``.env`` file);
    the defaults below are used as fallbacks.
    """

    # Environment
    ENV: str = "development"

    # API Configuration
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 7860
    API_WORKERS: int = 1
    API_TITLE: str = "SWARA API"
    API_VERSION: str = "1.0.0"
    API_DESCRIPTION: str = "AI-Powered Public Speaking Evaluation API"

    # Redis Configuration
    REDIS_URL: str = "redis://localhost:6379"

    # Processing Configuration
    MAX_VIDEO_SIZE_MB: int = 50
    MAX_VIDEO_DURATION_SECONDS: int = 60
    TEMP_DIR: str = "./temp"
    MODELS_DIR: str = "./models"

    # Task Configuration
    TASK_TIMEOUT_SECONDS: int = 300
    TASK_RESULT_TTL_SECONDS: int = 3600  # 1 hour
    TASK_QUEUE_NAME: str = "swara:tasks"

    # Rate Limiting
    RATE_LIMIT_REQUESTS: int = 10
    RATE_LIMIT_PERIOD_SECONDS: int = 3600

    # Logging
    LOG_LEVEL: str = "INFO"

    # Model Paths
    FACIAL_EXPRESSION_MODEL: str = "models/best.onnx"

    class Config:
        # pydantic-settings: load overrides from .env; environment variable
        # names must match the attribute names exactly (case-sensitive).
        env_file = ".env"
        case_sensitive = True

    def get_temp_dir(self) -> Path:
        """Get temporary directory path (created on first access)."""
        path = Path(self.TEMP_DIR)
        path.mkdir(parents=True, exist_ok=True)
        return path

    def get_models_dir(self) -> Path:
        """Get models directory path (created on first access)."""
        path = Path(self.MODELS_DIR)
        path.mkdir(parents=True, exist_ok=True)
        return path

    @property
    def max_video_size_bytes(self) -> int:
        """Get max video size in bytes (derived from MAX_VIDEO_SIZE_MB)."""
        return self.MAX_VIDEO_SIZE_MB * 1024 * 1024
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@lru_cache()
def get_settings() -> Settings:
    """Get cached settings instance.

    The lru_cache guarantees the environment/.env is parsed exactly once
    per process; every caller receives the same Settings object.
    """
    return Settings()


# Global settings instance
settings = get_settings()
|
app/core/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Core module initialization
|
| 3 |
+
"""
|
app/core/redis_client.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Redis Client Manager
|
| 3 |
+
"""
|
| 4 |
+
import redis
|
| 5 |
+
from typing import Optional
|
| 6 |
+
from loguru import logger
|
| 7 |
+
from app.config import settings
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class RedisClient:
    """Redis client wrapper with connection pooling.

    Lazily opens a single connection pool (max 10 connections) on first use
    and exposes small convenience helpers (`get`, `delete`, `exists`,
    `set_with_ttl`) that swallow errors and return safe defaults so callers
    never have to wrap calls in try/except.
    """

    def __init__(self):
        self._client: Optional[redis.Redis] = None
        self._connection_pool: Optional[redis.ConnectionPool] = None

    @staticmethod
    def _redacted_url(url: str) -> str:
        """Return *url* with the password replaced by '***' for safe logging.

        REDIS_URL may embed credentials (e.g. rediss://user:pass@host:port);
        those must never be written to the logs.
        """
        if "://" in url and "@" in url:
            scheme, rest = url.split("://", 1)
            creds, host = rest.rsplit("@", 1)
            user = creds.split(":", 1)[0]
            return f"{scheme}://{user}:***@{host}"
        return url

    def connect(self) -> redis.Redis:
        """Establish the Redis connection (idempotent).

        Raises:
            Exception: propagated if the connection or initial PING fails.
        """
        if self._client is None:
            try:
                # Log only a redacted URL — never the embedded password.
                logger.info(f"Connecting to Redis at {self._redacted_url(settings.REDIS_URL)}")

                self._connection_pool = redis.ConnectionPool.from_url(
                    settings.REDIS_URL,
                    decode_responses=True,  # return str, not bytes
                    max_connections=10
                )

                self._client = redis.Redis(connection_pool=self._connection_pool)

                # Fail fast if the server is unreachable
                self._client.ping()
                logger.info("β Redis connected successfully")

            except Exception as e:
                logger.error(f"β Failed to connect to Redis: {e}")
                raise

        return self._client

    def disconnect(self):
        """Close the Redis connection and release the pool's sockets."""
        if self._client:
            self._client.close()
            self._client = None
            if self._connection_pool:
                # client.close() alone does not tear down pooled sockets.
                self._connection_pool.disconnect()
                self._connection_pool = None
            logger.info("Redis connection closed")

    def get_client(self) -> redis.Redis:
        """Get the Redis client instance, connecting on first use."""
        if self._client is None:
            self.connect()
        return self._client

    def is_connected(self) -> bool:
        """Check if Redis is connected (PING succeeds)."""
        try:
            if self._client:
                self._client.ping()
                return True
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed.
            pass
        return False

    def set_with_ttl(self, key: str, value: str, ttl: int) -> bool:
        """Set key with TTL (Time To Live). Returns False on error."""
        try:
            client = self.get_client()
            return client.setex(key, ttl, value)
        except Exception as e:
            logger.error(f"Error setting key {key}: {e}")
            return False

    def get(self, key: str) -> Optional[str]:
        """Get value by key, or None if missing or on error."""
        try:
            client = self.get_client()
            return client.get(key)
        except Exception as e:
            logger.error(f"Error getting key {key}: {e}")
            return None

    def delete(self, key: str) -> bool:
        """Delete key. Returns True only if a key was actually removed."""
        try:
            client = self.get_client()
            return bool(client.delete(key))
        except Exception as e:
            logger.error(f"Error deleting key {key}: {e}")
            return False

    def exists(self, key: str) -> bool:
        """Check if key exists. Returns False on error."""
        try:
            client = self.get_client()
            return bool(client.exists(key))
        except Exception as e:
            logger.error(f"Error checking key {key}: {e}")
            return False
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# Global Redis client instance (one shared connection pool per process)
redis_client = RedisClient()


def get_redis_client() -> RedisClient:
    """Get global Redis client instance"""
    return redis_client
|
app/core/storage.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Storage Manager for Video Files
|
| 3 |
+
"""
|
| 4 |
+
import os
|
| 5 |
+
import uuid
|
| 6 |
+
import aiofiles
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Optional
|
| 9 |
+
from datetime import datetime, timedelta
|
| 10 |
+
from loguru import logger
|
| 11 |
+
from app.config import settings
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class StorageManager:
    """Manage video file storage and cleanup.

    Files live in the configured temp directory; names are random UUID hex
    strings, so uploads cannot collide and no client-supplied filename ever
    reaches the filesystem.
    """

    def __init__(self):
        # settings.get_temp_dir() creates the directory if needed
        self.temp_dir = settings.get_temp_dir()

    async def save_video(self, file_content: bytes, extension: str = "mp4") -> str:
        """
        Save uploaded video to temporary storage

        Args:
            file_content: Video file bytes
            extension: File extension (default: mp4)

        Returns:
            str: Saved file path

        Raises:
            Exception: re-raised after logging if the write fails.
        """
        # Generate unique filename (ignores any client-provided name)
        file_id = uuid.uuid4().hex
        filename = f"{file_id}.{extension}"
        file_path = self.temp_dir / filename

        try:
            # Write asynchronously so the event loop is not blocked
            async with aiofiles.open(file_path, 'wb') as f:
                await f.write(file_content)

            logger.info(f"β Video saved: {file_path} ({len(file_content)} bytes)")
            return str(file_path)

        except Exception as e:
            logger.error(f"β Failed to save video: {e}")
            raise

    def delete_video(self, file_path: str) -> bool:
        """
        Delete video file

        Args:
            file_path: Path to video file

        Returns:
            bool: True if deleted successfully, False if missing or on error.
        """
        try:
            path = Path(file_path)
            if path.exists():
                path.unlink()
                logger.info(f"β Video deleted: {file_path}")
                return True
            else:
                logger.warning(f"β Video not found: {file_path}")
                return False
        except Exception as e:
            logger.error(f"β Failed to delete video: {e}")
            return False

    def cleanup_old_files(self, hours: int = 2):
        """
        Delete files older than specified hours

        Args:
            hours: Age threshold in hours (by file mtime)
        """
        try:
            threshold = datetime.now() - timedelta(hours=hours)
            deleted_count = 0

            for file_path in self.temp_dir.glob("*.*"):
                if file_path.is_file():
                    file_time = datetime.fromtimestamp(file_path.stat().st_mtime)
                    if file_time < threshold:
                        file_path.unlink()
                        deleted_count += 1

            if deleted_count > 0:
                logger.info(f"β Cleaned up {deleted_count} old files")

        except Exception as e:
            logger.error(f"β Failed to cleanup old files: {e}")

    def get_file_size(self, file_path: str) -> Optional[int]:
        """Get file size in bytes, or None if the path is missing/invalid."""
        try:
            return Path(file_path).stat().st_size
        except (OSError, TypeError):
            # Narrowed from a bare `except:`: OSError covers missing or
            # unreadable files, TypeError covers a non-path argument.
            return None

    def file_exists(self, file_path: str) -> bool:
        """Check if file exists"""
        return Path(file_path).exists()
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# Global storage manager instance (shared across requests and tasks)
storage_manager = StorageManager()


def get_storage_manager() -> StorageManager:
    """Get global storage manager instance"""
    return storage_manager
|
app/main.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI Main Application
|
| 3 |
+
"""
|
| 4 |
+
import sys
|
| 5 |
+
from contextlib import asynccontextmanager
|
| 6 |
+
from fastapi import FastAPI
|
| 7 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
+
from fastapi.responses import JSONResponse
|
| 9 |
+
from loguru import logger
|
| 10 |
+
|
| 11 |
+
from app.config import settings
|
| 12 |
+
from app.core.redis_client import get_redis_client
|
| 13 |
+
from app.api.routes import router
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Configure logging
|
| 17 |
+
logger.remove()
|
| 18 |
+
logger.add(
|
| 19 |
+
sys.stdout,
|
| 20 |
+
format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan> - <level>{message}</level>",
|
| 21 |
+
level=settings.LOG_LEVEL
|
| 22 |
+
)
|
| 23 |
+
logger.add(
|
| 24 |
+
"logs/swara_api_{time:YYYY-MM-DD}.log",
|
| 25 |
+
rotation="1 day",
|
| 26 |
+
retention="7 days",
|
| 27 |
+
format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function} - {message}",
|
| 28 |
+
level=settings.LOG_LEVEL
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan events.

    Startup: connect to Redis (non-fatal on failure so the API still serves
    synchronous endpoints) and ensure temp/models directories exist.
    Shutdown: close the Redis connection.
    """
    # Startup
    logger.info("=" * 70)
    logger.info("π SWARA API Starting...")
    logger.info("=" * 70)
    logger.info(f"Environment: {settings.ENV}")
    logger.info(f"API Version: {settings.API_VERSION}")
    # Log only the host part: REDIS_URL may embed credentials
    # (rediss://user:password@host:port) which must never reach the logs.
    logger.info(f"Redis URL: {settings.REDIS_URL.rsplit('@', 1)[-1]}")

    # Connect to Redis
    try:
        redis_client = get_redis_client()
        redis_client.connect()
        logger.info("β Redis connection established")
    except Exception as e:
        logger.error(f"β Failed to connect to Redis: {e}")
        logger.warning("β API will start but background tasks will not work")

    # Create necessary directories
    settings.get_temp_dir()
    settings.get_models_dir()
    logger.info("β Directories created")

    logger.info("=" * 70)
    logger.info(f"β SWARA API Ready at http://{settings.API_HOST}:{settings.API_PORT}")
    logger.info("=" * 70)

    yield

    # Shutdown
    logger.info("=" * 70)
    logger.info("π SWARA API Shutting down...")
    logger.info("=" * 70)

    # Disconnect from Redis (best-effort: log failures instead of hiding them)
    try:
        redis_client = get_redis_client()
        redis_client.disconnect()
        logger.info("β Redis disconnected")
    except Exception as e:
        logger.warning(f"β Redis disconnect failed: {e}")

    logger.info("β Shutdown complete")
    logger.info("=" * 70)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# Create FastAPI application
|
| 83 |
+
app = FastAPI(
|
| 84 |
+
title=settings.API_TITLE,
|
| 85 |
+
version=settings.API_VERSION,
|
| 86 |
+
description=settings.API_DESCRIPTION,
|
| 87 |
+
lifespan=lifespan,
|
| 88 |
+
docs_url="/docs",
|
| 89 |
+
redoc_url="/redoc",
|
| 90 |
+
openapi_url="/openapi.json"
|
| 91 |
+
)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# CORS Middleware
|
| 95 |
+
app.add_middleware(
|
| 96 |
+
CORSMiddleware,
|
| 97 |
+
allow_origins=["*"], # In production, specify allowed origins
|
| 98 |
+
allow_credentials=True,
|
| 99 |
+
allow_methods=["*"],
|
| 100 |
+
allow_headers=["*"],
|
| 101 |
+
)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# Include routers
|
| 105 |
+
app.include_router(router)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# Global exception handler
|
| 109 |
+
@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Global exception handler: last-resort catch-all for unhandled errors.

    Logs the exception and returns a generic 500 response. The raw exception
    text is only exposed in development to avoid leaking internals.
    """
    logger.error(f"Unhandled exception: {exc}")
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            # Hide details outside development
            "detail": str(exc) if settings.ENV == "development" else "An unexpected error occurred"
        }
    )
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
if __name__ == "__main__":
|
| 123 |
+
import uvicorn
|
| 124 |
+
|
| 125 |
+
uvicorn.run(
|
| 126 |
+
"app.main:app",
|
| 127 |
+
host=settings.API_HOST,
|
| 128 |
+
port=settings.API_PORT,
|
| 129 |
+
reload=settings.ENV == "development",
|
| 130 |
+
workers=settings.API_WORKERS
|
| 131 |
+
)
|
app/models.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic Models for Request/Response
|
| 3 |
+
"""
|
| 4 |
+
from typing import Optional, Dict, Any, List
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from enum import Enum
|
| 7 |
+
from pydantic import BaseModel, Field, validator
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TaskStatus(str, Enum):
    """Lifecycle states of a background analysis task.

    Inherits from ``str`` so members compare equal to their plain string
    values and serialize naturally in JSON.
    """

    PENDING = "pending"        # queued, not yet picked up
    PROCESSING = "processing"  # a worker is analyzing the video
    COMPLETED = "completed"    # finished; result available
    FAILED = "failed"          # analysis raised an error
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class Level(int, Enum):
    """Public speaking level (1 = beginner … 5 = most advanced).

    Inherits from ``int`` so members compare equal to plain integers.
    """

    LEVEL_1 = 1
    LEVEL_2 = 2
    LEVEL_3 = 3
    LEVEL_4 = 4
    LEVEL_5 = 5
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class VideoUploadRequest(BaseModel):
    """Video upload request model (metadata accompanying the video file)."""
    level: Level = Field(..., description="Public speaking level (1-5)")
    user_id: Optional[str] = Field(None, description="Optional user ID for tracking")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class TaskCreateResponse(BaseModel):
    """Response returned immediately after a task is queued."""
    task_id: str = Field(..., description="Unique task identifier")
    status: TaskStatus = Field(default=TaskStatus.PENDING)
    message: str = Field(default="Task created successfully")
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12 and
    # yields a naive datetime; consider datetime.now(timezone.utc).
    created_at: datetime = Field(default_factory=datetime.utcnow)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class TaskProgress(BaseModel):
    """Task progress information (embedded in status responses)."""
    current_step: str = Field(..., description="Current processing step")
    percentage: float = Field(..., ge=0, le=100, description="Progress percentage")
    message: str = Field(..., description="Progress message")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class IndicatorResult(BaseModel):
    """Individual indicator result.

    Either ``score`` (numeric indicators) or ``detected`` (boolean
    indicators) is populated; ``raw_data`` carries analyzer-specific detail.
    """
    score: Optional[float] = Field(None, description="Indicator score")
    raw_data: Dict[str, Any] = Field(default_factory=dict, description="Raw analysis data")
    detected: Optional[bool] = Field(None, description="Detection status (for boolean indicators)")
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class MainIndicators(BaseModel):
    """Main indicators for scoring (Indonesian field names kept as the
    public API contract: tempo, articulation, eye contact, topic relevance,
    sentence structure)."""
    tempo: Optional[IndicatorResult] = None
    artikulasi: Optional[IndicatorResult] = None
    kontak_mata: Optional[IndicatorResult] = None
    kesesuaian_topik: Optional[IndicatorResult] = None
    struktur_kalimat: Optional[IndicatorResult] = None
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class BonusIndicators(BaseModel):
    """Bonus indicators for additional points (pauses, first impression,
    facial expression, gesture, filler words, profanity)."""
    jeda: Optional[IndicatorResult] = None
    first_impression: Optional[IndicatorResult] = None
    face_expression: Optional[IndicatorResult] = None
    gesture: Optional[IndicatorResult] = None
    kata_pengisi: Optional[IndicatorResult] = None
    kata_tidak_senonoh: Optional[IndicatorResult] = None
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class VideoMetadata(BaseModel):
    """Video metadata information extracted from the uploaded file."""
    duration: float = Field(..., description="Video duration in seconds")
    fps: int = Field(..., description="Frames per second")
    resolution: str = Field(..., description="Video resolution (e.g. '1920x1080')")
    file_size: int = Field(..., description="File size in bytes")
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class AnalysisResult(BaseModel):
    """Complete analysis result for one processed video."""
    level: Level = Field(..., description="Evaluated level")
    video_metadata: VideoMetadata
    main_indicators: MainIndicators
    bonus_indicators: BonusIndicators
    processing_time: float = Field(..., description="Total processing time in seconds")
    # NOTE(review): datetime.utcnow is deprecated since Python 3.12.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
class TaskStatusResponse(BaseModel):
    """Task status response.

    ``progress`` is present while processing; ``result`` once completed;
    ``error`` only when the task failed.
    """
    task_id: str
    status: TaskStatus
    progress: Optional[TaskProgress] = None
    result: Optional[AnalysisResult] = None
    error: Optional[str] = None
    created_at: datetime
    completed_at: Optional[datetime] = None
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class HealthResponse(BaseModel):
    """Health check response."""
    status: str = Field(default="healthy")
    version: str = Field(default="1.0.0")
    redis_connected: bool
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
class ErrorResponse(BaseModel):
    """Error response model returned by API error handlers."""
    error: str = Field(..., description="Error message")
    detail: Optional[str] = Field(None, description="Detailed error information")
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
app/services/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Services module initialization
|
| 3 |
+
"""
|
app/services/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (200 Bytes). View file
|
|
|
app/services/__pycache__/eye_tracking.cpython-312.pyc
ADDED
|
Binary file (35.7 kB). View file
|
|
|
app/services/__pycache__/facial_expression.cpython-312.pyc
ADDED
|
Binary file (8.03 kB). View file
|
|
|
app/services/__pycache__/gesture_detection.cpython-312.pyc
ADDED
|
Binary file (20.1 kB). View file
|
|
|
app/services/eye_tracking.py
ADDED
|
@@ -0,0 +1,894 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Eye Tracking Service
|
| 3 |
+
|
| 4 |
+
Refactored from eye_tracking_production.py for production use.
|
| 5 |
+
Production-ready eye tracking untuk website SWARA
|
| 6 |
+
"""
|
| 7 |
+
import cv2 as cv
|
| 8 |
+
import math
|
| 9 |
+
import numpy as np
|
| 10 |
+
import mediapipe as mp
|
| 11 |
+
from datetime import datetime
|
| 12 |
+
from typing import Dict, List, Tuple, Optional, Any
|
| 13 |
+
from loguru import logger
|
| 14 |
+
|
| 15 |
+
from app.config import settings
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class EyeTrackingConfig:
    """Tunable parameters for the eye-tracking pipeline."""

    # MediaPipe FaceMesh landmark indices outlining each eye.
    LEFT_EYE = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
    RIGHT_EYE = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]

    # Pixel-area thresholds used to bucket an eye crop as SMALL / MEDIUM / LARGE.
    SMALL_EYE_THRESHOLD = 600
    MEDIUM_EYE_THRESHOLD = 1500

    # Horizontal pupil-position boundaries, as a fraction of the eye width.
    LEFT_BOUNDARY = 0.35
    RIGHT_BOUNDARY = 0.65

    # Hysteresis bands around each boundary used for temporal smoothing.
    SMOOTHING_LEFT_MIN = 0.33
    SMOOTHING_LEFT_MAX = 0.37
    SMOOTHING_RIGHT_MIN = 0.63
    SMOOTHING_RIGHT_MAX = 0.67

    # Horizontal/vertical eye-opening ratio above which a blink is assumed.
    BLINK_THRESHOLD = 5.5

    # score -> (maximum gaze-away seconds, rating label).
    SCORE_THRESHOLDS = {
        5: (5, "Sangat Baik"),
        4: (8, "Baik"),
        3: (10, "Cukup Baik"),
        2: (12, "Buruk"),
        1: (float('inf'), "Perlu Ditingkatkan"),
    }

    # Per-eye-size processing parameters: upscaling, enhancement and the
    # blob filters used during pupil detection.
    ADAPTIVE_PARAMS = {
        'SMALL': {
            'scale_factor': 3.0,
            'interpolation': cv.INTER_LANCZOS4,
            'clahe_clip': 4.0,
            'clahe_grid': (4, 4),
            'bilateral_d': 7,
            'bilateral_sigma': 75,
            'thresholds': [20, 25, 30, 35, 40, 45, 50, 55],
            'min_area_ratio': 0.001,
            'max_area_ratio': 0.50,
            'min_circularity': 0.3,
            'min_solidity': 0.5,
            'morph_kernel': 5,
            'morph_close_iter': 3,
            'morph_open_iter': 2,
        },
        'MEDIUM': {
            'scale_factor': 2.0,
            'interpolation': cv.INTER_CUBIC,
            'clahe_clip': 3.0,
            'clahe_grid': (8, 8),
            'bilateral_d': 5,
            'bilateral_sigma': 50,
            'thresholds': [30, 35, 40, 45, 50, 55, 60],
            'min_area_ratio': 0.005,
            'max_area_ratio': 0.45,
            'min_circularity': 0.4,
            'min_solidity': 0.6,
            'morph_kernel': 3,
            'morph_close_iter': 2,
            'morph_open_iter': 1,
        },
        'LARGE': {
            'scale_factor': 1.5,
            'interpolation': cv.INTER_CUBIC,
            'clahe_clip': 2.0,
            'clahe_grid': (8, 8),
            'bilateral_d': 3,
            'bilateral_sigma': 30,
            'thresholds': [35, 40, 45, 50, 55, 60, 65],
            'min_area_ratio': 0.01,
            'max_area_ratio': 0.40,
            'min_circularity': 0.5,
            'min_solidity': 0.7,
            'morph_kernel': 3,
            'morph_close_iter': 2,
            'morph_open_iter': 1,
        },
    }
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class EyeTracker:
    """Per-frame eye tracking built on MediaPipe FaceMesh."""

    def __init__(self, config: EyeTrackingConfig = None):
        """Create a tracker; falls back to default config when none given."""
        self.config = config if config is not None else EyeTrackingConfig()
        self.face_mesh = mp.solutions.face_mesh.FaceMesh(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        # Last known gaze label per eye, used for temporal smoothing.
        self.prev_position_right = None
        self.prev_position_left = None
|
| 115 |
+
|
| 116 |
+
def __del__(self):
    """Release MediaPipe resources when the tracker is garbage-collected."""
    # getattr guards against a partially-constructed instance.
    mesh = getattr(self, 'face_mesh', None)
    if mesh:
        mesh.close()
|
| 120 |
+
|
| 121 |
+
@staticmethod
|
| 122 |
+
def euclidean_distance(point1: Tuple[int, int], point2: Tuple[int, int]) -> float:
|
| 123 |
+
"""Calculate Euclidean distance between two points"""
|
| 124 |
+
return math.sqrt((point2[0] - point1[0])**2 + (point2[1] - point1[1])**2)
|
| 125 |
+
|
| 126 |
+
def detect_landmarks(self, frame: np.ndarray) -> Optional[List[Tuple[int, int]]]:
    """Run FaceMesh on a BGR frame and return pixel landmark coordinates.

    Returns None when no face is found or on any processing error.
    """
    try:
        # MediaPipe expects RGB input.
        results = self.face_mesh.process(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            return None

        img_height, img_width = frame.shape[:2]
        face = results.multi_face_landmarks[0]
        # Convert normalized [0, 1] coordinates to integer pixel positions.
        return [
            (int(lm.x * img_width), int(lm.y * img_height))
            for lm in face.landmark
        ]
    except Exception as e:
        logger.error(f"Error detecting landmarks: {e}")
        return None
|
| 144 |
+
|
| 145 |
+
def calculate_blink_ratio(self, landmarks: List[Tuple[int, int]]) -> float:
    """Return the mean horizontal/vertical opening ratio of both eyes.

    Larger values indicate more closed eyes. Returns 0 on degenerate
    input (zero vertical span) or on any processing error.
    """
    try:
        right = self.config.RIGHT_EYE
        left = self.config.LEFT_EYE

        # Horizontal (corner-to-corner) and vertical (lid-to-lid) spans.
        rh = self.euclidean_distance(landmarks[right[0]], landmarks[right[8]])
        rv = self.euclidean_distance(landmarks[right[12]], landmarks[right[4]])
        lh = self.euclidean_distance(landmarks[left[0]], landmarks[left[8]])
        lv = self.euclidean_distance(landmarks[left[12]], landmarks[left[4]])

        # Guard against division by zero when the lids touch.
        if rv == 0 or lv == 0:
            return 0

        return (rh / rv + lh / lv) / 2
    except Exception as e:
        logger.error(f"Error calculating blink ratio: {e}")
        return 0
|
| 179 |
+
|
| 180 |
+
def extract_eye_region(self, frame: np.ndarray, eye_coords: List[Tuple[int, int]]) -> Optional[np.ndarray]:
    """Return a grayscale crop of the polygon described by *eye_coords*.

    Pixels outside the polygon are flattened to a neutral gray (155) so
    they do not interfere with later thresholding. Returns None when the
    crop would be empty or on any processing error.
    """
    try:
        gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)

        # Keep only the eye polygon; neutralize everything else.
        mask = np.zeros(gray.shape, dtype=np.uint8)
        cv.fillPoly(mask, [np.array(eye_coords, dtype=np.int32)], 255)
        eye = cv.bitwise_and(gray, gray, mask=mask)
        eye[mask == 0] = 155

        # Crop to the polygon's bounding box.
        xs = [p[0] for p in eye_coords]
        ys = [p[1] for p in eye_coords]
        cropped = eye[min(ys):max(ys), min(xs):max(xs)]

        return cropped if cropped.size > 0 else None
    except Exception as e:
        logger.error(f"Error extracting eye region: {e}")
        return None
|
| 202 |
+
|
| 203 |
+
def classify_eye_size(self, eye_region: np.ndarray) -> str:
    """Bucket an eye crop by pixel area: 'SMALL', 'MEDIUM' or 'LARGE'.

    Returns 'UNKNOWN' for a missing or empty crop.
    """
    if eye_region is None or eye_region.size == 0:
        return 'UNKNOWN'

    height, width = eye_region.shape
    area = height * width

    if area < self.config.SMALL_EYE_THRESHOLD:
        return 'SMALL'
    if area < self.config.MEDIUM_EYE_THRESHOLD:
        return 'MEDIUM'
    return 'LARGE'
|
| 217 |
+
|
| 218 |
+
def adaptive_preprocessing(self, eye_region: np.ndarray, eye_size: str) -> Optional[np.ndarray]:
    """Upscale and enhance an eye crop using size-specific parameters.

    All tuning values (scale factor, interpolation, CLAHE and bilateral
    filter settings) now come from ``ADAPTIVE_PARAMS``, which is the single
    source of truth. The previous version fetched the params dict but then
    duplicated the same values as hard-coded literals per branch, which
    could silently drift out of sync with the config.

    Args:
        eye_region: Grayscale eye crop.
        eye_size: 'SMALL' | 'MEDIUM' | 'LARGE' (key into ADAPTIVE_PARAMS).

    Returns:
        Enhanced grayscale image, or None on invalid input / failure
        (including an unknown *eye_size* key, caught by the except).
    """
    if eye_region is None or eye_region.size == 0:
        return None

    try:
        params = self.config.ADAPTIVE_PARAMS[eye_size]

        # Upscale with the interpolation configured for this eye size
        # (LANCZOS4 for small eyes, CUBIC otherwise — per config).
        upscaled = cv.resize(
            eye_region, None,
            fx=params['scale_factor'], fy=params['scale_factor'],
            interpolation=params['interpolation']
        )

        # Contrast enhancement followed by edge-preserving smoothing.
        clahe = cv.createCLAHE(
            clipLimit=params['clahe_clip'],
            tileGridSize=params['clahe_grid']
        )
        enhanced = clahe.apply(upscaled)
        enhanced = cv.bilateralFilter(
            enhanced,
            params['bilateral_d'],
            params['bilateral_sigma'],
            params['bilateral_sigma']
        )

        if eye_size == 'SMALL':
            # Unsharp mask: small eyes need extra detail recovery.
            gaussian = cv.GaussianBlur(enhanced, (3, 3), 2.0)
            enhanced = cv.addWeighted(enhanced, 1.5, gaussian, -0.5, 0)
            enhanced = np.clip(enhanced, 0, 255).astype(np.uint8)

        return enhanced

    except Exception as e:
        logger.error(f"Error in adaptive preprocessing: {e}")
        return None
|
| 268 |
+
|
| 269 |
+
def aggressive_morphology(self, mask: np.ndarray, eye_size: str) -> np.ndarray:
    """Consolidate a thresholded pupil mask into solid blobs.

    Closes gaps between fragments, then opens to drop speckle noise;
    small eyes get one extra dilation to fill remaining holes.
    """
    params = self.config.ADAPTIVE_PARAMS[eye_size]
    k = params['morph_kernel']
    kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (k, k))

    # Merge separated fragments into one blob.
    mask = cv.morphologyEx(mask, cv.MORPH_CLOSE, kernel,
                           iterations=params['morph_close_iter'])
    # Strip isolated noise pixels.
    mask = cv.morphologyEx(mask, cv.MORPH_OPEN, kernel,
                           iterations=params['morph_open_iter'])

    if eye_size == 'SMALL':
        small_kernel = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
        mask = cv.dilate(mask, small_kernel, iterations=1)

    return mask
|
| 298 |
+
|
| 299 |
+
def connected_components_analysis(self, mask: np.ndarray, params: Dict) -> Optional[Dict]:
    """Pick the most pupil-like connected component of *mask*.

    Components are filtered by area, circularity and solidity, then ranked
    by a multiplicative score that also rewards a central position and a
    near-square bounding box (so a candidate must be good on every metric).

    Returns the best candidate dict, or None when nothing passes.
    """
    h, w = mask.shape
    area_lo = h * w * params['min_area_ratio']
    area_hi = h * w * params['max_area_ratio']

    num_labels, labels, stats, centroids = cv.connectedComponentsWithStats(
        mask, connectivity=8
    )

    best = None
    for label in range(1, num_labels):  # label 0 is the background
        blob_area = stats[label, cv.CC_STAT_AREA]
        if blob_area < area_lo or blob_area > area_hi:
            continue

        # Isolate this component for contour-based metrics.
        blob_mask = np.zeros_like(mask)
        blob_mask[labels == label] = 255
        contours, _ = cv.findContours(
            blob_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
        )
        if not contours:
            continue
        contour = contours[0]

        # Circularity: 1.0 for a perfect circle.
        perimeter = cv.arcLength(contour, True)
        if perimeter == 0:
            continue
        circularity = 4 * np.pi * blob_area / (perimeter ** 2)
        if circularity < params['min_circularity']:
            continue

        # Solidity (area / convex-hull area) rejects ragged, concave shapes.
        hull_area = cv.contourArea(cv.convexHull(contour))
        if hull_area == 0:
            continue
        solidity = blob_area / hull_area
        if solidity < params['min_solidity']:
            continue

        # Prefer blobs near the horizontal centre of the eye.
        center_score = 1.0 - abs(centroids[label][0] - w / 2) / w

        # Prefer a roughly square bounding box (circular pupil).
        bbox_w = stats[label, cv.CC_STAT_WIDTH]
        bbox_h = stats[label, cv.CC_STAT_HEIGHT]
        if bbox_h == 0:
            continue
        aspect_ratio = bbox_w / bbox_h
        aspect_score = 1.0 - abs(aspect_ratio - 1.0)

        # Multiplicative combination: all metrics must be strong at once.
        score = blob_area * circularity * solidity * center_score * aspect_score

        # Strict '>' keeps the first-seen candidate on score ties,
        # matching max() over an ordered candidate list.
        if best is None or score > best['score']:
            best = {
                'mask': blob_mask,
                'contour': contour,
                'centroid': centroids[label],
                'area': blob_area,
                'circularity': circularity,
                'solidity': solidity,
                'center_score': center_score,
                'aspect_ratio': aspect_ratio,
                'score': score,
            }

    return best
|
| 391 |
+
|
| 392 |
+
def distance_transform_refinement(self, mask: np.ndarray) -> Tuple[int, int]:
    """Return the pupil centre as the deepest point of the blob.

    The maximum of the L2 distance transform is the pixel furthest from
    any blob edge — a more stable centre estimate than image moments.
    """
    distances = cv.distanceTransform(mask, cv.DIST_L2, 5)
    # minMaxLoc returns (minVal, maxVal, minLoc, maxLoc); we want maxLoc.
    return cv.minMaxLoc(distances)[3]
|
| 400 |
+
|
| 401 |
+
def detect_pupil(self, enhanced: np.ndarray, eye_size: str) -> Optional[Dict]:
    """Locate the pupil in an enhanced eye crop via a multi-threshold sweep.

    For each candidate threshold: binarize (inverted — the pupil is dark),
    consolidate with aggressive morphology, then score connected
    components. The best positive-scoring candidate across all thresholds
    wins and gets a distance-transform-refined centre.

    Returns the winning candidate dict with 'dt_center' and 'threshold'
    added, or None when nothing passes the filters.
    """
    params = self.config.ADAPTIVE_PARAMS[eye_size]

    winner = None
    winner_score = 0  # candidates must score strictly above zero
    winner_threshold = 0

    for threshold in params['thresholds']:
        _, binary = cv.threshold(enhanced, threshold, 255, cv.THRESH_BINARY_INV)

        # STAGE 1: solidify fragmented blobs.
        binary = self.aggressive_morphology(binary, eye_size)

        # STAGE 2: pick the best blob for this threshold.
        candidate = self.connected_components_analysis(binary, params)

        if candidate and candidate['score'] > winner_score:
            winner = candidate
            winner_score = candidate['score']
            winner_threshold = threshold

    if not winner:
        return None

    # STAGE 3: refine the centroid with the distance transform.
    winner['dt_center'] = self.distance_transform_refinement(winner['mask'])
    winner['threshold'] = winner_threshold
    return winner
|
| 441 |
+
|
| 442 |
+
def determine_gaze_position(self, centroid_x: int, width: int, prev_position: Optional[str]) -> str:
    """Map a pupil x-coordinate to "LEFT" / "CENTER" / "RIGHT".

    Readings that fall inside the smoothing bands around either boundary
    stick to *prev_position*, suppressing flicker between adjacent labels.
    """
    cfg = self.config
    ratio = centroid_x / width

    # Raw classification from the boundary thresholds.
    if ratio < cfg.LEFT_BOUNDARY:
        position = "LEFT"
    elif ratio > cfg.RIGHT_BOUNDARY:
        position = "RIGHT"
    else:
        position = "CENTER"

    # Temporal smoothing: hold the previous label near the boundaries.
    if prev_position and prev_position != "UNKNOWN":
        in_left_band = cfg.SMOOTHING_LEFT_MIN < ratio < cfg.SMOOTHING_LEFT_MAX
        in_right_band = cfg.SMOOTHING_RIGHT_MIN < ratio < cfg.SMOOTHING_RIGHT_MAX

        if position == "LEFT" and in_left_band:
            position = prev_position
        elif position == "RIGHT" and in_right_band:
            position = prev_position
        elif position == "CENTER" and prev_position != "CENTER":
            if ratio < cfg.SMOOTHING_LEFT_MAX or ratio > cfg.SMOOTHING_RIGHT_MIN:
                position = prev_position

    return position
|
| 465 |
+
|
| 466 |
+
def estimate_eye_position(self, eye_region: np.ndarray, prev_position: Optional[str] = None) -> Tuple[str, Dict]:
    """Estimate the gaze direction for a single eye crop.

    Pipeline: size classification -> adaptive enhancement -> pupil
    detection, with the distance-transform centre (most accurate) mapped
    back to the un-scaled crop for positioning.

    Returns ("UNKNOWN", {}) whenever any stage fails or the crop is too
    small to analyse reliably.
    """
    if eye_region is None or eye_region.size == 0:
        return "UNKNOWN", {}

    h, w = eye_region.shape
    if h < 5 or w < 10:  # crop too small for stable pupil detection
        return "UNKNOWN", {}

    try:
        eye_size = self.classify_eye_size(eye_region)
        enhanced = self.adaptive_preprocessing(eye_region, eye_size)
        if enhanced is None:
            return "UNKNOWN", {}

        pupil = self.detect_pupil(enhanced, eye_size)
        if not pupil:
            return "UNKNOWN", {}

        # The detection ran on the upscaled crop; undo the scaling.
        scale = self.config.ADAPTIVE_PARAMS[eye_size]['scale_factor']
        dt_x, dt_y = pupil['dt_center']
        centroid_x = int(dt_x / scale)

        position = self.determine_gaze_position(centroid_x, w, prev_position)

        details = {
            'eye_size': eye_size,
            'centroid': (centroid_x, int(dt_y / scale)),
            'circularity': pupil['circularity'],
            'solidity': pupil['solidity'],
            'dt_center': pupil['dt_center'],
            'threshold': pupil['threshold'],
        }
        return position, details

    except Exception as e:
        logger.error(f"Error estimating eye position: {e}")
        return "UNKNOWN", {}
|
| 513 |
+
|
| 514 |
+
def process_frame(self, frame: np.ndarray) -> Dict:
    """Analyse one BGR frame: face presence, blink state and gaze."""
    result = {
        'face_detected': False,
        'blink_detected': False,
        'blink_ratio': 0.0,
        'right_eye': {'position': 'UNKNOWN', 'data': {}},
        'left_eye': {'position': 'UNKNOWN', 'data': {}},
        'gaze_position': 'UNKNOWN',
    }

    try:
        landmarks = self.detect_landmarks(frame)
        if landmarks is None:
            return result

        result['face_detected'] = True

        # Blink first: gaze estimation is meaningless while the eyes shut.
        blink_ratio = self.calculate_blink_ratio(landmarks)
        result['blink_ratio'] = round(blink_ratio, 2)
        result['blink_detected'] = blink_ratio > self.config.BLINK_THRESHOLD

        if not result['blink_detected']:
            # Process right eye then left eye, carrying per-eye smoothing
            # state across frames.
            for key, indices, prev_attr in (
                ('right_eye', self.config.RIGHT_EYE, 'prev_position_right'),
                ('left_eye', self.config.LEFT_EYE, 'prev_position_left'),
            ):
                coords = [landmarks[i] for i in indices]
                region = self.extract_eye_region(frame, coords)
                if region is not None:
                    position, data = self.estimate_eye_position(
                        region, getattr(self, prev_attr)
                    )
                    result[key] = {'position': position, 'data': data}
                    setattr(self, prev_attr, position)

            # Overall gaze: trust the right eye unless it is UNKNOWN, in
            # which case fall back to the left (matches the original
            # agree/one-unknown/disagree cascade).
            right_pos = result['right_eye']['position']
            if right_pos == 'UNKNOWN':
                result['gaze_position'] = result['left_eye']['position']
            else:
                result['gaze_position'] = right_pos

    except Exception as e:
        logger.error(f"Error processing frame: {e}")

    return result
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
class EyeTrackingService:
    """
    Eye Tracking Service for SWARA API

    Analyzes eye contact and gaze patterns in videos
    """

    # Shared tracker instance: MediaPipe initialization is expensive, so the
    # tracker is created once and reused by every service object.
    _tracker = None

    def __init__(self):
        """Lazily create the shared EyeTracker on first instantiation."""
        if EyeTrackingService._tracker is None:
            logger.info("Initializing Eye Tracking Service...")
            EyeTrackingService._tracker = EyeTracker()
            logger.info("β Eye Tracking Service initialized")
|
| 593 |
+
|
| 594 |
+
def calculate_score(self, gaze_away_time: float) -> Tuple[int, str]:
    """Map total gaze-away seconds to a (score, rating) pair.

    Thresholds are checked from the best score (smallest allowance)
    downwards, so the first threshold that accommodates the time wins.
    """
    thresholds = EyeTrackingConfig().SCORE_THRESHOLDS
    for score in sorted(thresholds, reverse=True):
        limit, rating = thresholds[score]
        if gaze_away_time <= limit:
            return score, rating
    # Unreachable in practice (score 1 allows infinity), kept as a guard.
    return 1, "Perlu Ditingkatkan"
|
| 603 |
+
|
| 604 |
+
def _annotate_frame(
    self,
    frame: np.ndarray,
    result: Dict,
    frame_number: int,
    total_blinks: int,
    gaze_position: str
) -> np.ndarray:
    """Draw the eye-tracking overlay (info panel + gaze indicator).

    Args:
        frame: Original BGR frame (left untouched; a copy is annotated).
        result: Per-frame analysis dict from EyeTracker.process_frame.
        frame_number: Current frame number.
        total_blinks: Running blink count up to this frame.
        gaze_position: Gaze label for this frame.

    Returns:
        The annotated copy of the frame.
    """
    canvas = frame.copy()

    green, red = (0, 255, 0), (0, 0, 255)
    yellow, blue, white = (0, 255, 255), (255, 0, 0), (255, 255, 255)
    font = cv.FONT_HERSHEY_SIMPLEX

    # Darkened translucent info panel in the top-left corner.
    panel = canvas.copy()
    cv.rectangle(panel, (10, 10), (400, 180), (0, 0, 0), -1)
    cv.addWeighted(panel, 0.6, canvas, 0.4, 0, canvas)

    cv.putText(canvas, f"Frame: {frame_number}", (20, 35), font, 0.6, white, 2)

    face_detected = result['face_detected']
    face_status = "DETECTED" if face_detected else "NOT DETECTED"
    cv.putText(canvas, f"Face: {face_status}", (20, 60), font, 0.6,
               green if face_detected else red, 2)

    blinking = result['blink_detected']
    blink_status = "BLINKING" if blinking else "OPEN"
    cv.putText(canvas, f"Eyes: {blink_status} | Ratio: {result['blink_ratio']:.2f}",
               (20, 85), font, 0.6, yellow if blinking else green, 2)
    cv.putText(canvas, f"Total Blinks: {total_blinks}", (20, 110), font, 0.6, white, 2)

    # Gaze label color: green when centred, yellow when off to a side,
    # red when undetermined.
    if gaze_position == 'CENTER':
        gaze_color = green
    elif gaze_position in ('LEFT', 'RIGHT'):
        gaze_color = yellow
    else:
        gaze_color = red
    cv.putText(canvas, f"Gaze: {gaze_position}", (20, 135), font, 0.7, gaze_color, 2)

    if face_detected and not blinking:
        left_pos = result['left_eye']['position']
        right_pos = result['right_eye']['position']
        cv.putText(canvas, f"L:{left_pos} | R:{right_pos}", (20, 160),
                   font, 0.5, blue, 2)

    # Large gaze-direction indicator near the bottom of the frame.
    h, w = canvas.shape[:2]
    y = h - 60
    if gaze_position == 'CENTER':
        cv.circle(canvas, (w // 2, y), 30, green, -1)
        cv.putText(canvas, "CENTER", (w // 2 - 50, y + 10), font, 0.8, (0, 0, 0), 2)
    elif gaze_position == 'LEFT':
        cv.arrowedLine(canvas, (w // 2, y), (w // 2 - 80, y), yellow, 5, tipLength=0.3)
        cv.putText(canvas, "LEFT", (w // 2 - 150, y + 10), font, 0.8, yellow, 2)
    elif gaze_position == 'RIGHT':
        cv.arrowedLine(canvas, (w // 2, y), (w // 2 + 80, y), yellow, 5, tipLength=0.3)
        cv.putText(canvas, "RIGHT", (w // 2 + 50, y + 10), font, 0.8, yellow, 2)
    else:
        cv.putText(canvas, "UNKNOWN", (w // 2 - 60, y + 10), font, 0.8, red, 2)

    return canvas
|
| 699 |
+
|
| 700 |
+
def analyze_video(
    self,
    video_path: str,
    progress_callback: Optional[callable] = None,
    save_annotated_video: bool = False,
    output_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Analyze video for eye contact.

    Fixes over the previous version:
    - removed the accidentally duplicated "Video properties" log line
    - VideoCapture / VideoWriter are now released in a ``finally`` block,
      so the resources are not leaked when processing raises mid-video
    - a zero-frame video now raises a clear ValueError instead of a
      ZeroDivisionError inside the summary math

    Args:
        video_path: Path to video file
        progress_callback: Optional callback(frame, total, message) for progress updates
        save_annotated_video: Whether to save annotated video
        output_path: Path for output video (default: 'output/eye_tracking_annotated.mp4')

    Returns:
        Dict containing eye tracking analysis results

    Raises:
        ValueError: If the video cannot be opened or contains no readable frames.
    """
    try:
        logger.info(f"Analyzing video with Eye Tracking Service: {video_path}")
        logger.info(f"Save annotated video: {save_annotated_video}")

        cap = cv.VideoCapture(video_path)
        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {video_path}")

        out = None
        try:
            # Video properties; fps falls back to 30 when the container
            # reports 0 (common with some webm/mp4 encoders).
            fps = int(cap.get(cv.CAP_PROP_FPS)) or 30
            width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
            total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

            logger.info(f"Video properties: {width}x{height} @ {fps}FPS, {total_frames} frames")

            # Set up the annotated-video writer if requested.
            if save_annotated_video:
                if output_path is None:
                    import os
                    os.makedirs('output', exist_ok=True)
                    output_path = 'output/eye_tracking_annotated.mp4'
                fourcc = cv.VideoWriter_fourcc(*'mp4v')
                out = cv.VideoWriter(output_path, fourcc, fps, (width, height))
                logger.info(f"Output video will be saved to: {output_path}")

            # Counters
            frame_count = 0
            gaze_away_frames = 0
            blink_count = 0
            position_counts = {'CENTER': 0, 'LEFT': 0, 'RIGHT': 0, 'UNKNOWN': 0}
            prev_blink = False

            # NOTE(review): 'pupil_detected_frames' is never incremented in
            # this method; kept for output-schema compatibility.
            debug_stats = {
                'face_detected_frames': 0,
                'pupil_detected_frames': 0,
                'center_gaze_frames': 0,
                'left_gaze_frames': 0,
                'right_gaze_frames': 0,
                'unknown_frames': 0
            }

            logger.info("Starting frame processing...")

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frame_count += 1

                # Periodic progress updates (guard total_frames == 0:
                # some containers do not report a frame count).
                if progress_callback and frame_count % 30 == 0 and total_frames > 0:
                    progress = int((frame_count / total_frames) * 100)
                    progress_callback(frame_count, total_frames, f"Eye tracking: {progress}%")

                result = self._tracker.process_frame(frame)

                if result['face_detected']:
                    debug_stats['face_detected_frames'] += 1

                # Count rising edges only, so a multi-frame blink counts once.
                if result['blink_detected'] and not prev_blink:
                    blink_count += 1
                    logger.debug(f"Frame {frame_count}: Blink detected (total: {blink_count})")
                prev_blink = result['blink_detected']

                gaze_pos = result['gaze_position']
                position_counts[gaze_pos] = position_counts.get(gaze_pos, 0) + 1

                if gaze_pos == 'CENTER':
                    debug_stats['center_gaze_frames'] += 1
                elif gaze_pos == 'LEFT':
                    debug_stats['left_gaze_frames'] += 1
                elif gaze_pos == 'RIGHT':
                    debug_stats['right_gaze_frames'] += 1
                else:
                    debug_stats['unknown_frames'] += 1

                # Only confident off-centre gazes count as "looking away".
                if gaze_pos != 'CENTER' and gaze_pos != 'UNKNOWN':
                    gaze_away_frames += 1

                if save_annotated_video and out is not None:
                    annotated_frame = self._annotate_frame(
                        frame, result, frame_count, blink_count, gaze_pos
                    )
                    out.write(annotated_frame)

                if frame_count % 100 == 0:
                    logger.info(f"Processed {frame_count}/{total_frames} frames | "
                                f"Gaze: C:{debug_stats['center_gaze_frames']} "
                                f"L:{debug_stats['left_gaze_frames']} "
                                f"R:{debug_stats['right_gaze_frames']} | "
                                f"Blinks: {blink_count}")
        finally:
            # Always release the capture/writer, even if processing failed.
            cap.release()
            if out is not None:
                out.release()

        if out is not None:
            logger.info(f"β Annotated video saved: {output_path}")

        if frame_count == 0:
            raise ValueError(f"No frames could be read from video: {video_path}")

        # Metrics (frame_count > 0 and fps > 0, so divisions are safe).
        duration = frame_count / fps
        gaze_away_time = gaze_away_frames / fps
        score, rating = self.calculate_score(gaze_away_time)

        # Summary statistics
        logger.info("="*60)
        logger.info("EYE TRACKING ANALYSIS SUMMARY")
        logger.info("="*60)
        logger.info(f"Total Frames Processed: {frame_count}")
        logger.info(f"Face Detection Rate: {debug_stats['face_detected_frames']}/{frame_count} "
                    f"({debug_stats['face_detected_frames']/frame_count*100:.1f}%)")
        logger.info(f"\nGaze Distribution:")
        logger.info(f"  CENTER: {debug_stats['center_gaze_frames']} frames "
                    f"({debug_stats['center_gaze_frames']/frame_count*100:.1f}%)")
        logger.info(f"  LEFT: {debug_stats['left_gaze_frames']} frames "
                    f"({debug_stats['left_gaze_frames']/frame_count*100:.1f}%)")
        logger.info(f"  RIGHT: {debug_stats['right_gaze_frames']} frames "
                    f"({debug_stats['right_gaze_frames']/frame_count*100:.1f}%)")
        logger.info(f"  UNKNOWN: {debug_stats['unknown_frames']} frames "
                    f"({debug_stats['unknown_frames']/frame_count*100:.1f}%)")
        logger.info(f"\nGaze Away Time: {gaze_away_time:.2f}s / {duration:.2f}s "
                    f"({gaze_away_time/duration*100:.1f}%)")
        logger.info(f"Total Blinks: {blink_count} ({blink_count/duration*60:.1f} blinks/minute)")
        logger.info(f"\nFinal Score: {score}/5 - {rating}")
        logger.info("="*60)

        # Build result
        result = {
            'success': True,
            'video_info': {
                'duration': round(duration, 2),
                'fps': fps,
                'total_frames': frame_count,
                'resolution': f"{width}x{height}"
            },
            'eye_contact_analysis': {
                'total_gaze_away_time': round(gaze_away_time, 2),
                'gaze_away_percentage': round((gaze_away_time / duration) * 100, 2) if duration > 0 else 0,
                'score': score,
                'rating': rating,
                'position_distribution': {
                    k: {
                        'frames': v,
                        'percentage': round((v / frame_count) * 100, 2) if frame_count > 0 else 0
                    }
                    for k, v in position_counts.items()
                }
            },
            'blink_analysis': {
                'total_blinks': blink_count,
                'blinks_per_minute': round((blink_count / duration) * 60, 2) if duration > 0 else 0
            },
            'debug_stats': debug_stats
        }

        if save_annotated_video and output_path:
            result['annotated_video_path'] = output_path

        logger.info(f"β Eye Tracking analysis completed: Score {score}/5 - {rating}")
        return result

    except Exception as e:
        logger.error(f"β Eye Tracking analysis failed: {e}")
        raise
|
app/services/eye_tracking_production.py
ADDED
|
@@ -0,0 +1,873 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
"""eye-tracking-production.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/13Z0FJCvPUstAc77sypU_QJFfuDOycDDe
"""

# NOTE: the original notebook cell `!pip install mediapipe` was removed here.
# IPython shell magic is a SyntaxError in a plain .py module; the dependency
# must be installed via requirements.txt / the deployment image instead.

# ===== SWARA EYE TRACKING MODULE (PRODUCTION) =====
# Production-ready eye tracking for the SWARA website.
# Optimized for performance, error handling, and integration.
# ====================================================

import cv2 as cv
import mediapipe as mp
import numpy as np
import math
import json
from datetime import datetime
from typing import Dict, List, Tuple, Optional
import logging

# ===== SETUP LOGGING =====
# Module-wide logger; basicConfig is a no-op if the host app already
# configured logging, so this is safe in both script and service contexts.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger('SWARA_EyeTracking')
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class EyeTrackingConfig:
    """Configuration class for eye tracking parameters.

    All values are class-level constants; instances are only created so the
    tracker can hold (and, in principle, override) a configuration object.
    """

    # MediaPipe FaceMesh landmark indices outlining each eye contour.
    # Index 0 and 8 of each list are the horizontal corners; 12 and 4 are the
    # vertical extremes (used by the blink-ratio calculation).
    LEFT_EYE = [362, 382, 381, 380, 374, 373, 390, 249, 263, 466, 388, 387, 386, 385, 384, 398]
    RIGHT_EYE = [33, 7, 163, 144, 145, 153, 154, 155, 133, 173, 157, 158, 159, 160, 161, 246]

    # Eye size classification thresholds (cropped-eye bounding-box area in px^2):
    # area < SMALL -> 'SMALL', area < MEDIUM -> 'MEDIUM', else 'LARGE'.
    SMALL_EYE_THRESHOLD = 600
    MEDIUM_EYE_THRESHOLD = 1500

    # Gaze position boundaries as a fraction of the eye-region width:
    # pupil-centroid ratio < LEFT_BOUNDARY -> LEFT, > RIGHT_BOUNDARY -> RIGHT.
    LEFT_BOUNDARY = 0.35
    RIGHT_BOUNDARY = 0.65

    # Temporal smoothing zones: when the centroid ratio falls inside one of
    # these narrow bands around a boundary, the previous frame's position is
    # kept to suppress flicker between adjacent classifications.
    SMOOTHING_LEFT_MIN = 0.33
    SMOOTHING_LEFT_MAX = 0.37
    SMOOTHING_RIGHT_MIN = 0.63
    SMOOTHING_RIGHT_MAX = 0.67

    # Blink ratio threshold: horizontal/vertical eye-opening ratio above this
    # value is counted as a blink.
    BLINK_THRESHOLD = 5.5

    # Score thresholds (in seconds of total gaze-away time):
    # score -> (max gaze-away seconds for that score, Indonesian rating label).
    SCORE_THRESHOLDS = {
        5: (5, "Sangat Baik"),
        4: (8, "Baik"),
        3: (10, "Cukup Baik"),
        2: (12, "Buruk"),
        1: (float('inf'), "Perlu Ditingkatkan")
    }

    # Adaptive pupil-detection parameters keyed by eye size. Smaller eye crops
    # get more aggressive upscaling, contrast enhancement, and more lenient
    # shape filters because the pupil occupies fewer pixels.
    ADAPTIVE_PARAMS = {
        'SMALL': {
            'scale_factor': 3.0,
            'interpolation': cv.INTER_LANCZOS4,
            'clahe_clip': 4.0,
            'clahe_grid': (4, 4),
            'bilateral_d': 7,
            'bilateral_sigma': 75,
            'thresholds': [20, 25, 30, 35, 40, 45, 50, 55],
            'min_area_ratio': 0.001,
            'max_area_ratio': 0.50,
            'min_circularity': 0.3,
            'min_solidity': 0.5,
            'morph_kernel': 5,
            'morph_close_iter': 3,
            'morph_open_iter': 2
        },
        'MEDIUM': {
            'scale_factor': 2.0,
            'interpolation': cv.INTER_CUBIC,
            'clahe_clip': 3.0,
            'clahe_grid': (8, 8),
            'bilateral_d': 5,
            'bilateral_sigma': 50,
            'thresholds': [30, 35, 40, 45, 50, 55, 60],
            'min_area_ratio': 0.005,
            'max_area_ratio': 0.45,
            'min_circularity': 0.4,
            'min_solidity': 0.6,
            'morph_kernel': 3,
            'morph_close_iter': 2,
            'morph_open_iter': 1
        },
        'LARGE': {
            'scale_factor': 1.5,
            'interpolation': cv.INTER_CUBIC,
            'clahe_clip': 2.0,
            'clahe_grid': (8, 8),
            'bilateral_d': 3,
            'bilateral_sigma': 30,
            'thresholds': [35, 40, 45, 50, 55, 60, 65],
            'min_area_ratio': 0.01,
            'max_area_ratio': 0.40,
            'min_circularity': 0.5,
            'min_solidity': 0.7,
            'morph_kernel': 3,
            'morph_close_iter': 2,
            'morph_open_iter': 1
        }
    }
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class EyeTracker:
    """Main class for eye tracking.

    Wraps a MediaPipe FaceMesh instance and implements the per-frame
    pipeline: landmark detection -> blink ratio -> eye-region extraction ->
    adaptive pupil detection -> gaze-position classification with temporal
    smoothing. One instance should be reused across frames so that the
    previous-position state is available for smoothing.
    """

    def __init__(self, config: EyeTrackingConfig = None):
        # Fall back to the default configuration when none is supplied.
        self.config = config or EyeTrackingConfig()
        self.face_mesh = mp.solutions.face_mesh.FaceMesh(
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5
        )
        # Last classified gaze position per eye; used for temporal smoothing.
        self.prev_position_right = None
        self.prev_position_left = None
        logger.info("EyeTracker initialized successfully")

    def __del__(self):
        """Cleanup resources"""
        # hasattr guard: __del__ may run even if __init__ failed part-way.
        if hasattr(self, 'face_mesh') and self.face_mesh:
            self.face_mesh.close()

    @staticmethod
    def euclidean_distance(point1: Tuple[int, int], point2: Tuple[int, int]) -> float:
        """Calculate Euclidean distance between two points"""
        return math.sqrt((point2[0] - point1[0])**2 + (point2[1] - point1[1])**2)

    def detect_landmarks(self, frame: np.ndarray) -> Optional[List[Tuple[int, int]]]:
        """Detect facial landmarks.

        Returns pixel coordinates for every FaceMesh landmark of the first
        detected face, or None when no face is found or an error occurs.
        """
        try:
            # MediaPipe expects RGB input; OpenCV frames are BGR.
            rgb_frame = cv.cvtColor(frame, cv.COLOR_BGR2RGB)
            results = self.face_mesh.process(rgb_frame)

            if not results.multi_face_landmarks:
                return None

            img_height, img_width = frame.shape[:2]
            # Convert normalized [0, 1] landmark coords to pixel coordinates.
            mesh_coords = [
                (int(point.x * img_width), int(point.y * img_height))
                for point in results.multi_face_landmarks[0].landmark
            ]
            return mesh_coords
        except Exception as e:
            logger.error(f"Error detecting landmarks: {e}")
            return None

    def calculate_blink_ratio(self, landmarks: List[Tuple[int, int]]) -> float:
        """Calculate blink ratio from eye landmarks.

        Ratio = horizontal eye width / vertical eye opening, averaged over
        both eyes. Larger values mean a more closed eye; values above
        config.BLINK_THRESHOLD are treated as blinks. Returns 0 on error or
        when a vertical distance is zero (degenerate landmarks).
        """
        try:
            # Right eye: indices 0/8 are horizontal corners, 12/4 vertical extremes.
            rh_distance = self.euclidean_distance(
                landmarks[self.config.RIGHT_EYE[0]],
                landmarks[self.config.RIGHT_EYE[8]]
            )
            rv_distance = self.euclidean_distance(
                landmarks[self.config.RIGHT_EYE[12]],
                landmarks[self.config.RIGHT_EYE[4]]
            )

            # Left eye, same corner convention.
            lh_distance = self.euclidean_distance(
                landmarks[self.config.LEFT_EYE[0]],
                landmarks[self.config.LEFT_EYE[8]]
            )
            lv_distance = self.euclidean_distance(
                landmarks[self.config.LEFT_EYE[12]],
                landmarks[self.config.LEFT_EYE[4]]
            )

            if rv_distance == 0 or lv_distance == 0:
                return 0

            re_ratio = rh_distance / rv_distance
            le_ratio = lh_distance / lv_distance
            ratio = (re_ratio + le_ratio) / 2

            return ratio
        except Exception as e:
            logger.error(f"Error calculating blink ratio: {e}")
            return 0

    def extract_eye_region(self, frame: np.ndarray, eye_coords: List[Tuple[int, int]]) -> Optional[np.ndarray]:
        """Extract and crop the eye region from the frame.

        Masks the grayscale frame with the eye polygon, fills everything
        outside the eye with a neutral gray so later thresholding ignores
        the surroundings, and crops to the eye's bounding box. Returns a 2-D
        grayscale array, or None for empty crops / errors.
        """
        try:
            gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
            mask = np.zeros(gray.shape, dtype=np.uint8)

            cv.fillPoly(mask, [np.array(eye_coords, dtype=np.int32)], 255)
            eye = cv.bitwise_and(gray, gray, mask=mask)
            # 155 = mid-gray: brighter than any pupil so the dark-pixel
            # threshold never picks up the background.
            eye[mask == 0] = 155

            # Bounding box of the eye polygon.
            x_coords = [coord[0] for coord in eye_coords]
            y_coords = [coord[1] for coord in eye_coords]

            min_x, max_x = min(x_coords), max(x_coords)
            min_y, max_y = min(y_coords), max(y_coords)

            cropped = eye[min_y:max_y, min_x:max_x]
            return cropped if cropped.size > 0 else None
        except Exception as e:
            logger.error(f"Error extracting eye region: {e}")
            return None

    def classify_eye_size(self, eye_region: np.ndarray) -> str:
        """Classify eye size (SMALL/MEDIUM/LARGE).

        Uses the bounding-box area of the cropped eye region against the
        configured thresholds. Expects a 2-D grayscale crop (as produced by
        extract_eye_region). Returns 'UNKNOWN' for missing/empty input.
        """
        if eye_region is None or eye_region.size == 0:
            return 'UNKNOWN'

        h, w = eye_region.shape
        area = h * w

        if area < self.config.SMALL_EYE_THRESHOLD:
            return 'SMALL'
        elif area < self.config.MEDIUM_EYE_THRESHOLD:
            return 'MEDIUM'
        else:
            return 'LARGE'

    def adaptive_preprocessing(self, eye_region: np.ndarray, eye_size: str) -> Optional[np.ndarray]:
        """Apply adaptive preprocessing based on eye size.

        Pipeline: upscale -> CLAHE contrast enhancement -> bilateral
        (edge-preserving) denoise -> optional unsharp masking for SMALL
        eyes. Parameters come from config.ADAPTIVE_PARAMS[eye_size].
        Returns the enhanced image or None on error.
        """
        try:
            params = self.config.ADAPTIVE_PARAMS[eye_size]

            # Upscaling: small crops need more pixels for stable contours.
            upscaled = cv.resize(
                eye_region, None,
                fx=params['scale_factor'],
                fy=params['scale_factor'],
                interpolation=params['interpolation']
            )

            # CLAHE enhancement (local contrast equalization).
            clahe = cv.createCLAHE(
                clipLimit=params['clahe_clip'],
                tileGridSize=params['clahe_grid']
            )
            enhanced = clahe.apply(upscaled)

            # Bilateral filter: smooths noise while keeping pupil edges.
            enhanced = cv.bilateralFilter(
                enhanced,
                params['bilateral_d'],
                params['bilateral_sigma'],
                params['bilateral_sigma']
            )

            # Unsharp masking for SMALL eyes to recover edge definition
            # lost in the aggressive 3x upscale.
            if eye_size == 'SMALL':
                gaussian = cv.GaussianBlur(enhanced, (3, 3), 2.0)
                enhanced = cv.addWeighted(enhanced, 1.5, gaussian, -0.5, 0)
                enhanced = np.clip(enhanced, 0, 255).astype(np.uint8)

            return enhanced
        except Exception as e:
            logger.error(f"Error in adaptive preprocessing: {e}")
            return None

    def aggressive_morphology(self, mask: np.ndarray, eye_size: str) -> np.ndarray:
        """Apply aggressive morphology operations to a binary pupil mask.

        Close -> open with a size-dependent elliptical kernel; SMALL eyes
        get an extra dilation to merge fragmented pupil blobs.
        """
        params = self.config.ADAPTIVE_PARAMS[eye_size]
        kernel = cv.getStructuringElement(
            cv.MORPH_ELLIPSE,
            (params['morph_kernel'], params['morph_kernel'])
        )

        # Close gaps inside the pupil blob.
        mask = cv.morphologyEx(
            mask, cv.MORPH_CLOSE, kernel,
            iterations=params['morph_close_iter']
        )

        # Remove small speckle noise.
        mask = cv.morphologyEx(
            mask, cv.MORPH_OPEN, kernel,
            iterations=params['morph_open_iter']
        )

        # Fill holes: extra dilation for tiny eye crops only.
        if eye_size == 'SMALL':
            kernel_dilate = cv.getStructuringElement(cv.MORPH_ELLIPSE, (3, 3))
            mask = cv.dilate(mask, kernel_dilate, iterations=1)

        return mask

    def connected_components_analysis(self, mask: np.ndarray, params: Dict) -> Optional[Dict]:
        """Analyze connected components and find the best pupil candidate.

        Each component is filtered by area, circularity, and solidity, then
        scored by area * circularity * solidity * center-proximity *
        aspect-ratio-roundness. Returns the highest-scoring candidate dict
        (mask, contour, metrics, centroid) or None when nothing qualifies.
        """
        h, w = mask.shape
        min_area = (h * w) * params['min_area_ratio']
        max_area = (h * w) * params['max_area_ratio']

        # Connected components with stats (area, bbox) and centroids.
        num_labels, labels, stats, centroids = cv.connectedComponentsWithStats(
            mask, connectivity=8
        )

        candidates = []

        # Label 0 is the background, so start at 1.
        for i in range(1, num_labels):
            area = stats[i, cv.CC_STAT_AREA]

            if area < min_area or area > max_area:
                continue

            # Isolate this component into its own binary mask.
            component_mask = np.zeros_like(mask)
            component_mask[labels == i] = 255

            # Outer contour of the component (one component -> one contour).
            contours, _ = cv.findContours(
                component_mask, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
            )
            if not contours:
                continue

            contour = contours[0]

            # Circularity: 4*pi*A/P^2, 1.0 for a perfect circle.
            perimeter = cv.arcLength(contour, True)
            if perimeter == 0:
                continue
            circularity = 4 * math.pi * area / (perimeter * perimeter)

            if circularity < params['min_circularity']:
                continue

            # Solidity: area / convex-hull area, rejects concave blobs.
            hull = cv.convexHull(contour)
            hull_area = cv.contourArea(hull)
            if hull_area == 0:
                continue
            solidity = area / hull_area

            if solidity < params['min_solidity']:
                continue

            # Center score: prefer blobs near the horizontal middle of the eye.
            center_x = w / 2
            cx = centroids[i][0]
            distance_from_center = abs(cx - center_x) / w
            center_score = 1.0 - distance_from_center

            # Aspect ratio of the bounding box; pupils should be near-square.
            x, y, w_bbox, h_bbox = (
                stats[i, cv.CC_STAT_LEFT],
                stats[i, cv.CC_STAT_TOP],
                stats[i, cv.CC_STAT_WIDTH],
                stats[i, cv.CC_STAT_HEIGHT]
            )
            if h_bbox == 0:
                continue
            aspect_ratio = w_bbox / h_bbox
            aspect_score = 1.0 - abs(aspect_ratio - 1.0)

            # Combined score: product of all quality measures.
            score = area * circularity * solidity * center_score * aspect_score

            candidates.append({
                'mask': component_mask,
                'contour': contour,
                'area': area,
                'circularity': circularity,
                'solidity': solidity,
                'center_score': center_score,
                'aspect_ratio': aspect_ratio,
                'score': score,
                'centroid': centroids[i]
            })

        if not candidates:
            return None

        return max(candidates, key=lambda x: x['score'])

    def distance_transform_refinement(self, mask: np.ndarray) -> Tuple[int, int]:
        """Refine the pupil centroid using a distance transform.

        The point farthest from any mask boundary is a robust pupil-center
        estimate even when the blob is irregular.
        """
        dist_transform = cv.distanceTransform(mask, cv.DIST_L2, 5)
        _, _, _, max_loc = cv.minMaxLoc(dist_transform)
        return max_loc

    def detect_pupil(self, enhanced: np.ndarray, eye_size: str) -> Optional[Dict]:
        """Detect the pupil using a multi-stage pipeline.

        Sweeps the size-specific threshold list, runs morphology +
        connected-components analysis per threshold, keeps the overall
        best-scoring candidate, then refines its center via the distance
        transform. Returns the candidate dict (with 'dt_center' and
        'threshold' added) or None when no pupil is found.
        """
        params = self.config.ADAPTIVE_PARAMS[eye_size]

        best_candidate = None
        best_score = 0
        best_threshold = 0

        for thresh_val in params['thresholds']:
            # Inverse binary threshold: pupil pixels (dark) become white.
            _, mask = cv.threshold(enhanced, thresh_val, 255, cv.THRESH_BINARY_INV)

            # Morphology cleanup.
            mask = self.aggressive_morphology(mask, eye_size)

            # Score candidates for this threshold.
            candidate = self.connected_components_analysis(mask, params)

            if candidate and candidate['score'] > best_score:
                best_score = candidate['score']
                best_candidate = candidate
                best_threshold = thresh_val
                best_candidate['refined_mask'] = mask

        if not best_candidate:
            return None

        # Distance-transform refinement of the winning component's center.
        dt_center = self.distance_transform_refinement(best_candidate['mask'])
        best_candidate['dt_center'] = dt_center
        best_candidate['threshold'] = best_threshold

        return best_candidate

    def determine_gaze_position(self, centroid_x: int, width: int, prev_position: Optional[str]) -> str:
        """Determine gaze position (LEFT/CENTER/RIGHT).

        Classifies by the pupil centroid's horizontal ratio within the eye
        region, then applies temporal smoothing: inside the narrow bands
        around either boundary, the previous frame's position is kept to
        suppress flicker.
        """
        ratio = centroid_x / width

        # Base position from configured boundaries.
        if ratio < self.config.LEFT_BOUNDARY:
            position = "LEFT"
        elif ratio > self.config.RIGHT_BOUNDARY:
            position = "RIGHT"
        else:
            position = "CENTER"

        # Temporal smoothing near the decision boundaries.
        if prev_position and prev_position != "UNKNOWN":
            in_left_boundary = (
                self.config.SMOOTHING_LEFT_MIN <= ratio <= self.config.SMOOTHING_LEFT_MAX
            )
            in_right_boundary = (
                self.config.SMOOTHING_RIGHT_MIN <= ratio <= self.config.SMOOTHING_RIGHT_MAX
            )

            if in_left_boundary or in_right_boundary:
                position = prev_position

        return position

    def estimate_eye_position(self, eye_region: np.ndarray, prev_position: Optional[str] = None) -> Tuple[str, Dict]:
        """Estimate eye gaze position for a single cropped eye region.

        Returns (position, detail_dict); ("UNKNOWN", {}) whenever the region
        is missing, too small (< 5x10 px), or any pipeline stage fails.
        """
        if eye_region is None or eye_region.size == 0:
            return "UNKNOWN", {}

        h, w = eye_region.shape
        # Too few pixels for a meaningful pupil detection.
        if h < 5 or w < 10:
            return "UNKNOWN", {}

        try:
            # Classify eye size to pick the adaptive parameter set.
            eye_size = self.classify_eye_size(eye_region)
            if eye_size == 'UNKNOWN':
                return "UNKNOWN", {}

            # Size-adaptive preprocessing.
            enhanced = self.adaptive_preprocessing(eye_region, eye_size)
            if enhanced is None:
                return "UNKNOWN", {}

            # Multi-threshold pupil detection.
            pupil_data = self.detect_pupil(enhanced, eye_size)
            if not pupil_data:
                return "UNKNOWN", {}

            # Centroid in the *upscaled* coordinate space.
            cx, cy = pupil_data['dt_center']
            upscaled_h, upscaled_w = enhanced.shape

            # Classify gaze direction with temporal smoothing.
            position = self.determine_gaze_position(cx, upscaled_w, prev_position)

            # Detailed result for diagnostics / reporting.
            result = {
                'position': position,
                'centroid': (cx, cy),
                'centroid_ratio': cx / upscaled_w,
                'eye_size': eye_size,
                'metrics': {
                    'circularity': pupil_data['circularity'],
                    'solidity': pupil_data['solidity'],
                    'area': pupil_data['area'],
                    'threshold': pupil_data['threshold']
                }
            }

            return position, result

        except Exception as e:
            logger.error(f"Error estimating eye position: {e}")
            return "UNKNOWN", {}

    def process_frame(self, frame: np.ndarray) -> Dict:
        """Process a single BGR frame and return the full analysis dict.

        The returned dict always has the same shape (face_detected,
        blink_detected, blink_ratio, right_eye, left_eye, gaze_position),
        with safe defaults when no face is found or an error occurs.
        """
        result = {
            'timestamp': datetime.now().isoformat(),
            'face_detected': False,
            'blink_detected': False,
            'blink_ratio': 0.0,
            'right_eye': {'position': 'UNKNOWN', 'data': {}},
            'left_eye': {'position': 'UNKNOWN', 'data': {}},
            'gaze_position': 'UNKNOWN'
        }

        try:
            # Landmark detection; bail out early when no face is present.
            landmarks = self.detect_landmarks(frame)
            if not landmarks:
                return result

            result['face_detected'] = True

            # Blink detection from the horizontal/vertical opening ratio.
            blink_ratio = self.calculate_blink_ratio(landmarks)
            result['blink_ratio'] = round(blink_ratio, 2)
            result['blink_detected'] = blink_ratio > self.config.BLINK_THRESHOLD

            # Crop both eye regions from the frame.
            right_coords = [landmarks[i] for i in self.config.RIGHT_EYE]
            left_coords = [landmarks[i] for i in self.config.LEFT_EYE]

            right_eye_region = self.extract_eye_region(frame, right_coords)
            left_eye_region = self.extract_eye_region(frame, left_coords)

            # Estimate each eye's gaze position (with per-eye smoothing state).
            right_pos, right_data = self.estimate_eye_position(
                right_eye_region, self.prev_position_right
            )
            left_pos, left_data = self.estimate_eye_position(
                left_eye_region, self.prev_position_left
            )

            result['right_eye'] = {'position': right_pos, 'data': right_data}
            result['left_eye'] = {'position': left_pos, 'data': left_data}

            # Use the right eye as primary (typically more stable).
            result['gaze_position'] = right_pos

            # Persist positions for the next frame's temporal smoothing.
            self.prev_position_right = right_pos
            self.prev_position_left = left_pos

        except Exception as e:
            logger.error(f"Error processing frame: {e}")

        return result
|
| 562 |
+
|
| 563 |
+
|
| 564 |
+
class VideoAnalyzer:
    """Analyze a video and generate a comprehensive eye-contact report.

    Drives an EyeTracker over every frame of a recorded video, accumulates
    gaze/blink/eye-size statistics, and maps the total gaze-away time to a
    1-5 score using the configured thresholds.
    """

    def __init__(self, config: "EyeTrackingConfig" = None):
        """Create an analyzer.

        Args:
            config: Optional EyeTrackingConfig; defaults are used when None.
        """
        self.config = config or EyeTrackingConfig()
        self.tracker = EyeTracker(config)

    def calculate_score(self, gaze_away_time: float) -> Tuple[int, str]:
        """Map total gaze-away time (seconds) to a (score, rating) tuple.

        Iterates the score buckets from best (5) to worst and returns the
        first one whose threshold covers the given time; falls back to
        (1, "Perlu Ditingkatkan") defensively.
        """
        for score, (threshold, rating) in sorted(
            self.config.SCORE_THRESHOLDS.items(), reverse=True
        ):
            if gaze_away_time <= threshold:
                return score, rating
        return 1, "Perlu Ditingkatkan"

    def analyze_video(
        self,
        video_path: str,
        output_path: Optional[str] = None,
        progress_callback: Optional[callable] = None
    ) -> Dict:
        """
        Analyze video and return comprehensive report

        Args:
            video_path: Path to input video
            output_path: Optional path for output video with annotations
            progress_callback: Optional callback function(current, total, status)

        Returns:
            Dictionary containing analysis results (or {'error': ...} when
            the video cannot be opened)
        """
        logger.info(f"Starting video analysis: {video_path}")

        cap = cv.VideoCapture(video_path)
        if not cap.isOpened():
            logger.error(f"Failed to open video: {video_path}")
            return {'error': 'Failed to open video file'}

        # Video properties; `or 30` guards against cameras/containers that
        # report 0 FPS, which would break the time arithmetic below.
        fps = int(cap.get(cv.CAP_PROP_FPS)) or 30
        width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))

        # Initialize video writer if an annotated output was requested.
        writer = None
        if output_path:
            fourcc = cv.VideoWriter_fourcc(*'mp4v')
            writer = cv.VideoWriter(output_path, fourcc, fps, (width, height))

        # Accumulators.
        frame_count = 0
        gaze_away_frames = 0
        blink_count = 0
        position_counts = {'CENTER': 0, 'LEFT': 0, 'RIGHT': 0, 'UNKNOWN': 0}
        eye_size_counts = {'SMALL': 0, 'MEDIUM': 0, 'LARGE': 0}

        prev_blink = False

        # Process frames sequentially until the stream ends.
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            result = self.tracker.process_frame(frame)

            # Gaze distribution; LEFT/RIGHT count as "looking away".
            position = result['gaze_position']
            position_counts[position] += 1

            if position not in ['CENTER', 'UNKNOWN']:
                gaze_away_frames += 1

            # Count blinks on the rising edge only (non-blink -> blink).
            if result['blink_detected'] and not prev_blink:
                blink_count += 1
            prev_blink = result['blink_detected']

            # Track detected eye size (right eye is the primary signal).
            if result['right_eye']['data']:
                eye_size = result['right_eye']['data'].get('eye_size', 'UNKNOWN')
                if eye_size in eye_size_counts:
                    eye_size_counts[eye_size] += 1

            # Annotate frame if output requested.
            if writer:
                # Add annotations here if needed
                writer.write(frame)

            # Progress callback, throttled to every 10th frame.
            if progress_callback and frame_count % 10 == 0:
                progress_callback(frame_count, total_frames, position)

        # Release OS resources.
        cap.release()
        if writer:
            writer.release()

        # Derived metrics. fps is guaranteed non-zero above, but duration and
        # frame_count can still be 0 for an empty/unreadable stream, so every
        # division below is guarded (consistent with app/services/eye_tracking.py).
        duration = frame_count / fps
        gaze_away_time = gaze_away_frames / fps
        score, rating = self.calculate_score(gaze_away_time)
        total_eye_size_frames = sum(eye_size_counts.values())

        report = {
            'success': True,
            'video_info': {
                'path': video_path,
                'duration': round(duration, 2),
                'fps': fps,
                'resolution': f"{width}x{height}",
                'total_frames': frame_count
            },
            'eye_contact_analysis': {
                'total_gaze_away_time': round(gaze_away_time, 2),
                'gaze_away_percentage': round((gaze_away_time / duration) * 100, 2) if duration > 0 else 0,
                'score': score,
                'rating': rating,
                'position_distribution': {
                    k: {
                        'frames': v,
                        'percentage': round((v / frame_count) * 100, 2) if frame_count > 0 else 0
                    }
                    for k, v in position_counts.items()
                }
            },
            'blink_analysis': {
                'total_blinks': blink_count,
                'blinks_per_minute': round((blink_count / duration) * 60, 2) if duration > 0 else 0
            },
            'eye_size_distribution': {
                k: {
                    'frames': v,
                    'percentage': round((v / total_eye_size_frames) * 100, 2) if total_eye_size_frames > 0 else 0
                }
                for k, v in eye_size_counts.items()
            },
            'timestamp': datetime.now().isoformat()
        }

        logger.info(f"Video analysis completed: {score}/5 - {rating}")
        return report

    def save_report(self, report: Dict, output_path: str):
        """Save the report dict to a JSON file (UTF-8, pretty-printed)."""
        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(report, f, indent=2, ensure_ascii=False)
            logger.info(f"Report saved to: {output_path}")
        except Exception as e:
            logger.error(f"Error saving report: {e}")
|
| 721 |
+
|
| 722 |
+
|
| 723 |
+
# ===== API FUNCTIONS FOR INTEGRATION =====
|
| 724 |
+
|
| 725 |
+
def analyze_video_file(
    video_path: str,
    output_video_path: Optional[str] = None,
    output_report_path: Optional[str] = None,
    progress_callback: Optional[callable] = None
) -> Dict:
    """
    Main API entry point for full-video eye-tracking analysis.

    Args:
        video_path: Path to the input video.
        output_video_path: Optional path for the annotated output video.
        output_report_path: Optional path for the JSON report.
        progress_callback: Optional callback(current, total, status) invoked
            during processing.

    Returns:
        The analysis dictionary produced by ``VideoAnalyzer.analyze_video``;
        on failure, ``{'success': False, 'error': <message>}``.

    Example:
        >>> result = analyze_video_file('video.mp4', 'output.mp4', 'report.json')
        >>> print(f"Score: {result['eye_contact_analysis']['score']}/5")
    """
    try:
        analyzer = VideoAnalyzer()
        report = analyzer.analyze_video(video_path, output_video_path, progress_callback)

        # Persist the report only when a target path was given and the
        # analysis actually succeeded.
        should_save = bool(output_report_path) and bool(report.get('success'))
        if should_save:
            analyzer.save_report(report, output_report_path)

        return report
    except Exception as e:
        logger.error(f"Error in analyze_video_file: {e}")
        return {'success': False, 'error': str(e)}
|
| 766 |
+
|
| 767 |
+
|
| 768 |
+
def analyze_frame(frame: np.ndarray, tracker: Optional[EyeTracker] = None) -> Dict:
    """
    Analyze a single frame (intended for real-time processing).

    Args:
        frame: Frame image as a numpy array (BGR, as delivered by OpenCV).
        tracker: Optional EyeTracker instance. Pass one in to reuse its
            internal state across frames; otherwise a fresh tracker is
            constructed for this call.

    Returns:
        Per-frame analysis dictionary from ``EyeTracker.process_frame``;
        on failure, a dict with 'error' and 'gaze_position' == 'UNKNOWN'.

    Example:
        >>> tracker = EyeTracker()
        >>> cap = cv.VideoCapture(0)
        >>> ret, frame = cap.read()
        >>> result = analyze_frame(frame, tracker)
        >>> print(result['gaze_position'])
    """
    try:
        active_tracker = EyeTracker() if tracker is None else tracker
        return active_tracker.process_frame(frame)
    except Exception as e:
        logger.error(f"Error in analyze_frame: {e}")
        return {
            'error': str(e),
            'gaze_position': 'UNKNOWN'
        }
|
| 798 |
+
|
| 799 |
+
|
| 800 |
+
# ===== EXAMPLE USAGE =====
|
| 801 |
+
# ===== EXAMPLE USAGE =====
if __name__ == "__main__":
    import sys

    print("=" * 70)
    print("SWARA - Eye Tracking Module (Production)")
    print("=" * 70)

    # Require at least the input video path.
    if len(sys.argv) < 2:
        print("\nUsage:")
        print("  python swara_eye_tracking.py <video_path> [output_video] [output_report]")
        print("\nExample:")
        print("  python swara_eye_tracking.py input.mp4")
        print("  python swara_eye_tracking.py input.mp4 output.mp4 report.json")
        sys.exit(1)

    video_path = sys.argv[1]
    output_video = sys.argv[2] if len(sys.argv) > 2 else None
    output_report = sys.argv[3] if len(sys.argv) > 3 else None

    def progress(current, total, status):
        """Single-line console progress indicator (carriage-return update)."""
        # FIX: guard against total == 0 so a zero-frame video cannot crash
        # the progress display with ZeroDivisionError.
        percent = (current / total) * 100 if total else 0.0
        print(f"\rProgress: {current}/{total} ({percent:.1f}%) - Status: {status}", end='')

    # NOTE: the original emoji prefixes in these messages were
    # mojibake-corrupted; replaced with plain text so output renders
    # correctly on any terminal encoding.
    print(f"\nProcessing video: {video_path}")
    print("-" * 70)

    # Run the full analysis pipeline.
    result = analyze_video_file(
        video_path,
        output_video,
        output_report,
        progress
    )

    print("\n")

    if result.get('success'):
        print("\n" + "=" * 70)
        print("HASIL ANALISIS")
        print("=" * 70)

        # Video info
        info = result['video_info']
        print("\nVideo Info:")
        print(f"  Duration: {info['duration']} seconds")
        print(f"  FPS: {info['fps']}")
        print(f"  Resolution: {info['resolution']}")

        # Eye contact
        ec = result['eye_contact_analysis']
        print("\nEye Contact:")
        print(f"  Score: {ec['score']}/5 - {ec['rating']}")
        print(f"  Gaze Away Time: {ec['total_gaze_away_time']}s ({ec['gaze_away_percentage']}%)")
        print("\n  Position Distribution:")
        for pos, data in ec['position_distribution'].items():
            print(f"    {pos}: {data['frames']} frames ({data['percentage']}%)")

        # Blink
        blink = result['blink_analysis']
        print("\nBlink Analysis:")
        print(f"  Total Blinks: {blink['total_blinks']}")
        print(f"  Blinks/Minute: {blink['blinks_per_minute']}")

        print("\n" + "=" * 70)

        if output_report:
            print(f"\nReport saved to: {output_report}")
        if output_video:
            print(f"Video saved to: {output_video}")

    else:
        print(f"\nError: {result.get('error', 'Unknown error')}")
        sys.exit(1)
|
app/services/facial_expression.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Facial Expression Service
|
| 3 |
+
|
| 4 |
+
Refactored from facial_expression.py for production use.
|
| 5 |
+
Detects facial expressions using YOLO model
|
| 6 |
+
"""
|
| 7 |
+
import cv2
|
| 8 |
+
import numpy as np
|
| 9 |
+
from typing import Dict, Any
|
| 10 |
+
from loguru import logger
|
| 11 |
+
from ultralytics import YOLO
|
| 12 |
+
|
| 13 |
+
from app.config import settings
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class FacialExpressionService:
    """
    Facial Expression Service for SWARA API.

    Analyzes facial expressions in videos using a YOLO model. The model
    weights are loaded once per process and shared by every instance.
    """

    # Shared YOLO model (process-wide singleton): loading weights is expensive.
    _model = None

    def __init__(self):
        """Initialize the service; triggers a one-time model load."""
        if FacialExpressionService._model is None:
            self._load_model()

    def _load_model(self):
        """Load the YOLO model from the path configured in settings (called once)."""
        try:
            logger.info("Loading Facial Expression YOLO model...")
            model_path = settings.FACIAL_EXPRESSION_MODEL
            FacialExpressionService._model = YOLO(model_path)
            logger.info(f"Facial Expression model loaded from {model_path}")
        except Exception as e:
            logger.error(f"Failed to load Facial Expression model: {e}")
            raise

    def _detect_expression(self, frame, frame_number: int, fps: int) -> Dict[str, Any]:
        """Run the model on one frame and build its per-frame record.

        The frame is converted to grayscale and replicated to 3 channels
        before inference, matching the original preprocessing. Only the
        single highest-confidence detection is kept per frame.
        """
        timestamp_start = (frame_number - 1) / fps
        timestamp_end = frame_number / fps

        gray_image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray_image_3d = cv2.merge([gray_image, gray_image, gray_image])

        result = self._model(gray_image_3d, verbose=False)[0]

        if len(result.boxes) == 0:
            # No face detected in this frame.
            return {
                'frame_number': frame_number,
                'timestamp_start': round(timestamp_start, 3),
                'timestamp_end': round(timestamp_end, 3),
                'expression': 'no_face',
                'confidence': 0.0,
                'bbox_x': 0,
                'bbox_y': 0,
                'bbox_width': 0,
                'bbox_height': 0
            }

        boxes = result.boxes.xyxy.cpu().numpy()
        confidences = result.boxes.conf.cpu().numpy()
        classes = result.boxes.cls.cpu().numpy()

        # Keep the detection with the highest confidence.
        best_idx = int(np.argmax(confidences))
        box = boxes[best_idx]
        confidence = confidences[best_idx]
        class_id = int(classes[best_idx])
        expression = self._model.names[class_id]

        return {
            'frame_number': frame_number,
            'timestamp_start': round(timestamp_start, 3),
            'timestamp_end': round(timestamp_end, 3),
            'expression': expression,
            'confidence': round(float(confidence), 4),
            'bbox_x': int(box[0]),
            'bbox_y': int(box[1]),
            'bbox_width': int(box[2] - box[0]),
            'bbox_height': int(box[3] - box[1])
        }

    @staticmethod
    def _summarize_expressions(face_frames):
        """Return (distribution-in-%, dominant expression) over face frames."""
        if not face_frames:
            return {}, 'no_face'

        expression_counts = {}
        for record in face_frames:
            expr = record['expression']
            expression_counts[expr] = expression_counts.get(expr, 0) + 1

        total = len(face_frames)
        distribution = {
            expr: round((count / total) * 100, 2)
            for expr, count in expression_counts.items()
        }
        dominant = max(expression_counts.items(), key=lambda item: item[1])[0]
        return distribution, dominant

    @staticmethod
    def _analyze_opening(frame_data, opening_seconds: float = 10.0):
        """Analyze the opening period (first ``opening_seconds`` of video).

        Returns (smile_detected, smile_percentage, expressions, expr_counts).
        A smile counts as detected when more than half of the opening face
        frames are classified as 'happy'.
        """
        opening_frames = [f for f in frame_data if f['timestamp_start'] < opening_seconds]
        opening_faces = [
            f for f in opening_frames
            if f['expression'] not in ('no_face', 'background')
        ]

        if not opening_faces:
            return False, 0.0, [], {}

        happy_count = sum(1 for f in opening_faces if f['expression'].lower() == 'happy')
        smile_percentage = (happy_count / len(opening_faces)) * 100
        smile_detected = smile_percentage > 50.0

        expressions = [f['expression'] for f in opening_faces]
        expr_counts = {}
        for f in opening_faces:
            expr = f['expression']
            expr_counts[expr] = expr_counts.get(expr, 0) + 1

        return smile_detected, smile_percentage, expressions, expr_counts

    def analyze_video(self, video_path: str) -> Dict[str, Any]:
        """
        Analyze a video for facial expressions.

        Args:
            video_path: Path to video file.

        Returns:
            Dict with a 'success' flag, per-frame records under
            'statistics_df', and aggregate metrics under 'summary'
            (dominant expression, distribution, opening-smile analysis).

        Raises:
            ValueError: if the video cannot be opened.
        """
        try:
            logger.info(f"Analyzing video with Facial Expression Service: {video_path}")

            cap = cv2.VideoCapture(video_path)
            if not cap.isOpened():
                raise ValueError(f"Cannot open video: {video_path}")

            # FIX: some containers report FPS as 0; fall back to 30 so the
            # timestamp/duration math cannot raise ZeroDivisionError.
            fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            frame_data = []
            frame_number = 0

            # FIX: release the capture even if inference fails mid-video.
            try:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break
                    frame_number += 1
                    frame_data.append(self._detect_expression(frame, frame_number, fps))
            finally:
                cap.release()

            logger.info(f"Processed {frame_number} frames")

            # FIX: frame-count metadata can be missing or wrong; fall back
            # to the number of frames actually decoded.
            if total_frames <= 0:
                total_frames = frame_number

            # Frames where a real face (not background) was detected.
            df_faces = [
                f for f in frame_data
                if f['expression'] not in ('no_face', 'background')
            ]

            expression_distribution, dominant_expression = self._summarize_expressions(df_faces)

            (opening_smile_detected,
             opening_smile_percentage,
             opening_expressions,
             opening_expr_counts) = self._analyze_opening(frame_data)

            result = {
                'success': True,
                'statistics_df': frame_data,  # Raw per-frame records
                'summary': {
                    'total_frames': total_frames,
                    'frames_with_face': len(df_faces),
                    'dominant_expression': dominant_expression,
                    'expression_distribution': expression_distribution,
                    'opening_smile_detected': opening_smile_detected,
                    'opening_period_expressions': opening_expressions,
                    'opening_smile_percentage': round(opening_smile_percentage, 2),
                    'opening_expression_counts': opening_expr_counts,
                    'video_duration_seconds': round(total_frames / fps, 2),
                    'fps': fps
                }
            }

            logger.info("Facial Expression analysis completed")
            logger.info(f"  Dominant expression: {dominant_expression}")
            logger.info(f"  Opening smile: {'YES' if opening_smile_detected else 'NO'} ({opening_smile_percentage:.1f}%)")

            return result

        except Exception as e:
            logger.error(f"Facial Expression analysis failed: {e}")
            raise
|
app/services/gesture_detection.py
ADDED
|
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gesture Detection Service
|
| 3 |
+
|
| 4 |
+
Refactored from Colab notebook for production use.
|
| 5 |
+
Detects body gestures and movements using MediaPipe Pose.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import cv2
|
| 9 |
+
import numpy as np
|
| 10 |
+
import mediapipe as mp
|
| 11 |
+
from typing import Dict, Any, List, Optional, Tuple
|
| 12 |
+
from loguru import logger
|
| 13 |
+
from scipy.signal import savgol_filter
|
| 14 |
+
from collections import Counter
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class GestureConfig:
    """Tunable thresholds and landmark indices for gesture detection."""

    # --- Movement thresholds (pixels per frame) ---
    EXCESSIVE_MOVEMENT_THRESHOLD = 50   # faster than this reads as frantic
    MINIMAL_MOVEMENT_THRESHOLD = 5      # slower than this reads as static

    # --- Gesture frequency thresholds (gestures per second) ---
    HIGH_FREQUENCY = 3.0
    LOW_FREQUENCY = 0.5

    # --- Body stability (pixel variance of shoulder-center motion) ---
    JITTER_THRESHOLD = 15

    # --- Hand position zoning (relative to the shoulders) ---
    FRONT_ZONE_THRESHOLD = 0.15  # ~15 cm in front of the shoulder line

    # --- MediaPipe Pose landmark indices ---
    NOSE = 0
    SHOULDER_LEFT = 11
    SHOULDER_RIGHT = 12
    ELBOW_LEFT = 13
    ELBOW_RIGHT = 14
    WRIST_LEFT = 15
    WRIST_RIGHT = 16
    HIP_LEFT = 23
    HIP_RIGHT = 24
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class GestureDetectionService:
|
| 47 |
+
"""
|
| 48 |
+
Gesture Detection Service for SWARA API
|
| 49 |
+
|
| 50 |
+
Analyzes hand movements, body stability, and gesture patterns
|
| 51 |
+
using MediaPipe Pose landmarks.
|
| 52 |
+
"""
|
| 53 |
+
|
| 54 |
+
_instance = None
|
| 55 |
+
_pose = None
|
| 56 |
+
|
| 57 |
+
def __new__(cls):
|
| 58 |
+
"""Singleton pattern to avoid reloading MediaPipe multiple times"""
|
| 59 |
+
if cls._instance is None:
|
| 60 |
+
cls._instance = super().__new__(cls)
|
| 61 |
+
cls._pose = mp.solutions.pose.Pose(
|
| 62 |
+
static_image_mode=False,
|
| 63 |
+
model_complexity=1,
|
| 64 |
+
smooth_landmarks=True,
|
| 65 |
+
min_detection_confidence=0.5,
|
| 66 |
+
min_tracking_confidence=0.5
|
| 67 |
+
)
|
| 68 |
+
logger.info("GestureDetectionService initialized with MediaPipe Pose")
|
| 69 |
+
return cls._instance
|
| 70 |
+
|
| 71 |
+
def __init__(self):
|
| 72 |
+
"""Initialize service (called after __new__)"""
|
| 73 |
+
self.config = GestureConfig()
|
| 74 |
+
|
| 75 |
+
def analyze_video(
|
| 76 |
+
self,
|
| 77 |
+
video_path: str,
|
| 78 |
+
progress_callback: Optional[callable] = None
|
| 79 |
+
) -> Dict[str, Any]:
|
| 80 |
+
"""
|
| 81 |
+
Analyze gestures in a video file
|
| 82 |
+
|
| 83 |
+
Args:
|
| 84 |
+
video_path: Path to video file
|
| 85 |
+
progress_callback: Optional callback function(current, total, message)
|
| 86 |
+
|
| 87 |
+
Returns:
|
| 88 |
+
Dictionary containing gesture analysis results
|
| 89 |
+
"""
|
| 90 |
+
try:
|
| 91 |
+
logger.info(f"Starting gesture analysis for: {video_path}")
|
| 92 |
+
|
| 93 |
+
cap = cv2.VideoCapture(video_path)
|
| 94 |
+
if not cap.isOpened():
|
| 95 |
+
raise ValueError(f"Cannot open video file: {video_path}")
|
| 96 |
+
|
| 97 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 98 |
+
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
|
| 99 |
+
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
|
| 100 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 101 |
+
|
| 102 |
+
logger.info(f"Video Info: {width}x{height} @ {fps}FPS, Total frames: {total_frames}")
|
| 103 |
+
|
| 104 |
+
# Data storage
|
| 105 |
+
frame_data = []
|
| 106 |
+
frame_count = 0
|
| 107 |
+
prev_landmarks = None
|
| 108 |
+
|
| 109 |
+
while True:
|
| 110 |
+
ret, frame = cap.read()
|
| 111 |
+
if not ret:
|
| 112 |
+
break
|
| 113 |
+
|
| 114 |
+
frame_count += 1
|
| 115 |
+
|
| 116 |
+
# Progress callback
|
| 117 |
+
if progress_callback and frame_count % 30 == 0:
|
| 118 |
+
progress = int((frame_count / total_frames) * 100)
|
| 119 |
+
progress_callback(frame_count, total_frames, f"Processing gestures: {progress}%")
|
| 120 |
+
|
| 121 |
+
# Convert to RGB for MediaPipe
|
| 122 |
+
rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 123 |
+
results = self._pose.process(rgb_frame)
|
| 124 |
+
|
| 125 |
+
# Initialize frame metrics
|
| 126 |
+
frame_metrics = {
|
| 127 |
+
'frame_number': frame_count,
|
| 128 |
+
'timestamp_start': (frame_count - 1) / fps,
|
| 129 |
+
'timestamp_end': frame_count / fps,
|
| 130 |
+
'pose_detected': False,
|
| 131 |
+
'left_hand_movement': 0.0,
|
| 132 |
+
'right_hand_movement': 0.0,
|
| 133 |
+
'body_movement': 0.0,
|
| 134 |
+
'left_hand_position': 'unknown',
|
| 135 |
+
'right_hand_position': 'unknown'
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
if results.pose_landmarks:
|
| 139 |
+
frame_metrics['pose_detected'] = True
|
| 140 |
+
landmarks = results.pose_landmarks.landmark
|
| 141 |
+
|
| 142 |
+
# Get key landmarks
|
| 143 |
+
l_wrist = self._get_landmark_coords(landmarks, self.config.WRIST_LEFT, width, height)
|
| 144 |
+
r_wrist = self._get_landmark_coords(landmarks, self.config.WRIST_RIGHT, width, height)
|
| 145 |
+
l_shoulder = self._get_landmark_coords(landmarks, self.config.SHOULDER_LEFT, width, height)
|
| 146 |
+
r_shoulder = self._get_landmark_coords(landmarks, self.config.SHOULDER_RIGHT, width, height)
|
| 147 |
+
|
| 148 |
+
# Calculate movements if previous frame exists
|
| 149 |
+
if prev_landmarks is not None:
|
| 150 |
+
if l_wrist and prev_landmarks.get('l_wrist'):
|
| 151 |
+
frame_metrics['left_hand_movement'] = self._calculate_movement_speed(
|
| 152 |
+
prev_landmarks['l_wrist'], l_wrist
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
if r_wrist and prev_landmarks.get('r_wrist'):
|
| 156 |
+
frame_metrics['right_hand_movement'] = self._calculate_movement_speed(
|
| 157 |
+
prev_landmarks['r_wrist'], r_wrist
|
| 158 |
+
)
|
| 159 |
+
|
| 160 |
+
# Body movement (center of shoulders)
|
| 161 |
+
if l_shoulder and r_shoulder and prev_landmarks.get('shoulder_center'):
|
| 162 |
+
shoulder_center = (
|
| 163 |
+
(l_shoulder[0] + r_shoulder[0]) / 2,
|
| 164 |
+
(l_shoulder[1] + r_shoulder[1]) / 2
|
| 165 |
+
)
|
| 166 |
+
frame_metrics['body_movement'] = self._calculate_movement_speed(
|
| 167 |
+
prev_landmarks['shoulder_center'], shoulder_center
|
| 168 |
+
)
|
| 169 |
+
|
| 170 |
+
# Determine hand positions (front/side/back)
|
| 171 |
+
if l_wrist and l_shoulder:
|
| 172 |
+
if l_wrist[0] < l_shoulder[0] - width * 0.05:
|
| 173 |
+
frame_metrics['left_hand_position'] = 'front'
|
| 174 |
+
elif l_wrist[0] > l_shoulder[0] + width * 0.05:
|
| 175 |
+
frame_metrics['left_hand_position'] = 'back'
|
| 176 |
+
else:
|
| 177 |
+
frame_metrics['left_hand_position'] = 'side'
|
| 178 |
+
|
| 179 |
+
if r_wrist and r_shoulder:
|
| 180 |
+
if r_wrist[0] > r_shoulder[0] + width * 0.05:
|
| 181 |
+
frame_metrics['right_hand_position'] = 'front'
|
| 182 |
+
elif r_wrist[0] < r_shoulder[0] - width * 0.05:
|
| 183 |
+
frame_metrics['right_hand_position'] = 'back'
|
| 184 |
+
else:
|
| 185 |
+
frame_metrics['right_hand_position'] = 'side'
|
| 186 |
+
|
| 187 |
+
# Store current landmarks for next frame
|
| 188 |
+
prev_landmarks = {
|
| 189 |
+
'l_wrist': l_wrist,
|
| 190 |
+
'r_wrist': r_wrist,
|
| 191 |
+
'l_shoulder': l_shoulder,
|
| 192 |
+
'r_shoulder': r_shoulder,
|
| 193 |
+
'shoulder_center': (
|
| 194 |
+
(l_shoulder[0] + r_shoulder[0]) / 2,
|
| 195 |
+
(l_shoulder[1] + r_shoulder[1]) / 2
|
| 196 |
+
) if l_shoulder and r_shoulder else None
|
| 197 |
+
}
|
| 198 |
+
else:
|
| 199 |
+
prev_landmarks = None
|
| 200 |
+
|
| 201 |
+
frame_data.append(frame_metrics)
|
| 202 |
+
|
| 203 |
+
cap.release()
|
| 204 |
+
|
| 205 |
+
if not frame_data:
|
| 206 |
+
logger.warning("No frames processed")
|
| 207 |
+
return self._create_empty_result("No frames processed")
|
| 208 |
+
|
| 209 |
+
# Filter frames with detected pose
|
| 210 |
+
pose_frames = [f for f in frame_data if f['pose_detected']]
|
| 211 |
+
|
| 212 |
+
if len(pose_frames) < 10:
|
| 213 |
+
logger.warning(f"Insufficient pose landmarks detected: {len(pose_frames)} frames")
|
| 214 |
+
return self._create_empty_result("Insufficient pose data")
|
| 215 |
+
|
| 216 |
+
logger.info(f"Frames with pose detected: {len(pose_frames)} / {len(frame_data)} ({len(pose_frames)/len(frame_data)*100:.1f}%)")
|
| 217 |
+
|
| 218 |
+
# Analyze gestures
|
| 219 |
+
analysis_result = self._analyze_gestures(pose_frames, fps, total_frames)
|
| 220 |
+
|
| 221 |
+
logger.info(f"Gesture analysis complete: Score {analysis_result['gesture_analysis']['movement_score']:.1f}/10")
|
| 222 |
+
return analysis_result
|
| 223 |
+
|
| 224 |
+
except Exception as e:
|
| 225 |
+
logger.error(f"Error in gesture analysis: {str(e)}")
|
| 226 |
+
raise
|
| 227 |
+
|
| 228 |
+
def _get_landmark_coords(
|
| 229 |
+
self,
|
| 230 |
+
landmarks: Any,
|
| 231 |
+
idx: int,
|
| 232 |
+
width: int,
|
| 233 |
+
height: int
|
| 234 |
+
) -> Optional[Tuple[int, int, float]]:
|
| 235 |
+
"""Get landmark coordinates in pixel space with visibility"""
|
| 236 |
+
if landmarks:
|
| 237 |
+
lm = landmarks[idx]
|
| 238 |
+
return (int(lm.x * width), int(lm.y * height), lm.visibility)
|
| 239 |
+
return None
|
| 240 |
+
|
| 241 |
+
def _calculate_movement_speed(
|
| 242 |
+
self,
|
| 243 |
+
prev_point: Tuple,
|
| 244 |
+
curr_point: Tuple
|
| 245 |
+
) -> float:
|
| 246 |
+
"""Calculate movement speed between frames"""
|
| 247 |
+
if prev_point is None or curr_point is None:
|
| 248 |
+
return 0.0
|
| 249 |
+
return np.sqrt(
|
| 250 |
+
(curr_point[0] - prev_point[0])**2 +
|
| 251 |
+
(curr_point[1] - prev_point[1])**2
|
| 252 |
+
)
|
| 253 |
+
|
| 254 |
+
def _smooth_data(self, data: List[float], window_size: int = 5) -> np.ndarray:
|
| 255 |
+
"""Smooth data using Savitzky-Golay filter"""
|
| 256 |
+
if len(data) < window_size:
|
| 257 |
+
return np.array(data)
|
| 258 |
+
try:
|
| 259 |
+
return savgol_filter(data, window_size, 2)
|
| 260 |
+
except:
|
| 261 |
+
return np.array(data)
|
| 262 |
+
|
| 263 |
+
def _analyze_gestures(
|
| 264 |
+
self,
|
| 265 |
+
pose_frames: List[Dict],
|
| 266 |
+
fps: float,
|
| 267 |
+
total_frames: int
|
| 268 |
+
) -> Dict[str, Any]:
|
| 269 |
+
"""Analyze gesture patterns and calculate scores"""
|
| 270 |
+
|
| 271 |
+
# Extract movement data
|
| 272 |
+
left_hand_movements = [f['left_hand_movement'] for f in pose_frames]
|
| 273 |
+
right_hand_movements = [f['right_hand_movement'] for f in pose_frames]
|
| 274 |
+
body_movements = [f['body_movement'] for f in pose_frames]
|
| 275 |
+
|
| 276 |
+
# Calculate statistics
|
| 277 |
+
avg_left_hand_speed = np.mean(left_hand_movements)
|
| 278 |
+
avg_right_hand_speed = np.mean(right_hand_movements)
|
| 279 |
+
avg_hand_speed = (avg_left_hand_speed + avg_right_hand_speed) / 2
|
| 280 |
+
|
| 281 |
+
max_left_hand_speed = np.max(left_hand_movements)
|
| 282 |
+
max_right_hand_speed = np.max(right_hand_movements)
|
| 283 |
+
max_hand_speed = max(max_left_hand_speed, max_right_hand_speed)
|
| 284 |
+
|
| 285 |
+
avg_body_movement = np.mean(body_movements)
|
| 286 |
+
max_body_movement = np.max(body_movements)
|
| 287 |
+
|
| 288 |
+
# Hand activity percentage
|
| 289 |
+
active_frames = [
|
| 290 |
+
f for f in pose_frames
|
| 291 |
+
if f['left_hand_movement'] > self.config.MINIMAL_MOVEMENT_THRESHOLD or
|
| 292 |
+
f['right_hand_movement'] > self.config.MINIMAL_MOVEMENT_THRESHOLD
|
| 293 |
+
]
|
| 294 |
+
hand_activity_percentage = (len(active_frames) / len(pose_frames)) * 100
|
| 295 |
+
|
| 296 |
+
# Gesture frequency (peak detection)
|
| 297 |
+
combined_movement = [
|
| 298 |
+
left_hand_movements[i] + right_hand_movements[i]
|
| 299 |
+
for i in range(len(left_hand_movements))
|
| 300 |
+
]
|
| 301 |
+
smooth_movement = self._smooth_data(combined_movement)
|
| 302 |
+
|
| 303 |
+
peaks = 0
|
| 304 |
+
threshold = self.config.MINIMAL_MOVEMENT_THRESHOLD * 2
|
| 305 |
+
for i in range(1, len(smooth_movement) - 1):
|
| 306 |
+
if (smooth_movement[i] > threshold and
|
| 307 |
+
smooth_movement[i] > smooth_movement[i-1] and
|
| 308 |
+
smooth_movement[i] > smooth_movement[i+1]):
|
| 309 |
+
peaks += 1
|
| 310 |
+
|
| 311 |
+
video_duration = total_frames / fps
|
| 312 |
+
gesture_frequency = peaks / video_duration if video_duration > 0 else 0
|
| 313 |
+
|
| 314 |
+
# Body stability
|
| 315 |
+
body_movement_variance = np.var(body_movements)
|
| 316 |
+
if body_movement_variance < self.config.JITTER_THRESHOLD:
|
| 317 |
+
jitter_level = 'low'
|
| 318 |
+
elif body_movement_variance < self.config.JITTER_THRESHOLD * 2:
|
| 319 |
+
jitter_level = 'medium'
|
| 320 |
+
else:
|
| 321 |
+
jitter_level = 'high'
|
| 322 |
+
|
| 323 |
+
# Hand position distribution
|
| 324 |
+
hand_positions = []
|
| 325 |
+
for f in pose_frames:
|
| 326 |
+
if f['left_hand_position'] != 'unknown':
|
| 327 |
+
hand_positions.append(f['left_hand_position'])
|
| 328 |
+
if f['right_hand_position'] != 'unknown':
|
| 329 |
+
hand_positions.append(f['right_hand_position'])
|
| 330 |
+
|
| 331 |
+
if hand_positions:
|
| 332 |
+
pos_counts = Counter(hand_positions)
|
| 333 |
+
total_pos = len(hand_positions)
|
| 334 |
+
hand_position_dist = {
|
| 335 |
+
'front': (pos_counts.get('front', 0) / total_pos) * 100,
|
| 336 |
+
'side': (pos_counts.get('side', 0) / total_pos) * 100,
|
| 337 |
+
'back': (pos_counts.get('back', 0) / total_pos) * 100
|
| 338 |
+
}
|
| 339 |
+
else:
|
| 340 |
+
hand_position_dist = {'front': 0.0, 'side': 0.0, 'back': 0.0}
|
| 341 |
+
|
| 342 |
+
# Calculate movement score
|
| 343 |
+
movement_score = self._calculate_movement_score(
|
| 344 |
+
avg_hand_speed, max_hand_speed, gesture_frequency,
|
| 345 |
+
body_movement_variance, jitter_level, hand_activity_percentage,
|
| 346 |
+
hand_position_dist
|
| 347 |
+
)
|
| 348 |
+
|
| 349 |
+
# Movement category
|
| 350 |
+
if (avg_hand_speed > self.config.EXCESSIVE_MOVEMENT_THRESHOLD or
|
| 351 |
+
gesture_frequency > self.config.HIGH_FREQUENCY or
|
| 352 |
+
hand_activity_percentage > 80):
|
| 353 |
+
movement_category = 'excessive'
|
| 354 |
+
elif (avg_hand_speed < self.config.MINIMAL_MOVEMENT_THRESHOLD or
|
| 355 |
+
gesture_frequency < self.config.LOW_FREQUENCY or
|
| 356 |
+
hand_activity_percentage < 35):
|
| 357 |
+
movement_category = 'minimal'
|
| 358 |
+
else:
|
| 359 |
+
movement_category = 'balanced'
|
| 360 |
+
|
| 361 |
+
# Body stability score
|
| 362 |
+
if jitter_level == 'low':
|
| 363 |
+
body_stability_score = 9.0
|
| 364 |
+
elif jitter_level == 'medium':
|
| 365 |
+
body_stability_score = 6.0
|
| 366 |
+
else:
|
| 367 |
+
body_stability_score = 3.0
|
| 368 |
+
|
| 369 |
+
if avg_body_movement > 20:
|
| 370 |
+
body_stability_score -= 2.0
|
| 371 |
+
body_stability_score = max(0, min(10, body_stability_score))
|
| 372 |
+
|
| 373 |
+
# Detect nervous gestures
|
| 374 |
+
nervous_gestures_detected = (
|
| 375 |
+
gesture_frequency > self.config.HIGH_FREQUENCY or
|
| 376 |
+
jitter_level == 'high' or
|
| 377 |
+
hand_activity_percentage > 85 or
|
| 378 |
+
max_hand_speed > 300
|
| 379 |
+
)
|
| 380 |
+
|
| 381 |
+
# Generate recommendations
|
| 382 |
+
recommendations = self._generate_recommendations(
|
| 383 |
+
gesture_frequency, hand_position_dist, max_hand_speed,
|
| 384 |
+
hand_activity_percentage, jitter_level, avg_hand_speed,
|
| 385 |
+
movement_score
|
| 386 |
+
)
|
| 387 |
+
|
| 388 |
+
# Log analysis
|
| 389 |
+
logger.info(f"Movement Metrics - Avg Speed: {avg_hand_speed:.2f}px, "
|
| 390 |
+
f"Frequency: {gesture_frequency:.2f}/s, "
|
| 391 |
+
f"Activity: {hand_activity_percentage:.1f}%, "
|
| 392 |
+
f"Stability: {jitter_level}")
|
| 393 |
+
|
| 394 |
+
return {
|
| 395 |
+
'gesture_analysis': {
|
| 396 |
+
'movement_score': round(movement_score, 1),
|
| 397 |
+
'movement_category': movement_category,
|
| 398 |
+
'gesture_frequency': round(gesture_frequency, 2),
|
| 399 |
+
'hand_activity_percentage': round(hand_activity_percentage, 1),
|
| 400 |
+
'body_stability_score': round(body_stability_score, 1),
|
| 401 |
+
'nervous_gestures_detected': nervous_gestures_detected,
|
| 402 |
+
'recommendations': recommendations,
|
| 403 |
+
'detailed_metrics': {
|
| 404 |
+
'avg_hand_movement_speed': round(avg_hand_speed, 2),
|
| 405 |
+
'max_hand_movement_speed': round(max_hand_speed, 2),
|
| 406 |
+
'avg_body_movement': round(avg_body_movement, 2),
|
| 407 |
+
'max_body_movement': round(max_body_movement, 2),
|
| 408 |
+
'body_sway_intensity': jitter_level,
|
| 409 |
+
'hand_position_distribution': {
|
| 410 |
+
'front': round(hand_position_dist['front'], 1),
|
| 411 |
+
'side': round(hand_position_dist['side'], 1),
|
| 412 |
+
'back': round(hand_position_dist['back'], 1)
|
| 413 |
+
},
|
| 414 |
+
'gesture_peaks_detected': peaks
|
| 415 |
+
},
|
| 416 |
+
'total_frames_analyzed': len(pose_frames),
|
| 417 |
+
'video_duration': round(video_duration, 2)
|
| 418 |
+
}
|
| 419 |
+
}
|
| 420 |
+
|
| 421 |
+
def _calculate_movement_score(
|
| 422 |
+
self,
|
| 423 |
+
avg_hand_speed: float,
|
| 424 |
+
max_hand_speed: float,
|
| 425 |
+
gesture_frequency: float,
|
| 426 |
+
body_variance: float,
|
| 427 |
+
jitter_level: str,
|
| 428 |
+
hand_activity: float,
|
| 429 |
+
hand_position_dist: Dict[str, float]
|
| 430 |
+
) -> float:
|
| 431 |
+
"""Calculate movement score (0-10) based on multiple factors"""
|
| 432 |
+
|
| 433 |
+
score = 10.0
|
| 434 |
+
|
| 435 |
+
# Penalty #1: Average Movement Speed
|
| 436 |
+
if avg_hand_speed > self.config.EXCESSIVE_MOVEMENT_THRESHOLD:
|
| 437 |
+
score -= 3.0
|
| 438 |
+
elif avg_hand_speed < self.config.MINIMAL_MOVEMENT_THRESHOLD:
|
| 439 |
+
score -= 2.5
|
| 440 |
+
|
| 441 |
+
# Penalty #2: Max Speed Spikes
|
| 442 |
+
if max_hand_speed > 300:
|
| 443 |
+
score -= 2.0
|
| 444 |
+
elif max_hand_speed > 200:
|
| 445 |
+
score -= 1.0
|
| 446 |
+
|
| 447 |
+
# Penalty #3: Gesture Frequency
|
| 448 |
+
if gesture_frequency > 4.0:
|
| 449 |
+
score -= 3.5
|
| 450 |
+
elif gesture_frequency > self.config.HIGH_FREQUENCY:
|
| 451 |
+
score -= 2.5
|
| 452 |
+
elif gesture_frequency < self.config.LOW_FREQUENCY:
|
| 453 |
+
score -= 2.0
|
| 454 |
+
|
| 455 |
+
# Penalty #4: Body Instability
|
| 456 |
+
if jitter_level == 'high':
|
| 457 |
+
score -= 2.0
|
| 458 |
+
elif jitter_level == 'medium':
|
| 459 |
+
score -= 1.0
|
| 460 |
+
else:
|
| 461 |
+
score += 0.5 # Bonus for stability
|
| 462 |
+
|
| 463 |
+
# Penalty #5: Hand Position - Back
|
| 464 |
+
if hand_position_dist['back'] > 35:
|
| 465 |
+
score -= 2.5
|
| 466 |
+
elif hand_position_dist['back'] > 25:
|
| 467 |
+
score -= 1.5
|
| 468 |
+
elif hand_position_dist['back'] > 15:
|
| 469 |
+
score -= 0.5
|
| 470 |
+
|
| 471 |
+
# Penalty #6: Hand Position - Front
|
| 472 |
+
if hand_position_dist['front'] < 40:
|
| 473 |
+
score -= 2.0
|
| 474 |
+
elif hand_position_dist['front'] < 50:
|
| 475 |
+
score -= 1.0
|
| 476 |
+
elif hand_position_dist['front'] > 60:
|
| 477 |
+
score += 1.0 # Bonus
|
| 478 |
+
|
| 479 |
+
# Penalty #7: Hand Activity
|
| 480 |
+
if hand_activity > 85:
|
| 481 |
+
score -= 1.5
|
| 482 |
+
elif hand_activity > 75:
|
| 483 |
+
score -= 0.5
|
| 484 |
+
elif hand_activity < 30:
|
| 485 |
+
score -= 1.5
|
| 486 |
+
|
| 487 |
+
return max(0, min(10, score))
|
| 488 |
+
|
| 489 |
+
def _generate_recommendations(
|
| 490 |
+
self,
|
| 491 |
+
gesture_frequency: float,
|
| 492 |
+
hand_position_dist: Dict[str, float],
|
| 493 |
+
max_hand_speed: float,
|
| 494 |
+
hand_activity: float,
|
| 495 |
+
jitter_level: str,
|
| 496 |
+
avg_hand_speed: float,
|
| 497 |
+
movement_score: float
|
| 498 |
+
) -> List[str]:
|
| 499 |
+
"""Generate actionable recommendations"""
|
| 500 |
+
|
| 501 |
+
recommendations = []
|
| 502 |
+
|
| 503 |
+
if gesture_frequency > 4.0:
|
| 504 |
+
recommendations.append("Reduce gesture frequency significantly (currently very high)")
|
| 505 |
+
elif gesture_frequency > 3.0:
|
| 506 |
+
recommendations.append("Reduce gesture frequency slightly")
|
| 507 |
+
elif gesture_frequency < 0.5:
|
| 508 |
+
recommendations.append("Increase gesture frequency for more expressiveness")
|
| 509 |
+
|
| 510 |
+
if hand_position_dist['back'] > 30:
|
| 511 |
+
recommendations.append("Keep hands visible in front - avoid hiding behind body")
|
| 512 |
+
elif hand_position_dist['back'] > 20:
|
| 513 |
+
recommendations.append("Try to position hands more in front for better engagement")
|
| 514 |
+
|
| 515 |
+
if hand_position_dist['front'] < 45:
|
| 516 |
+
recommendations.append("Bring hands forward more often - increases audience connection")
|
| 517 |
+
|
| 518 |
+
if max_hand_speed > 300:
|
| 519 |
+
recommendations.append("Avoid sudden explosive movements - use smooth gestures")
|
| 520 |
+
|
| 521 |
+
if hand_activity > 80:
|
| 522 |
+
recommendations.append("Add strategic pauses - let hands rest between key points")
|
| 523 |
+
elif hand_activity < 35:
|
| 524 |
+
recommendations.append("Increase hand activity - use more gestures to emphasize points")
|
| 525 |
+
|
| 526 |
+
if jitter_level == 'high':
|
| 527 |
+
recommendations.append("Work on body stability - reduce nervous movements and sway")
|
| 528 |
+
|
| 529 |
+
if avg_hand_speed > 50:
|
| 530 |
+
recommendations.append("Slow down hand movements - make gestures more deliberate")
|
| 531 |
+
elif avg_hand_speed < 5:
|
| 532 |
+
recommendations.append("Make gestures more dynamic - increase movement speed slightly")
|
| 533 |
+
|
| 534 |
+
if movement_score >= 8.0:
|
| 535 |
+
recommendations.append("Excellent gesture control! Very natural and professional.")
|
| 536 |
+
|
| 537 |
+
if not recommendations:
|
| 538 |
+
recommendations.append("Keep up the great work!")
|
| 539 |
+
|
| 540 |
+
return recommendations
|
| 541 |
+
|
| 542 |
+
def _create_empty_result(self, reason: str) -> Dict[str, Any]:
|
| 543 |
+
"""Create empty result when analysis fails"""
|
| 544 |
+
return {
|
| 545 |
+
'gesture_analysis': {
|
| 546 |
+
'movement_score': 0.0,
|
| 547 |
+
'movement_category': 'unknown',
|
| 548 |
+
'gesture_frequency': 0.0,
|
| 549 |
+
'hand_activity_percentage': 0.0,
|
| 550 |
+
'body_stability_score': 0.0,
|
| 551 |
+
'nervous_gestures_detected': False,
|
| 552 |
+
'recommendations': [f"Analysis failed: {reason}"],
|
| 553 |
+
'detailed_metrics': {
|
| 554 |
+
'avg_hand_movement_speed': 0.0,
|
| 555 |
+
'max_hand_movement_speed': 0.0,
|
| 556 |
+
'avg_body_movement': 0.0,
|
| 557 |
+
'max_body_movement': 0.0,
|
| 558 |
+
'body_sway_intensity': 'unknown',
|
| 559 |
+
'hand_position_distribution': {
|
| 560 |
+
'front': 0.0,
|
| 561 |
+
'side': 0.0,
|
| 562 |
+
'back': 0.0
|
| 563 |
+
},
|
| 564 |
+
'gesture_peaks_detected': 0
|
| 565 |
+
},
|
| 566 |
+
'total_frames_analyzed': 0,
|
| 567 |
+
'video_duration': 0.0
|
| 568 |
+
}
|
| 569 |
+
}
|
app/services/struktur_berbicara_nlp.py
ADDED
|
@@ -0,0 +1,578 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""Struktur_Berbicara_NLP.ipynb
|
| 3 |
+
|
| 4 |
+
Automatically generated by Colab.
|
| 5 |
+
|
| 6 |
+
Original file is located at
|
| 7 |
+
https://colab.research.google.com/drive/13UJp10f4bAJGPoYw--ASnK-U0JLhYdJl
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
os.environ['WANDB_DISABLED'] = 'true'
|
| 12 |
+
|
| 13 |
+
"""
|
| 14 |
+
Fine-tuning IndoBERT untuk Klasifikasi Struktur Berbicara
|
| 15 |
+
(Pembuka, Isi, Penutup)
|
| 16 |
+
|
| 17 |
+
Requirements:
|
| 18 |
+
pip install transformers torch pandas scikit-learn datasets
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import pandas as pd
|
| 22 |
+
import torch
|
| 23 |
+
from torch.utils.data import Dataset, DataLoader
|
| 24 |
+
from transformers import (
|
| 25 |
+
AutoTokenizer,
|
| 26 |
+
AutoModelForSequenceClassification,
|
| 27 |
+
TrainingArguments,
|
| 28 |
+
Trainer
|
| 29 |
+
)
|
| 30 |
+
from sklearn.model_selection import train_test_split
|
| 31 |
+
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
|
| 32 |
+
import numpy as np
|
| 33 |
+
|
| 34 |
+
# ============ 1. LOAD DAN PREPROCESSING DATA ============
|
| 35 |
+
|
| 36 |
+
def load_and_prepare_data(csv_path):
    """Load the labelled CSV and split it into train/val/test frames.

    Maps the string labels to integer ids, then performs a stratified
    80/10/10 split. Returns (train_df, val_df, test_df, label_map).
    """
    df = pd.read_csv(csv_path)

    # Encode the three structural classes as integer ids for the model.
    label_map = {'opening': 0, 'content': 1, 'closing': 2}
    df['label_id'] = df['label'].map(label_map)

    # Stratified 80/10/10 split: carve off 20% first, then halve it.
    train_df, temp_df = train_test_split(
        df, test_size=0.2, random_state=42, stratify=df['label_id'])
    val_df, test_df = train_test_split(
        temp_df, test_size=0.5, random_state=42, stratify=temp_df['label_id'])

    print(f"Data Training: {len(train_df)}")
    print(f"Data Validasi: {len(val_df)}")
    print(f"Data Testing: {len(test_df)}")
    print(f"\nDistribusi Label Training:")
    print(train_df['label'].value_counts())

    return train_df, val_df, test_df, label_map
|
| 60 |
+
|
| 61 |
+
# ============ 2. CUSTOM DATASET CLASS ============
|
| 62 |
+
|
| 63 |
+
class SpeechStructureDataset(Dataset):
    """Torch Dataset wrapping (text, label) pairs for sequence classification.

    Each item is tokenized lazily in __getitem__ and returned as the
    flat tensors (input_ids, attention_mask, labels) that the HF Trainer
    expects.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Tokenize one example to fixed-length, padded/truncated tensors.
        sample_text = str(self.texts[idx])
        encoded = self.tokenizer(
            sample_text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Tokenizer returns batch-shaped (1, L) tensors; flatten to (L,).
        return {
            'input_ids': encoded['input_ids'].flatten(),
            'attention_mask': encoded['attention_mask'].flatten(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }
|
| 93 |
+
|
| 94 |
+
# ============ 3. METRICS UNTUK EVALUASI ============
|
| 95 |
+
|
| 96 |
+
def compute_metrics(pred):
    """Compute evaluation metrics (accuracy only) for the HF Trainer.

    *pred* is a Trainer prediction output exposing .label_ids and
    .predictions (raw logits); the argmax over the last axis gives the
    predicted class ids.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    return {'accuracy': accuracy_score(y_true, y_pred)}
|
| 106 |
+
|
| 107 |
+
# ============ 4. TRAINING MODEL ============
|
| 108 |
+
|
| 109 |
+
def train_model(train_df, val_df, label_map, model_name='indobenchmark/indobert-base-p1'):
    """Fine-tune IndoBERT model.

    Fine-tunes a pretrained IndoBERT checkpoint for 3-way speech-structure
    classification, keeps the best checkpoint by validation accuracy, and
    saves model + tokenizer to './best_model'.
    Returns (trainer, tokenizer, model).
    """

    # Load tokenizer and a classification head sized to the label set.
    print(f"\nMemuat model: {model_name}")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=len(label_map)
    )

    # Wrap the dataframes in torch Datasets for the Trainer.
    train_dataset = SpeechStructureDataset(
        texts=train_df['text'].tolist(),
        labels=train_df['label_id'].tolist(),
        tokenizer=tokenizer
    )

    val_dataset = SpeechStructureDataset(
        texts=val_df['text'].tolist(),
        labels=val_df['label_id'].tolist(),
        tokenizer=tokenizer
    )

    # Training arguments: eval/save every epoch, keep best-by-accuracy.
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=30,  # small dataset, so train for more epochs
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        warmup_steps=100,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        learning_rate=2e-5,
        seed=42,
        report_to="none",  # disable all external experiment logging (wandb etc.)
        save_total_limit=2
    )

    # Trainer wires model, data, args and the accuracy metric together.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        compute_metrics=compute_metrics
    )

    # Run training.
    print("\n🚀 Mulai training...")
    trainer.train()

    # Persist the best checkpoint and its tokenizer side by side.
    trainer.save_model('./best_model')
    tokenizer.save_pretrained('./best_model')

    print("\n✅ Training selesai! Model disimpan di './best_model'")

    return trainer, tokenizer, model
|
| 173 |
+
|
| 174 |
+
# ============ 5. EVALUASI MODEL ============
|
| 175 |
+
|
| 176 |
+
def evaluate_model(trainer, test_df, tokenizer, label_map):
    """Evaluate the fine-tuned model on the held-out test set.

    Prints a classification report and confusion matrix, and returns the
    raw Trainer prediction output.
    """

    test_dataset = SpeechStructureDataset(
        texts=test_df['text'].tolist(),
        labels=test_df['label_id'].tolist(),
        tokenizer=tokenizer
    )

    # Run inference over the whole test split in one pass.
    predictions = trainer.predict(test_dataset)
    pred_labels = predictions.predictions.argmax(-1)
    true_labels = test_df['label_id'].tolist()

    # Reverse mapping id -> label name (currently unused below).
    id_to_label = {v: k for k, v in label_map.items()}

    # Per-class precision/recall/F1 report.
    print("\n📊 HASIL EVALUASI:")
    print("\nClassification Report:")
    print(classification_report(
        true_labels,
        pred_labels,
        target_names=list(label_map.keys())
    ))

    # Confusion matrix (rows = true labels, columns = predicted).
    print("\nConfusion Matrix:")
    cm = confusion_matrix(true_labels, pred_labels)
    print(cm)

    return predictions
|
| 208 |
+
|
| 209 |
+
# ============ 6. FUNGSI PREDIKSI ============
|
| 210 |
+
|
| 211 |
+
def predict_text(text, model_path='./best_model'):
    """Classify one sentence as opening/content/closing.

    Loads the fine-tuned model from *model_path* on every call, so this
    is convenient for ad-hoc use but slow for batch prediction (use
    predict_sentences for batches). Returns the predicted label, its
    confidence, and the full probability distribution.
    """
    # Load the fine-tuned checkpoint and put it in inference mode.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.eval()

    # Tokenize to a fixed-length, padded/truncated batch of one.
    encoded = tokenizer(
        text,
        add_special_tokens=True,
        max_length=128,
        padding='max_length',
        truncation=True,
        return_tensors='pt'
    )

    # Forward pass without gradient tracking; softmax -> class probs.
    with torch.no_grad():
        logits = model(**encoded).logits
        probs = torch.nn.functional.softmax(logits, dim=-1)
        best_idx = torch.argmax(probs, dim=-1).item()
        best_prob = probs[0][best_idx].item()

    # Map the class id back to its string label.
    id_to_label = {0: 'opening', 1: 'content', 2: 'closing'}

    return {
        'text': text,
        'predicted_label': id_to_label[best_idx],
        'confidence': best_prob,
        'all_probabilities': {
            'opening': probs[0][0].item(),
            'content': probs[0][1].item(),
            'closing': probs[0][2].item()
        }
    }
|
| 250 |
+
|
| 251 |
+
# ============ 7. MAIN EXECUTION ============
|
| 252 |
+
|
| 253 |
+
if __name__ == "__main__":
    # Path to the training CSV (Colab/Google-Drive layout).
    CSV_PATH = '/content/drive/MyDrive/Colab Notebooks/dataset/struktur.csv'

    print("="*60)
    print("FINE-TUNING INDOBERT - KLASIFIKASI STRUKTUR BERBICARA")
    print("="*60)

    # 1. Load and split the dataset.
    train_df, val_df, test_df, label_map = load_and_prepare_data(CSV_PATH)

    # 2. Fine-tune the model.
    trainer, tokenizer, model = train_model(train_df, val_df, label_map)

    # 3. Evaluate on the held-out test split.
    evaluate_model(trainer, test_df, tokenizer, label_map)

    # 4. Sample predictions on one sentence per structural class.
    print("\n" + "="*60)
    print("CONTOH PREDIKSI")
    print("="*60)

    test_texts = [
        "Selamat pagi hadirin yang saya hormati",
        "Berdasarkan data yang kami kumpulkan",
        "Demikian yang dapat saya sampaikan terima kasih"
    ]

    for text in test_texts:
        result = predict_text(text)
        print(f"\nTeks: {result['text']}")
        print(f"Prediksi: {result['predicted_label']}")
        print(f"Confidence: {result['confidence']:.2%}")
        print(f"Probabilitas semua kelas: {result['all_probabilities']}")

    print("\n✨ Selesai!")
|
| 289 |
+
|
| 290 |
+
"""
|
| 291 |
+
Analisis Struktur Public Speaking
|
| 292 |
+
Deteksi Opening, Content, Closing dari transkrip lengkap
|
| 293 |
+
dengan scoring otomatis untuk penilaian
|
| 294 |
+
"""
|
| 295 |
+
|
| 296 |
+
import os
|
| 297 |
+
os.environ['WANDB_DISABLED'] = 'true'
|
| 298 |
+
|
| 299 |
+
import pandas as pd
|
| 300 |
+
import torch
|
| 301 |
+
import re
|
| 302 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 303 |
+
from typing import List, Dict, Tuple
|
| 304 |
+
|
| 305 |
+
# ============ 1. SENTENCE SPLITTER ============
|
| 306 |
+
|
| 307 |
+
def split_into_sentences(text: str) -> List[str]:
    """Break a transcript into rough sentence chunks.

    Splits on end punctuation, commas, semicolons and newlines, then
    drops surrounding whitespace and empty fragments. Note that commas
    also split, so "sentences" here are really clause-level chunks.
    """
    raw_parts = re.split(r'[.!?,;\n]+', text)
    return [part.strip() for part in raw_parts if part.strip()]
|
| 314 |
+
|
| 315 |
+
# ============ 2. BATCH PREDICTION ============
|
| 316 |
+
|
| 317 |
+
def predict_sentences(sentences: List[str], model_path='./best_model') -> List[Dict]:
    """Run the fine-tuned classifier over every sentence.

    Loads the model once, then classifies each sentence individually and
    returns one result dict (index, text, label, confidence, full
    probability distribution) per sentence, in the original order.
    """
    # Load the checkpoint once for the whole batch.
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.eval()

    id_to_label = {0: 'opening', 1: 'content', 2: 'closing'}
    outputs = []

    for position, sentence in enumerate(sentences):
        # Tokenize one sentence to a fixed-length batch of one.
        encoded = tokenizer(
            sentence,
            add_special_tokens=True,
            max_length=128,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Inference without gradient tracking.
        with torch.no_grad():
            logits = model(**encoded).logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            best = torch.argmax(probs, dim=-1).item()
            certainty = probs[0][best].item()

        outputs.append({
            'sentence_idx': position,
            'text': sentence,
            'predicted_label': id_to_label[best],
            'confidence': certainty,
            'probs': {
                'opening': probs[0][0].item(),
                'content': probs[0][1].item(),
                'closing': probs[0][2].item()
            }
        })

    return outputs
|
| 359 |
+
|
| 360 |
+
# ============ 3. POST-PROCESSING & HEURISTICS ============
|
| 361 |
+
|
| 362 |
+
def apply_structure_rules(predictions: List[Dict]) -> List[Dict]:
    """
    Post-process raw per-sentence predictions with heuristics:
      - the first two sentences lean towards 'opening'
      - the last two sentences lean towards 'closing'
      - known greeting / farewell phrases override the model label
    Mutates and returns the same list.
    """
    if not predictions:
        return predictions

    total = len(predictions)

    # Positional rule: promote the first two sentences to 'opening'
    # when the model itself is already fairly confident about it.
    for pos in range(min(2, total)):
        if predictions[pos]['probs']['opening'] > 0.8:  # Threshold
            predictions[pos]['predicted_label'] = 'opening'
            predictions[pos]['adjusted'] = True

    # Positional rule: promote the last two sentences to 'closing'
    # on the same confidence basis.
    for pos in range(max(0, total - 2), total):
        if predictions[pos]['probs']['closing'] > 0.8:  # Threshold
            predictions[pos]['predicted_label'] = 'closing'
            predictions[pos]['adjusted'] = True

    # Keyword rule: explicit phrases win over everything else
    # (closing keywords are checked first, so they take priority).
    closing_keywords = ['demikian', 'terima kasih', 'sekian', 'akhir kata',
                        'wassalam', 'selamat pagi dan', 'sampai jumpa']
    opening_keywords = ['selamat pagi', 'selamat siang', 'assalamualaikum',
                        'hadirin', 'pertama-tama', 'izinkan saya']

    for pred in predictions:
        lowered = pred['text'].lower()

        if any(kw in lowered for kw in closing_keywords):
            pred['predicted_label'] = 'closing'
            pred['keyword_match'] = True
        elif any(kw in lowered for kw in opening_keywords):
            pred['predicted_label'] = 'opening'
            pred['keyword_match'] = True

    return predictions
|
| 407 |
+
|
| 408 |
+
# ============ 4. STRUCTURE SEGMENTATION ============
|
| 409 |
+
|
| 410 |
+
def segment_speech_structure(predictions: List[Dict]) -> Dict:
    """Bucket sentence predictions by their final structural label.

    Returns a dict with the keys 'opening', 'content' and 'closing',
    each holding the prediction dicts assigned to that section (order
    preserved).
    """
    buckets = {'opening': [], 'content': [], 'closing': []}
    for pred in predictions:
        buckets[pred['predicted_label']].append(pred)
    return buckets
|
| 426 |
+
|
| 427 |
+
# ============ 5. SCORING SYSTEM ============
|
| 428 |
+
|
| 429 |
+
def calculate_structure_score(structure: Dict) -> Dict:
    """
    Score the detected speech structure on a 0-5 scale:
      5: opening + content + closing         (complete)
      4: opening + content, missing closing
      3: opening + closing, missing content
      2: content + closing, missing opening
      1: opening only
      0: any other combination
    Returns the score plus per-section presence flags and counts.
    """
    has_opening = bool(structure['opening'])
    has_content = bool(structure['content'])
    has_closing = bool(structure['closing'])

    # Dispatch on the presence triple instead of a long if/elif chain.
    outcomes = {
        (True, True, True): (5, "Sempurna! Struktur lengkap (Pembuka, Isi, Penutup)"),
        (True, True, False): (4, "Baik. Ada pembuka dan isi, tapi kurang penutup"),
        (True, False, True): (3, "Cukup. Ada pembuka dan penutup, tapi isi kurang jelas"),
        (False, True, True): (2, "Perlu perbaikan. Kurang pembuka yang jelas"),
        (True, False, False): (1, "Kurang lengkap. Hanya ada pembuka"),
    }
    score, description = outcomes.get(
        (has_opening, has_content, has_closing),
        (0, "Struktur tidak terdeteksi dengan baik"),
    )

    return {
        'score': score,
        'max_score': 5,
        'description': description,
        'has_opening': has_opening,
        'has_content': has_content,
        'has_closing': has_closing,
        'opening_count': len(structure['opening']),
        'content_count': len(structure['content']),
        'closing_count': len(structure['closing'])
    }
|
| 475 |
+
|
| 476 |
+
# ============ 6. MAIN ANALYSIS FUNCTION ============
|
| 477 |
+
|
| 478 |
+
def analyze_speech(transcript: str, model_path='./best_model',
                   apply_rules=True, verbose=True) -> Dict:
    """
    Main entry point: analyse the structure of a full speech transcript.

    Pipeline: split into sentences -> classify each sentence ->
    optionally apply heuristic rules -> group by section -> score.

    Args:
        transcript: Full speech text.
        model_path: Path to the fine-tuned classifier checkpoint.
        apply_rules: Whether to apply the heuristic post-processing rules.
        verbose: Print a human-readable report to stdout.

    Returns:
        Dict with 'sentences', 'predictions', 'structure', 'score' and
        the original 'transcript'.
    """

    # 1. Split into sentences
    sentences = split_into_sentences(transcript)

    if verbose:
        print(f"📝 Jumlah kalimat terdeteksi: {len(sentences)}")

    # 2. Predict each sentence
    predictions = predict_sentences(sentences, model_path)

    # 3. Apply rules (optional)
    if apply_rules:
        predictions = apply_structure_rules(predictions)

    # 4. Segment structure
    structure = segment_speech_structure(predictions)

    # 5. Calculate score
    score_result = calculate_structure_score(structure)

    # 6. Generate report
    if verbose:
        print("\n" + "="*70)
        print("📊 HASIL ANALISIS STRUKTUR BERBICARA")
        print("="*70)

        print(f"\n🎯 SKOR: {score_result['score']}/{score_result['max_score']}")
        print(f"💬 {score_result['description']}")

        print(f"\n✅ Struktur terdeteksi:")
        print(f"  • Pembuka (Opening): {score_result['opening_count']} kalimat")
        print(f"  • Isi (Content): {score_result['content_count']} kalimat")
        print(f"  • Penutup (Closing): {score_result['closing_count']} kalimat")

        print(f"\n📋 Detail per bagian:")
        print(f"\n{'='*70}")

        for section in ['opening', 'content', 'closing']:
            if structure[section]:
                print(f"\n🔹 {section.upper()}:")
                for item in structure[section]:
                    print(f"  [{item['sentence_idx']+1}] {item['text'][:80]}...")
                    print(f"      Confidence: {item['confidence']:.2%}")

        print(f"\n{'='*70}")

    return {
        'sentences': sentences,
        'predictions': predictions,
        'structure': structure,
        'score': score_result,
        'transcript': transcript
    }
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
# ============ 8. CONTOH PENGGUNAAN ============
|
| 548 |
+
|
| 549 |
+
if __name__ == "__main__":

    # Example speech transcript used as a smoke test of the pipeline.
    sample_transcript = """
    Assalamualaikum warahmatullahi wabarakatuh. Selamat pagi hadirin yang saya hormati
    Puji syukur kita panjatkan kehadirat Tuhan Yang Maha Esa

    Pada kesempatan ini saya akan membahas tentang pentingnya pendidikan karakter
    Menurut data dari Kemendikbud tahun 2023, tingkat literasi di Indonesia masih perlu ditingkatkan
    Berdasarkan penelitian menunjukkan bahwa pendidikan karakter sangat penting untuk generasi muda
    Contohnya seperti yang terjadi di negara-negara maju, mereka mengutamakan pendidikan karakter sejak dini

    Oleh karena itu kita perlu bergerak bersama untuk meningkatkan kualitas pendidikan
    Demikian yang dapat saya sampaikan
    Terima kasih atas perhatian Bapak dan Ibu sekalian
    Wassalamualaikum warahmatullahi wabarakatuh
    """

    print("🎤 ANALISIS STRUKTUR PUBLIC SPEAKING")
    print("="*70)

    # Run the full analysis pipeline with heuristics and verbose output.
    result = analyze_speech(
        transcript=sample_transcript,
        model_path='./best_model',
        apply_rules=True,
        verbose=True
    )

    print("\n✨ Analisis selesai!")
|
app/services/video_processor.py
ADDED
|
@@ -0,0 +1,319 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Video Processor Orchestrator
|
| 3 |
+
|
| 4 |
+
This module coordinates all AI models and creates the final analysis result.
|
| 5 |
+
"""
|
| 6 |
+
import cv2 as cv
|
| 7 |
+
import time
|
| 8 |
+
from typing import Dict, Any, Optional, Callable
|
| 9 |
+
from loguru import logger
|
| 10 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 11 |
+
|
| 12 |
+
from app.config import settings
|
| 13 |
+
from app.services.eye_tracking import EyeTrackingService
|
| 14 |
+
from app.services.facial_expression import FacialExpressionService
|
| 15 |
+
from app.services.gesture_detection import GestureDetectionService
|
| 16 |
+
from app.models import (
|
| 17 |
+
AnalysisResult,
|
| 18 |
+
VideoMetadata,
|
| 19 |
+
MainIndicators,
|
| 20 |
+
BonusIndicators,
|
| 21 |
+
IndicatorResult,
|
| 22 |
+
Level
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class VideoProcessor:
    """
    Main video processor that orchestrates all AI models.

    Lazily loads the eye-tracking, facial-expression, and gesture services,
    runs the required analyses in parallel, and assembles the final
    analysis payload for the API layer.
    """

    def __init__(self):
        """Initialize video processor; services are loaded on first use."""
        # Lazy-loaded in _load_models() so constructing the processor is cheap.
        self.eye_tracking_service = None
        self.facial_expression_service = None
        self.gesture_service = None
        logger.info("VideoProcessor initialized")

    def _load_models(self):
        """Lazy-load all model services. Idempotent — safe to call repeatedly."""
        if self.eye_tracking_service is None:
            logger.info("Loading Eye Tracking model...")
            self.eye_tracking_service = EyeTrackingService()

        if self.facial_expression_service is None:
            logger.info("Loading Facial Expression model...")
            self.facial_expression_service = FacialExpressionService()

        if self.gesture_service is None:
            logger.info("Loading Gesture Detection model...")
            self.gesture_service = GestureDetectionService()

        logger.info("β All models loaded")

    def process_video(
        self,
        video_path: str,
        level: int,
        progress_callback: Optional[Callable] = None
    ) -> Dict[str, Any]:
        """
        Process video and return analysis results.

        Args:
            video_path: Path to video file
            level: Public speaking level (1-5)
            progress_callback: Optional callback for progress updates.
                Signature: callback(step: str, percentage: float, message: str)

        Returns:
            Dict containing analysis results

        Raises:
            Exception: Re-raises any failure from metadata extraction or
                result assembly after logging it.
        """
        start_time = time.time()

        try:
            # Load models (lazy; no-op after the first call)
            if progress_callback:
                progress_callback("loading_models", 10, "Loading AI models...")
            self._load_models()

            # Get video metadata
            if progress_callback:
                progress_callback("reading_video", 15, "Reading video metadata...")
            metadata = self._get_video_metadata(video_path)

            # Determine which indicators to process based on level
            indicators_config = self._get_indicators_for_level(level)

            # Process all models in parallel
            if progress_callback:
                progress_callback("processing", 20, "Processing video with AI models...")

            results = self._process_models_parallel(
                video_path,
                indicators_config,
                progress_callback
            )

            # Build final result
            if progress_callback:
                progress_callback("finalizing", 90, "Building final analysis...")

            analysis_result = self._build_analysis_result(
                level=level,
                metadata=metadata,
                results=results
            )

            processing_time = time.time() - start_time

            if progress_callback:
                progress_callback("completed", 100, f"Analysis completed in {processing_time:.2f}s")

            logger.info(f"β Video processed successfully in {processing_time:.2f}s")

            return analysis_result

        except Exception as e:
            logger.error(f"β Video processing failed: {e}")
            raise

    def _get_video_metadata(self, video_path: str) -> "VideoMetadata":
        """Extract duration, fps, resolution, and file size from the video.

        Raises:
            ValueError: If the video cannot be opened.
        """
        try:
            cap = cv.VideoCapture(video_path)

            if not cap.isOpened():
                raise ValueError(f"Cannot open video: {video_path}")

            # BUG FIX: keep the raw (possibly fractional, e.g. 29.97) fps
            # for the duration computation; truncating to int first skewed
            # the duration for non-integer frame rates.
            fps_raw = cap.get(cv.CAP_PROP_FPS)
            width = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
            frame_count = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
            duration = frame_count / fps_raw if fps_raw > 0 else 0

            cap.release()

            # Local import kept to match the original file layout
            import os
            file_size = os.path.getsize(video_path)

            return VideoMetadata(
                duration=round(duration, 2),
                fps=int(fps_raw),
                resolution=f"{width}x{height}",
                file_size=file_size
            )

        except Exception as e:
            logger.error(f"Failed to get video metadata: {e}")
            raise

    def _get_indicators_for_level(self, level: int) -> Dict[str, bool]:
        """
        Determine which indicators to process based on level.

        Returns:
            Dict with indicator names and whether to process them
        """
        config = {
            # Main indicators (enabled progressively by level)
            "kontak_mata": level >= 2,
            "kesesuaian_topik": level >= 3,
            "struktur_kalimat": level >= 5,

            # Bonus indicators (always processed for all levels)
            "face_expression": True,
            "gesture": True,
            "first_impression": True,

            # Audio indicators (placeholder - not implemented yet)
            "tempo": False,
            "artikulasi": False,
            "jeda": False,
            "kata_pengisi": False,
            "kata_tidak_senonoh": False
        }

        return config

    def _process_models_parallel(
        self,
        video_path: str,
        indicators_config: Dict[str, bool],
        progress_callback: Optional[Callable] = None
    ) -> Dict[str, Any]:
        """
        Process all required models in parallel.

        Returns:
            Dict with results from each model. A failed model contributes
            {"error": <message>} instead of raising.
        """
        results = {}

        # (task_name, analyzer) pairs; all analyzers take the video path.
        # NOTE: previously the comprehension below shadowed the video_path
        # parameter — harmless but confusing; fixed by dropping it from
        # the tuples.
        tasks = []

        # Eye tracking (for kontak_mata)
        if indicators_config.get("kontak_mata", False):
            tasks.append(("eye_tracking", self.eye_tracking_service.analyze_video))

        # Facial expression (for first_impression and face_expression)
        if indicators_config.get("face_expression", False):
            tasks.append(("facial_expression", self.facial_expression_service.analyze_video))

        # Gesture detection
        if indicators_config.get("gesture", False):
            tasks.append(("gesture", self.gesture_service.analyze_video))

        with ThreadPoolExecutor(max_workers=3) as executor:
            futures = {
                executor.submit(func, video_path): name
                for name, func in tasks
            }

            completed = 0
            total = len(futures)

            for future in as_completed(futures):
                task_name = futures[future]
                try:
                    results[task_name] = future.result()
                    completed += 1

                    if progress_callback:
                        # Map completion onto the 20%-80% progress band
                        pct = 20 + (completed / total) * 60
                        progress_callback(
                            "processing",
                            pct,
                            f"Completed {task_name} analysis ({completed}/{total})"
                        )

                    logger.info(f"β {task_name} completed")

                except Exception as e:
                    # Record the failure per-model so the other analyses
                    # still contribute to the final result.
                    logger.error(f"β {task_name} failed: {e}")
                    results[task_name] = {"error": str(e)}

        return results

    def _build_analysis_result(
        self,
        level: int,
        metadata: "VideoMetadata",
        results: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Build final analysis result in the expected format.

        Returns:
            Dict ready to be serialized as JSON response
        """
        # Main indicators
        main_indicators = {}

        # Kontak Mata (from eye tracking)
        if "eye_tracking" in results:
            eye_data = results["eye_tracking"]
            main_indicators["kontak_mata"] = {
                "score": eye_data.get("eye_contact_analysis", {}).get("score", 0),
                "raw_data": eye_data
            }

        # Bonus indicators
        bonus_indicators = {}

        # BUG FIX: the face_expression block was previously built
        # unconditionally and read `face_data`, which is only bound inside
        # this branch — a NameError whenever facial-expression analysis was
        # skipped or missing. Both face-derived indicators now live here.
        if "facial_expression" in results:
            face_data = results["facial_expression"]
            summary = face_data.get("summary", {})

            # First Impression (opening smile during the opening period)
            bonus_indicators["first_impression"] = {
                "detected": summary.get("opening_smile_detected", False),
                "raw_data": {
                    "smile_percentage": summary.get("opening_smile_percentage", 0),
                    "opening_expressions": summary.get("opening_period_expressions", [])
                }
            }

            # Face Expression (overall)
            bonus_indicators["face_expression"] = {
                "raw_data": {
                    "dominant_expression": summary.get("dominant_expression", "unknown"),
                    "expression_distribution": summary.get("expression_distribution", {})
                }
            }

        # Gesture
        if "gesture" in results:
            gesture_data = results["gesture"]
            bonus_indicators["gesture"] = {
                "score": gesture_data.get("gesture_analysis", {}).get("movement_score", 0),
                "raw_data": gesture_data
            }

        # Build final response
        return {
            "level": level,
            "video_metadata": {
                "duration": metadata.duration,
                "fps": metadata.fps,
                "resolution": metadata.resolution,
                "file_size": metadata.file_size
            },
            "main_indicators": main_indicators,
            "bonus_indicators": bonus_indicators,
            "processing_time": 0  # Will be set by task handler
        }
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# Singleton instance, created lazily on first request
_processor_instance = None


def get_video_processor() -> VideoProcessor:
    """Return the process-wide VideoProcessor, creating it on first use."""
    global _processor_instance
    if _processor_instance is not None:
        return _processor_instance
    _processor_instance = VideoProcessor()
    return _processor_instance
|
app/tasks.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Background Tasks for Video Processing
|
| 3 |
+
"""
|
| 4 |
+
import json
|
| 5 |
+
import time
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
from loguru import logger
|
| 8 |
+
|
| 9 |
+
from app.core.redis_client import get_redis_client
|
| 10 |
+
from app.core.storage import get_storage_manager
|
| 11 |
+
from app.services.video_processor import VideoProcessor
|
| 12 |
+
from app.models import TaskStatus
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def process_video_task(task_id: str, video_path: str, level: int):
    """
    Background task to process video.

    This function is executed by an RQ worker in the background. It keeps
    the task record in Redis up to date (PROCESSING → COMPLETED/FAILED)
    and deletes the uploaded video file when finished, whatever the outcome.

    Args:
        task_id: Unique task identifier
        video_path: Path to uploaded video file
        level: Public speaking level (1-5)

    Returns:
        dict: Analysis results (also stored on the Redis task key)

    Raises:
        Exception: Re-raises any processing failure after marking the
            task FAILED, so RQ records the job as failed.
    """
    start_time = time.time()
    redis_client = get_redis_client().get_client()
    task_key = f"task:{task_id}"

    try:
        logger.info(f"πΉ Processing task {task_id} (Level {level})")

        # Update task status to PROCESSING before any heavy work
        update_task_status(
            redis_client,
            task_key,
            TaskStatus.PROCESSING,
            progress={
                "current_step": "initializing",
                "percentage": 0,
                "message": "Initializing video processing..."
            }
        )

        # Initialize video processor (models load lazily inside it)
        processor = VideoProcessor()

        # Update progress: Loading models
        update_task_status(
            redis_client,
            task_key,
            TaskStatus.PROCESSING,
            progress={
                "current_step": "loading_models",
                "percentage": 10,
                "message": "Loading AI models..."
            }
        )

        # Process video; forward fine-grained progress updates to Redis
        result = processor.process_video(
            video_path=video_path,
            level=level,
            progress_callback=lambda step, pct, msg: update_task_status(
                redis_client,
                task_key,
                TaskStatus.PROCESSING,
                progress={
                    "current_step": step,
                    "percentage": pct,
                    "message": msg
                }
            )
        )

        # Record wall-clock processing time on the result payload
        processing_time = time.time() - start_time
        result["processing_time"] = round(processing_time, 2)

        # Update task status to COMPLETED
        update_task_status(
            redis_client,
            task_key,
            TaskStatus.COMPLETED,
            result=result,
            completed_at=datetime.utcnow().isoformat()
        )

        logger.info(f"β Task {task_id} completed in {processing_time:.2f}s")

        # Cleanup video file
        storage = get_storage_manager()
        storage.delete_video(video_path)

        return result

    except Exception as e:
        logger.error(f"β Task {task_id} failed: {e}")

        # Update task status to FAILED
        update_task_status(
            redis_client,
            task_key,
            TaskStatus.FAILED,
            error=str(e),
            completed_at=datetime.utcnow().isoformat()
        )

        # Best-effort cleanup. BUG FIX: this was a bare `except: pass`,
        # which also swallowed SystemExit/KeyboardInterrupt and hid every
        # cleanup failure — narrowed to Exception and logged instead.
        try:
            storage = get_storage_manager()
            storage.delete_video(video_path)
        except Exception as cleanup_err:
            logger.warning(f"Cleanup failed for {video_path}: {cleanup_err}")

        raise
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def update_task_status(
    redis_client,
    task_key: str,
    status: TaskStatus,
    progress: dict = None,
    result: dict = None,
    error: str = None,
    completed_at: str = None
):
    """
    Update task status in Redis.

    Loads the existing task JSON, patches the supplied fields, and writes
    it back. A missing task key is logged and ignored; Redis/JSON errors
    are logged but never propagated (status updates are best-effort and
    must not kill the worker task).

    Args:
        redis_client: Redis client instance
        task_key: Redis key for task
        status: Task status
        progress: Progress information (optional)
        result: Analysis result (optional)
        error: Error message (optional)
        completed_at: Completion timestamp (optional)
    """
    try:
        # Get existing task data
        task_data_str = redis_client.get(task_key)
        if not task_data_str:
            logger.warning(f"Task key {task_key} not found in Redis")
            return

        task_data = json.loads(task_data_str)

        # Update fields
        task_data["status"] = status.value

        # BUG FIX: compare against None rather than truthiness so that an
        # empty-but-valid dict (e.g. result == {}) or empty string is still
        # persisted instead of being silently dropped.
        if progress is not None:
            task_data["progress"] = progress

        if result is not None:
            task_data["result"] = result

        if error is not None:
            task_data["error"] = error

        if completed_at is not None:
            task_data["completed_at"] = completed_at

        # Save back to Redis
        redis_client.set(task_key, json.dumps(task_data))

    except Exception as e:
        logger.error(f"Failed to update task status: {e}")
|
app/worker.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RQ Worker Entry Point
|
| 3 |
+
"""
|
| 4 |
+
import sys
|
| 5 |
+
from loguru import logger
|
| 6 |
+
from redis import Redis
|
| 7 |
+
from rq import Worker, Queue, Connection
|
| 8 |
+
|
| 9 |
+
from app.config import settings
|
| 10 |
+
from app.core.redis_client import get_redis_client
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
# Configure logging: replace loguru's default sink with (1) colored,
# human-readable output on stdout and (2) a daily-rotated file under logs/
# retained for 7 days. Both sinks use the level from application settings.
logger.remove()
logger.add(
    sys.stdout,
    format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan> - <level>{message}</level>",
    level=settings.LOG_LEVEL
)
logger.add(
    "logs/swara_worker_{time:YYYY-MM-DD}.log",
    rotation="1 day",
    retention="7 days",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function} - {message}",
    level=settings.LOG_LEVEL
)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def main():
    """Start the RQ worker and block until it is stopped.

    Connects to Redis, binds the configured task queue, and runs a worker
    loop with the scheduler enabled. Exits with status 1 on startup failure.
    """
    logger.info("=" * 70)
    logger.info("π§ SWARA Worker Starting...")
    logger.info("=" * 70)
    logger.info(f"Environment: {settings.ENV}")
    # SECURITY FIX: do not log the full Redis URL — it embeds the password
    # (e.g. rediss://default:<password>@host:port). Log only the host part.
    logger.info(f"Redis host: {settings.REDIS_URL.rsplit('@', 1)[-1]}")
    logger.info(f"Queue Name: {settings.TASK_QUEUE_NAME}")

    try:
        # Connect to Redis
        redis_client = get_redis_client()
        redis_conn = redis_client.connect()
        logger.info("β Redis connected")

        # Create queue
        queue = Queue(settings.TASK_QUEUE_NAME, connection=redis_conn)
        logger.info(f"β Queue '{settings.TASK_QUEUE_NAME}' initialized")

        logger.info("=" * 70)
        logger.info("β Worker ready and listening for tasks...")
        logger.info("=" * 70)

        # BUG FIX: rq==2.0.0 (pinned in requirements.txt) removed the
        # `Connection` context manager; the connection must be passed to
        # Worker explicitly. (The now-unused `Connection` import at the
        # top of this file should be dropped as well — it fails on rq 2.x.)
        worker = Worker([queue], connection=redis_conn)
        worker.work(with_scheduler=True)

    except KeyboardInterrupt:
        logger.info("\nπ Worker interrupted by user")
    except Exception as e:
        logger.error(f"β Worker failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|
models/.gitkeep
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Models directory
|
| 2 |
+
|
| 3 |
+
# Place your AI model files (.onnx, .pt, etc) here
|
models/best.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf443423d8fcd415869dcec043c4332be6f06bdd5034af0b7b31fc362ece1908
|
| 3 |
+
size 10586597
|
requirements.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Web Framework
|
| 2 |
+
fastapi==0.115.5
|
| 3 |
+
uvicorn[standard]==0.32.1
|
| 4 |
+
python-multipart==0.0.20
|
| 5 |
+
pydantic==2.10.3
|
| 6 |
+
pydantic-settings==2.6.1
|
| 7 |
+
|
| 8 |
+
# Background Jobs
|
| 9 |
+
rq==2.0.0
|
| 10 |
+
redis==5.2.1
|
| 11 |
+
|
| 12 |
+
# Computer Vision & AI
|
| 13 |
+
opencv-python==4.10.0.84
|
| 14 |
+
mediapipe==0.10.21
|
| 15 |
+
numpy==1.26.4
|
| 16 |
+
ultralytics==8.3.52
|
| 17 |
+
|
| 18 |
+
# Data Processing
|
| 19 |
+
pandas==2.2.3
|
| 20 |
+
matplotlib==3.10.0
|
| 21 |
+
scipy==1.14.1
|
| 22 |
+
|
| 23 |
+
# Utilities
|
| 24 |
+
python-dotenv==1.0.1
|
| 25 |
+
httpx==0.28.1
|
| 26 |
+
aiofiles==24.1.0
|
| 27 |
+
|
| 28 |
+
# Logging & Monitoring
|
| 29 |
+
loguru==0.7.3
|