Spaces:
Runtime error
Runtime error
feat(space): CPU ONNX runtime build (v9.4, full-song sliding aggregation)
Browse files- Replace legacy core/ui architecture with hf-spaces package
- Use intrect/artifactnet-models ONNX (production-onnx/*.onnx)
- Full-song sliding chunk aggregation (matches production)
- Upload-only (YouTube removed), error report to api.intrect.io
- ffmpeg-based opus/m4a support
- .gitignore +0 -40
- Dockerfile.youtube-proxy +0 -62
- HF_SPACES_ENV.md +0 -139
- README.md +38 -22
- app.py +593 -611
- config.py +28 -30
- core/__init__.py +0 -7
- core/__pycache__/proprietary.cpython-312.pyc +0 -0
- core/proprietary.py +0 -192
- docker-compose.youtube-proxy.yml +0 -36
- inference/audio_utils.py +140 -54
- inference/e2e_model.py +293 -49
- inference/model.py +398 -0
- models +0 -1
- packages.txt +2 -1
- requirements.txt +11 -15
- ui/__init__.py +0 -14
- ui/components.py +0 -112
- ui/verdict_card.py +0 -189
- visualization/feature_bars.py +110 -0
- visualization/radar.py +164 -0
- visualization/spectrogram.py +1 -2
- visualization/timeline.py +143 -39
- youtube_proxy_server.py +0 -180
.gitignore
DELETED
|
@@ -1,40 +0,0 @@
|
|
| 1 |
-
# Python
|
| 2 |
-
__pycache__/
|
| 3 |
-
*.py[cod]
|
| 4 |
-
*.egg-info/
|
| 5 |
-
*.egg
|
| 6 |
-
dist/
|
| 7 |
-
build/
|
| 8 |
-
|
| 9 |
-
# IP Protection note: core/proprietary.py contains obfuscated algorithms
|
| 10 |
-
# (난독화된 알고리즘으로 특허 핵심 보호)
|
| 11 |
-
|
| 12 |
-
# Models (downloaded at runtime from HF Hub)
|
| 13 |
-
*.onnx
|
| 14 |
-
*.pt
|
| 15 |
-
*.onnx.data
|
| 16 |
-
|
| 17 |
-
# Environment
|
| 18 |
-
.env
|
| 19 |
-
.venv/
|
| 20 |
-
venv/
|
| 21 |
-
|
| 22 |
-
# IDE
|
| 23 |
-
.vscode/
|
| 24 |
-
.idea/
|
| 25 |
-
*.swp
|
| 26 |
-
*.swo
|
| 27 |
-
|
| 28 |
-
# OS
|
| 29 |
-
.DS_Store
|
| 30 |
-
Thumbs.db
|
| 31 |
-
|
| 32 |
-
# Gradio
|
| 33 |
-
flagged/
|
| 34 |
-
|
| 35 |
-
# Development files (not needed in HF Spaces)
|
| 36 |
-
CLAUDE.md
|
| 37 |
-
.claude/
|
| 38 |
-
local_demo_v77.py
|
| 39 |
-
testing/
|
| 40 |
-
trash/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile.youtube-proxy
DELETED
|
@@ -1,62 +0,0 @@
|
|
| 1 |
-
# Multi-stage Dockerfile for YouTube Proxy Server
|
| 2 |
-
|
| 3 |
-
FROM python:3.11-slim as builder
|
| 4 |
-
|
| 5 |
-
# Install build dependencies
|
| 6 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
-
build-essential \
|
| 8 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
-
|
| 10 |
-
# Create virtual environment
|
| 11 |
-
RUN python -m venv /opt/venv
|
| 12 |
-
ENV PATH="/opt/venv/bin:$PATH"
|
| 13 |
-
|
| 14 |
-
# Copy and install Python dependencies
|
| 15 |
-
COPY requirements.txt .
|
| 16 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
| 17 |
-
|
| 18 |
-
# ============================================================
|
| 19 |
-
# Final stage
|
| 20 |
-
# ============================================================
|
| 21 |
-
|
| 22 |
-
FROM python:3.11-slim
|
| 23 |
-
|
| 24 |
-
# Install runtime dependencies (ffmpeg for yt-dlp)
|
| 25 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 26 |
-
ffmpeg \
|
| 27 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 28 |
-
|
| 29 |
-
# Copy virtual environment from builder
|
| 30 |
-
COPY --from=builder /opt/venv /opt/venv
|
| 31 |
-
ENV PATH="/opt/venv/bin:$PATH"
|
| 32 |
-
|
| 33 |
-
# Create non-root user for security (use UID 1001 to avoid conflicts)
|
| 34 |
-
RUN useradd -m -u 1001 appuser 2>/dev/null || true
|
| 35 |
-
|
| 36 |
-
# Set working directory
|
| 37 |
-
WORKDIR /app
|
| 38 |
-
|
| 39 |
-
# Copy application
|
| 40 |
-
COPY youtube_proxy_server.py .
|
| 41 |
-
|
| 42 |
-
# Change ownership
|
| 43 |
-
RUN chown -R appuser:appuser /app 2>/dev/null || true
|
| 44 |
-
|
| 45 |
-
# Switch to non-root user
|
| 46 |
-
USER appuser
|
| 47 |
-
|
| 48 |
-
# Expose port
|
| 49 |
-
EXPOSE 8765
|
| 50 |
-
|
| 51 |
-
# Health check
|
| 52 |
-
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
| 53 |
-
CMD python -c "import requests; requests.get('http://localhost:8765/health')" || exit 1
|
| 54 |
-
|
| 55 |
-
# Default environment variables
|
| 56 |
-
ENV HOST=0.0.0.0
|
| 57 |
-
ENV PORT=8765
|
| 58 |
-
ENV LOG_LEVEL=INFO
|
| 59 |
-
ENV YOUTUBE_PROXY_API_KEY=default-key
|
| 60 |
-
|
| 61 |
-
# Run application
|
| 62 |
-
CMD ["python", "youtube_proxy_server.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
HF_SPACES_ENV.md
DELETED
|
@@ -1,139 +0,0 @@
|
|
| 1 |
-
# HF Spaces 환경변수 설정 가이드
|
| 2 |
-
|
| 3 |
-
YouTube 프록시를 통해 HF Spaces 앱에서 YouTube URL 다운로드를 활성화하려면 다음 환경변수를 설정하세요.
|
| 4 |
-
|
| 5 |
-
## 설정 단계
|
| 6 |
-
|
| 7 |
-
### 1. cloudflared를 통한 외부 접근
|
| 8 |
-
|
| 9 |
-
youtube-proxy 서비스는 `youtube-proxy.intrect.io`를 통해 접근 가능합니다 (Cloudflare Tunnel 역프록시).
|
| 10 |
-
|
| 11 |
-
### 2. HF Spaces 시크릿 설정
|
| 12 |
-
|
| 13 |
-
HF Spaces 설정에서 다음 환경변수를 추가하세요:
|
| 14 |
-
|
| 15 |
-
#### `YOUTUBE_PROXY_URL`
|
| 16 |
-
```
|
| 17 |
-
https://youtube-proxy.intrect.io
|
| 18 |
-
```
|
| 19 |
-
|
| 20 |
-
#### `YOUTUBE_PROXY_API_KEY`
|
| 21 |
-
```
|
| 22 |
-
c60ba3dc9f26cfc700958983f82b997eac084743aad9f5be4db7bb625ae6dbbd
|
| 23 |
-
```
|
| 24 |
-
|
| 25 |
-
이는 `docker-compose.youtube-proxy.yml`의 `YOUTUBE_PROXY_API_KEY` 환경변수와 **정확히 동일**해야 합니다.
|
| 26 |
-
|
| 27 |
-
## 인증 흐름
|
| 28 |
-
|
| 29 |
-
1. HF Spaces 앱이 YouTube URL을 받으면
|
| 30 |
-
2. `YOUTUBE_PROXY_URL` 및 `YOUTUBE_PROXY_API_KEY` 사용
|
| 31 |
-
3. `https://youtube-proxy.intrect.io/download-youtube` 엔드포인트로 POST 요청
|
| 32 |
-
4. `Authorization: Bearer {YOUTUBE_PROXY_API_KEY}` 헤더 포함
|
| 33 |
-
5. 프록시 서버가 yt-dlp로 다운로드
|
| 34 |
-
6. WAV 파일 반환
|
| 35 |
-
|
| 36 |
-
## 보안 고려사항
|
| 37 |
-
|
| 38 |
-
- API 키는 **절대 공개하지 마세요**
|
| 39 |
-
- cloudflared 역프록시를 통해서만 접근 가능 (외부 포트 노출 없음)
|
| 40 |
-
- 컨테이너는 `proxy` 사용자로 실행 (root 아님)
|
| 41 |
-
- 최소 권한 원칙 준수
|
| 42 |
-
|
| 43 |
-
## 문제 해결
|
| 44 |
-
|
| 45 |
-
### HF Spaces에서 연결 실패
|
| 46 |
-
|
| 47 |
-
1. cloudflared 상태 확인:
|
| 48 |
-
```bash
|
| 49 |
-
sudo systemctl status cloudflared
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
2. youtube-proxy 컨테이너 상태 확인:
|
| 53 |
-
```bash
|
| 54 |
-
docker ps | grep youtube-proxy
|
| 55 |
-
docker logs artifactnet-youtube-proxy
|
| 56 |
-
```
|
| 57 |
-
|
| 58 |
-
3. DNS 확인:
|
| 59 |
-
```bash
|
| 60 |
-
curl -I https://youtube-proxy.intrect.io/health
|
| 61 |
-
```
|
| 62 |
-
|
| 63 |
-
### API 키 불일치
|
| 64 |
-
|
| 65 |
-
`docker-compose.youtube-proxy.yml`의 `YOUTUBE_PROXY_API_KEY`와 HF Spaces의 `YOUTUBE_PROXY_API_KEY`가 **정확히 동일**한지 확인하세요.
|
| 66 |
-
|
| 67 |
-
## Rate Limiting 설정 (권장)
|
| 68 |
-
|
| 69 |
-
과도한 요청과 연속 스팸으로부터 HF Spaces 및 ubuntu-mini 보호:
|
| 70 |
-
|
| 71 |
-
#### `RATE_LIMIT_REQUESTS`
|
| 72 |
-
```
|
| 73 |
-
5
|
| 74 |
-
```
|
| 75 |
-
(기본값: 5회, 1시간당)
|
| 76 |
-
|
| 77 |
-
#### `RATE_LIMIT_MINUTES`
|
| 78 |
-
```
|
| 79 |
-
60
|
| 80 |
-
```
|
| 81 |
-
(기본값: 60분 윈도우)
|
| 82 |
-
|
| 83 |
-
#### `BURST_LIMIT_PER_MINUTE`
|
| 84 |
-
```
|
| 85 |
-
2
|
| 86 |
-
```
|
| 87 |
-
(기본값: 최대 2회/분, 연속 요청 방지)
|
| 88 |
-
|
| 89 |
-
**동작:**
|
| 90 |
-
- **Burst 제한**: 사용자당 2회/분 (연속 요청 방지)
|
| 91 |
-
- **시간 제한**: 사용자당 5회/60분 (장기 남용 방지)
|
| 92 |
-
- 둘 다 만족해야 요청 허용
|
| 93 |
-
|
| 94 |
-
---
|
| 95 |
-
|
| 96 |
-
## 에지 케이스 수집 설정 (선택사항)
|
| 97 |
-
|
| 98 |
-
Uncertain 판정 곡의 분석 데이터를 자동으로 수집하려면:
|
| 99 |
-
|
| 100 |
-
#### `UBUNTU_MINI_ENABLED`
|
| 101 |
-
```
|
| 102 |
-
true
|
| 103 |
-
```
|
| 104 |
-
|
| 105 |
-
#### `UBUNTU_MINI_HOST`
|
| 106 |
-
```
|
| 107 |
-
ubuntu-mini.local
|
| 108 |
-
```
|
| 109 |
-
|
| 110 |
-
#### `UBUNTU_MINI_PORT`
|
| 111 |
-
```
|
| 112 |
-
9000
|
| 113 |
-
```
|
| 114 |
-
|
| 115 |
-
**수집되는 것:**
|
| 116 |
-
- Mel-spectrogram (30초 미만)
|
| 117 |
-
- 판정 통계
|
| 118 |
-
- 타임스탬프
|
| 119 |
-
|
| 120 |
-
**수집되지 않는 것:**
|
| 121 |
-
- 원본 오디오 파일
|
| 122 |
-
- 개인 정보
|
| 123 |
-
|
| 124 |
-
## 다음 단계
|
| 125 |
-
|
| 126 |
-
1. Docker 컨테이너 실행:
|
| 127 |
-
```bash
|
| 128 |
-
docker-compose -f docker-compose.youtube-proxy.yml up -d
|
| 129 |
-
```
|
| 130 |
-
|
| 131 |
-
2. 건강 체크:
|
| 132 |
-
```bash
|
| 133 |
-
curl -H "Authorization: Bearer <your-key>" \
|
| 134 |
-
https://youtube-proxy.intrect.io/health
|
| 135 |
-
```
|
| 136 |
-
|
| 137 |
-
3. HF Spaces에서 YouTube URL 탭이 나타나면 작동 중입니다.
|
| 138 |
-
|
| 139 |
-
4. Uncertain 곡이 자동으로 ubuntu-mini로 전송되는지 확인합니다.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README.md
CHANGED
|
@@ -1,30 +1,46 @@
|
|
| 1 |
---
|
| 2 |
title: ArtifactNet
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
# ArtifactNet — AI Music Forensic Detector
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: ArtifactNet
|
| 3 |
+
emoji: 🎵
|
| 4 |
+
colorFrom: orange
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
+
python_version: "3.10"
|
| 10 |
+
pinned: true
|
| 11 |
+
license: other
|
| 12 |
+
short_description: AI-generated music detection (v9.4 Forensic CNN + HPSS)
|
| 13 |
+
hardware: cpu-basic
|
| 14 |
+
models:
|
| 15 |
+
- intrect/artifactnet-models
|
| 16 |
---
|
| 17 |
|
| 18 |
# ArtifactNet — AI Music Forensic Detector
|
| 19 |
|
| 20 |
+
Upload a track (WAV / MP3 / FLAC, ≤100 MB, ≤5 min). ArtifactNet analyses
|
| 21 |
+
spectral + harmonic-percussive forensic features and returns a per-segment
|
| 22 |
+
P(AI) distribution.
|
| 23 |
+
|
| 24 |
+
- **Backbone**: STFT → U-Net artifact residual → HPSS → 7-channel features → CNN
|
| 25 |
+
- **Verdict**: energy-weighted median across 4-second segments
|
| 26 |
+
- **Runtime**: ONNX Runtime on HF Space CPU (~30–60 s per 4-minute track)
|
| 27 |
+
|
| 28 |
+
## Paper
|
| 29 |
+
|
| 30 |
+
ArtifactNet: Forensic Detection of AI-Generated Music via HPSS and Residual
|
| 31 |
+
Analysis — [arXiv:2604.16254](https://arxiv.org/abs/2604.16254).
|
| 32 |
+
|
| 33 |
+
## Links
|
| 34 |
+
|
| 35 |
+
- Production dashboard: [dash.intrect.io](https://dash.intrect.io)
|
| 36 |
+
- Pricing / API: [intrect.io](https://intrect.io)
|
| 37 |
+
|
| 38 |
+
## Notes
|
| 39 |
+
|
| 40 |
+
- Short files (<60 s) have fewer segments and lower confidence.
|
| 41 |
+
- Mono input disables stereo phase features.
|
| 42 |
+
- Heavily processed audio (bitcrushing, vinyl rips) may affect results.
|
| 43 |
+
- YouTube / URL intake is disabled on this Space — use the dashboard for batch
|
| 44 |
+
processing.
|
| 45 |
+
- Only the ONNX graphs (inference-only, no training metadata) are published;
|
| 46 |
+
the original PyTorch checkpoints remain private.
|
app.py
CHANGED
|
@@ -1,611 +1,593 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
# Purpose: ArtifactNet HF Spaces
|
| 3 |
-
|
| 4 |
-
"""ArtifactNet — AI Music Forensic Detector.
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
import
|
| 14 |
-
import
|
| 15 |
-
import
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
import gradio as gr
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
from
|
| 29 |
-
from
|
| 30 |
-
from
|
| 31 |
-
from
|
| 32 |
-
from
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
#
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
""
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
""
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
#
|
| 140 |
-
#
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
"
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
#
|
| 246 |
-
#
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
if
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
)
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
)
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
)
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
| 531 |
-
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
return demo
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
# ============================================================
|
| 598 |
-
# Entry point — module-level demo object (required for HF Spaces)
|
| 599 |
-
# ============================================================
|
| 600 |
-
|
| 601 |
-
print("Loading model...", flush=True)
|
| 602 |
-
get_model()
|
| 603 |
-
print("Model ready.", flush=True)
|
| 604 |
-
|
| 605 |
-
demo = build_ui()
|
| 606 |
-
|
| 607 |
-
if __name__ == "__main__":
|
| 608 |
-
launch_kwargs = dict(server_name="0.0.0.0", server_port=7860)
|
| 609 |
-
if IS_HF_SPACES:
|
| 610 |
-
launch_kwargs["root_path"] = "/ArtifactNet"
|
| 611 |
-
demo.launch(**launch_kwargs)
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Purpose: ArtifactNet HF Spaces (ZeroGPU) — Gradio demo
|
| 3 |
+
|
| 4 |
+
"""ArtifactNet — AI Music Forensic Detector.
|
| 5 |
+
|
| 6 |
+
HF Spaces + ZeroGPU 전용 빌드.
|
| 7 |
+
- Upload-only (YouTube/URL 제거)
|
| 8 |
+
- Remote inference / residual snapshot / sqlite 로그 제거
|
| 9 |
+
- Error report 는 api.intrect.io 로 POST (옵션)
|
| 10 |
+
- AcoustID 제거 (API key 비공개 유지)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import json
|
| 14 |
+
import os
|
| 15 |
+
import sys
|
| 16 |
+
import tempfile
|
| 17 |
+
import time
|
| 18 |
+
import warnings
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
import gradio as gr
|
| 22 |
+
import numpy as np
|
| 23 |
+
import requests as _requests
|
| 24 |
+
import torch
|
| 25 |
+
|
| 26 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 27 |
+
|
| 28 |
+
from config import SR, CHUNK_SAMPLES, MIN_CONFIDENT_DURATION
|
| 29 |
+
from inference.audio_utils import load_audio_mono_tensor, get_audio_info
|
| 30 |
+
from inference.e2e_model import run_e2e_inference, load_models
|
| 31 |
+
from visualization.feature_bars import plot_feature_bars
|
| 32 |
+
from visualization.radar import plot_forensic_radar, forensic_features_explanation
|
| 33 |
+
from visualization.spectrogram import plot_spectrograms
|
| 34 |
+
from visualization.timeline import plot_timeline
|
| 35 |
+
|
| 36 |
+
warnings.filterwarnings("ignore")
|
| 37 |
+
|
| 38 |
+
API_BASE = os.environ.get("INTRECT_API_BASE", "https://api.intrect.io")
|
| 39 |
+
|
| 40 |
+
# ============================================================
|
| 41 |
+
# Upload validation
|
| 42 |
+
# ============================================================
|
| 43 |
+
|
| 44 |
+
_AUDIO_MAGIC = {
|
| 45 |
+
b"RIFF": "wav",
|
| 46 |
+
b"fLaC": "flac",
|
| 47 |
+
b"\xff\xfb": "mp3",
|
| 48 |
+
b"\xff\xf3": "mp3",
|
| 49 |
+
b"\xff\xf2": "mp3",
|
| 50 |
+
b"ID3": "mp3",
|
| 51 |
+
b"OggS": "ogg",
|
| 52 |
+
}
|
| 53 |
+
_FTYP_BRANDS = {b"M4A ", b"isom", b"mp42", b"dash", b"MSNV"}
|
| 54 |
+
_MAX_UPLOAD_BYTES = 100 * 1024 * 1024
|
| 55 |
+
_ALLOWED_EXTENSIONS = {".wav", ".flac", ".mp3", ".ogg", ".opus", ".m4a", ".aac", ".webm"}
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _validate_audio_file(path: str) -> str | None:
|
| 59 |
+
if not os.path.isfile(path):
|
| 60 |
+
return "<p style='color:#ff4757'>파일을 찾을 수 없습니다.</p>"
|
| 61 |
+
file_size = os.path.getsize(path)
|
| 62 |
+
if file_size > _MAX_UPLOAD_BYTES:
|
| 63 |
+
mb = file_size / 1024 / 1024
|
| 64 |
+
return f"<p style='color:#ff4757'>파일이 너무 큽니다 ({mb:.0f}MB). 최대 100MB까지 허용됩니다.</p>"
|
| 65 |
+
if file_size < 100:
|
| 66 |
+
return "<p style='color:#ff4757'>파일이 너무 작습니다.</p>"
|
| 67 |
+
|
| 68 |
+
ext = os.path.splitext(path)[1].lower()
|
| 69 |
+
if ext not in _ALLOWED_EXTENSIONS:
|
| 70 |
+
return (f"<p style='color:#ff4757'>지원하지 않는 형식입니다 ({ext}). "
|
| 71 |
+
f"WAV, FLAC, MP3, OGG, Opus, M4A만 지원합니다.</p>")
|
| 72 |
+
|
| 73 |
+
try:
|
| 74 |
+
with open(path, "rb") as f:
|
| 75 |
+
header = f.read(12)
|
| 76 |
+
except Exception:
|
| 77 |
+
return "<p style='color:#ff4757'>파일을 읽을 수 없습니다.</p>"
|
| 78 |
+
|
| 79 |
+
detected = None
|
| 80 |
+
for magic, fmt in _AUDIO_MAGIC.items():
|
| 81 |
+
if header[:len(magic)] == magic:
|
| 82 |
+
detected = fmt
|
| 83 |
+
break
|
| 84 |
+
if detected is None and header[4:8] == b"ftyp":
|
| 85 |
+
if header[8:12] in _FTYP_BRANDS:
|
| 86 |
+
detected = "m4a"
|
| 87 |
+
if detected is None and header[:4] == b"\x1a\x45\xdf\xa3":
|
| 88 |
+
detected = "webm"
|
| 89 |
+
|
| 90 |
+
if detected is None:
|
| 91 |
+
return ("<p style='color:#ff4757'>유효한 오디오 파일이 아닙니다.</p>")
|
| 92 |
+
return None
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# ============================================================
|
| 96 |
+
# Verdict stats
|
| 97 |
+
# ============================================================
|
| 98 |
+
_MEDIAN_THRESHOLD = 0.5
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _compute_segment_stats(chunk_probs, chunk_metadata=None):
|
| 102 |
+
arr = np.array(chunk_probs)
|
| 103 |
+
n = len(arr)
|
| 104 |
+
q25, q50, q75 = np.percentile(arr, [25, 50, 75])
|
| 105 |
+
|
| 106 |
+
if chunk_metadata and len(chunk_metadata) == len(chunk_probs):
|
| 107 |
+
rms_arr = np.array([m.get('rms', 1.0) for m in chunk_metadata])
|
| 108 |
+
median_rms = np.median(rms_arr)
|
| 109 |
+
weights = rms_arr / (median_rms + 1e-10)
|
| 110 |
+
weights = weights / weights.sum()
|
| 111 |
+
sorted_indices = np.argsort(arr)
|
| 112 |
+
sorted_probs = arr[sorted_indices]
|
| 113 |
+
sorted_weights = weights[sorted_indices]
|
| 114 |
+
cumsum_weights = np.cumsum(sorted_weights)
|
| 115 |
+
idx = np.searchsorted(cumsum_weights, 0.5)
|
| 116 |
+
weighted_median = float(sorted_probs[min(idx, len(sorted_probs) - 1)])
|
| 117 |
+
else:
|
| 118 |
+
weighted_median = float(q50)
|
| 119 |
+
|
| 120 |
+
return {
|
| 121 |
+
"n": n,
|
| 122 |
+
"mean": float(np.mean(arr)),
|
| 123 |
+
"median": float(q50),
|
| 124 |
+
"weighted_median": weighted_median,
|
| 125 |
+
"q25": float(q25),
|
| 126 |
+
"q75": float(q75),
|
| 127 |
+
"iqr": float(q75 - q25),
|
| 128 |
+
"std": float(np.std(arr)),
|
| 129 |
+
"pct_high": float((arr >= 0.8).sum() / n) if n else 0.0,
|
| 130 |
+
"pct_above_50": float((arr >= 0.5).sum() / n) if n else 0.0,
|
| 131 |
+
"pct_low": float((arr < 0.2).sum() / n) if n else 0.0,
|
| 132 |
+
"n_high": int((arr >= 0.8).sum()),
|
| 133 |
+
"n_mid": int(((arr >= 0.5) & (arr < 0.8)).sum()),
|
| 134 |
+
"n_low": int((arr < 0.5).sum()),
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ============================================================
|
| 139 |
+
# Verdict HTML card
|
| 140 |
+
# ============================================================
|
| 141 |
+
|
| 142 |
+
def _verdict_html(verdict: str, stats: dict, is_stereo: bool, duration: float = 0, elapsed: float = 0,
                  is_short: bool = False, audio_format: str = "") -> str:
    """Render the verdict result card as an HTML snippet for the Gradio UI.

    Args:
        verdict: one of "No file", "AI Generated", "Partial AI", or the
            human-made fallback (any other string takes the green branch).
        stats: distribution dict as produced by _compute_segment_stats
            (reads "mean", "median", "pct_high", "n", "iqr", "n_high",
            "n_mid", "n_low").
        is_stereo: False adds a mono-input warning (phase features unavailable).
        duration: track length in seconds, shown in the footer.
        elapsed: analysis wall-clock time in seconds, shown in the footer.
        is_short: True adds a low-confidence warning for short files.
        audio_format: display label of the input container (e.g. "WAV").

    Returns:
        An HTML string; self-contained card with inline styles.
    """
    # Placeholder card before any file has been uploaded — stats is ignored.
    if verdict == "No file":
        return """
        <div style="text-align:center;padding:30px;background:#16213e;
                    border-radius:12px;color:#888;">
            <p style="font-size:16px;">Upload an audio file to begin analysis</p>
        </div>"""

    mean_prob = stats["mean"]
    median_prob = stats["median"]
    pct_high = stats["pct_high"]
    n_total = stats["n"]

    # Verdict → accent color, icon, and one-line description.
    if verdict == "AI Generated":
        color = "#ff4757"
        icon = "⚠"
        desc = f"{pct_high:.0%} of segments show strong AI indicators"
    elif verdict == "Partial AI":
        color = "#ffa502"
        icon = "⚠"
        iqr = stats.get("iqr", 0)
        desc = f"Bimodal distribution (IQR={iqr:.2f}) — possible AI vocals over human instrumental"
    else:
        color = "#2ed573"
        icon = "✓"
        desc = "No significant AI generation indicators found"

    channels = "Stereo" if is_stereo else "Mono"
    # Percentages for the stacked high/mid/low distribution bar.
    n_high, n_mid, n_low = stats["n_high"], stats["n_mid"], stats["n_low"]
    if n_total > 0:
        pct_h = n_high / n_total * 100
        pct_m = n_mid / n_total * 100
        pct_l = n_low / n_total * 100
    else:
        # No segments at all: render the bar as fully "low" (green).
        pct_h = pct_m = 0.0
        pct_l = 100.0

    # Optional warning: short files yield few segments, lowering confidence.
    short_warn = ""
    if is_short:
        short_warn = f"""
        <div style="margin-top:8px;padding:8px 12px;background:rgba(255,165,2,0.15);
                    border-radius:6px;border-left:3px solid #ffa502;font-size:12px;
                    color:#ccc;line-height:1.5;">
            <b style="color:#ffa502;">Short file ({duration:.0f}s):</b>
            Files under {MIN_CONFIDENT_DURATION}s have fewer segments for analysis.
            Use tracks longer than {MIN_CONFIDENT_DURATION}s for best results.
        </div>"""

    # Optional warning: mono input disables stereo phase features.
    mono_warn = ""
    if not is_stereo:
        mono_warn = """
        <div style="margin-top:8px;padding:6px 10px;background:rgba(255,165,2,0.15);
                    border-radius:6px;border-left:3px solid #ffa502;font-size:12px;">
            Mono input — stereo phase features unavailable.
        </div>"""

    # Main card: verdict header, median/mean, stacked distribution bar,
    # footer stats, format badge, then any warnings.
    return f"""
    <div style="text-align:center;padding:20px;background:#16213e;
                border-radius:12px;border:2px solid {color};">
        <div style="font-size:14px;color:{color};letter-spacing:1px;
                    text-transform:uppercase;font-weight:600;">
            {icon} Verdict
        </div>
        <div style="font-size:32px;font-weight:bold;color:{color};
                    letter-spacing:2px;margin:6px 0;">{verdict.upper()}</div>
        <div style="color:#aaa;font-size:13px;margin-bottom:10px;">{desc}</div>
        <div style="font-size:36px;font-weight:bold;color:white;margin:4px 0;">
            median={median_prob:.1%}
            <span style="font-size:18px;color:#888;">mean={mean_prob:.1%}</span>
        </div>
        <div style="margin:10px auto;max-width:320px;">
            <div style="height:14px;background:#333;border-radius:7px;
                        overflow:hidden;display:flex;">
                <div style="width:{pct_h:.1f}%;background:#ff4757;"></div>
                <div style="width:{pct_m:.1f}%;background:#ffa502;"></div>
                <div style="width:{pct_l:.1f}%;background:#2ed573;"></div>
            </div>
            <div style="display:flex;justify-content:space-between;
                        font-size:10px;color:#888;margin-top:2px;">
                <span style="color:#ff4757;">{n_high} high</span>
                <span style="color:#ffa502;">{n_mid} mid</span>
                <span style="color:#2ed573;">{n_low} low</span>
            </div>
        </div>
        <div style="color:#999;font-size:13px;margin-top:10px;">
            {n_total} segments |
            IQR={stats['iqr']:.2f} |
            {channels} |
            {duration:.1f}s |
            {elapsed:.1f}s
        </div>
        <div style="display:flex;justify-content:center;gap:12px;margin-top:8px;">
            <span style="background:#16213e;border:1px solid #333;border-radius:6px;
                         padding:4px 10px;font-size:12px;color:#3498db;">
                Format: <b>{audio_format}</b>
            </span>
        </div>
        {short_warn}
        {mono_warn}
    </div>"""
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
# ============================================================
|
| 246 |
+
# Main analysis (Upload only)
|
| 247 |
+
# ============================================================
|
| 248 |
+
|
| 249 |
+
def analyze_audio(audio_path, progress=gr.Progress()):
    """Run the full forensic pipeline on an uploaded file.

    Parameters
    ----------
    audio_path : str | None
        Filesystem path supplied by the gr.Audio upload component.
    progress : gr.Progress
        Gradio progress reporter (injected by the framework).

    Returns
    -------
    tuple of 8 values matching the `outputs` list wired in build_ui():
        (verdict_html, spec_fig, timeline_fig, radar_fig, bars_fig,
         forensic_explanation, json_path, analysis_state)
    On early-exit error paths the figure slots are None and the state is {}.
    """
    if audio_path is None:
        return (
            _verdict_html("No file", {}, False, 0, 0, False),
            None, None, None, None, None, None, {},
        )

    # Size/extension validation happens before any decoding work.
    file_err = _validate_audio_file(audio_path)
    if file_err:
        return file_err, None, None, None, None, None, None, {}

    progress(0, desc="🎵 Loading audio...")
    t0 = time.time()

    try:
        mono_tensor, audio_np, is_stereo = load_audio_mono_tensor(audio_path)
    except Exception as e:
        err = f"<p style='color:#ff4757'>Error loading audio: {e}</p>"
        return err, None, None, None, None, None, None, {}

    info = get_audio_info(audio_np, is_stereo)
    mono_np = mono_tensor.numpy()
    duration = info["duration"]

    progress(0.2, desc="🔬 Running AI forensic analysis on CPU (ONNX)...")
    # Router/verdict feature tensors are not used by this demo UI.
    chunk_probs, _, chunk_metadata, forensic_stats, _router_feat, _verdict_feat = \
        run_e2e_inference(mono_tensor)

    progress(0.6, desc="📊 Computing distribution statistics...")
    seg_stats = _compute_segment_stats(chunk_probs, chunk_metadata)
    elapsed = time.time() - t0

    progress(0.8, desc="🎨 Generating visualizations...")
    is_short = duration < MIN_CONFIDENT_DURATION

    # Human-readable container label for the verdict card.
    audio_ext = os.path.splitext(audio_path)[1].lower()
    fmt_map = {".wav": "WAV", ".flac": "FLAC", ".mp3": "MP3",
               ".opus": "Opus", ".ogg": "OGG", ".m4a": "M4A",
               ".aac": "AAC", ".webm": "WebM"}
    audio_format = fmt_map.get(audio_ext, audio_ext.lstrip(".").upper() or "Unknown")

    # Prefer the duration-weighted median when the stats helper provides it.
    median_prob = seg_stats.get("weighted_median", seg_stats["median"])
    verdict = "AI Generated" if median_prob >= _MEDIAN_THRESHOLD else "Human-Made"

    # "Partial AI": a wide spread (IQR) with a meaningful count of BOTH
    # high- and low-probability segments suggests a mixed human/AI track.
    iqr = seg_stats.get("iqr", 0)
    n_high = seg_stats.get("n_high", 0)
    n_low = seg_stats.get("n_low", 0)
    n_total = seg_stats.get("n", 1)
    if (iqr >= 0.4
            and n_high >= max(3, n_total * 0.1)
            and n_low >= max(3, n_total * 0.1)):
        verdict = "Partial AI"

    verdict_html = _verdict_html(
        verdict, seg_stats, is_stereo,
        duration=duration, elapsed=elapsed,
        is_short=is_short, audio_format=audio_format,
    )

    spec_fig = plot_spectrograms(mono_np)
    timeline_fig = plot_timeline(
        chunk_probs, mono_np, chunk_metadata,
        weighted_median=seg_stats.get("weighted_median")
    )
    radar_fig = plot_forensic_radar(forensic_stats)
    bars_fig = plot_feature_bars(forensic_stats)
    forensic_explanation = forensic_features_explanation()

    filename = os.path.basename(audio_path) if audio_path else "unknown"
    result_json = {
        "filename": filename,
        "verdict": verdict,
        "is_short_file": is_short,
        "duration_sec": round(duration, 2),
        "is_stereo": is_stereo,
        "elapsed_sec": round(elapsed, 2),
        "segment_stats": {k: round(v, 4) if isinstance(v, float) else v
                          for k, v in seg_stats.items()},
        "segment_probs": [round(p, 4) for p in chunk_probs],
        "format": audio_format,
    }
    # Write to a unique temp file per analysis. The previous fixed path
    # (tempdir/artifactnet_result.json) was shared by every session, so a
    # later analysis could overwrite a file another user was downloading.
    fd, json_path = tempfile.mkstemp(prefix="artifactnet_result_", suffix=".json")
    with os.fdopen(fd, "w", encoding="utf-8") as f:
        json.dump(result_json, f, indent=2)

    progress(1.0, desc="✅ Analysis complete!")

    # Compact state dict consumed by submit_error_report().
    analysis_state = {
        "filename": filename,
        "duration": duration,
        "is_stereo": is_stereo,
        "elapsed": elapsed,
        "verdict": verdict,
        "forensic_stats": forensic_stats,
        "seg_stats": seg_stats,
        "chunk_probs": chunk_probs,
        "is_short": is_short,
        "predicted_verdict": "ai" if verdict == "AI Generated" else (
            "real" if verdict == "Human-Made" else "unknown"
        ),
        "predicted_probability": round(median_prob, 6),
    }
    return verdict_html, spec_fig, timeline_fig, radar_fig, bars_fig, forensic_explanation, json_path, analysis_state
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
# ============================================================
|
| 355 |
+
# Error report → api.intrect.io
|
| 356 |
+
# ============================================================
|
| 357 |
+
|
| 358 |
+
def submit_error_report(analysis_state, reported_as: str, comment: str):
    """Forward anonymous user feedback about a verdict to api.intrect.io.

    Always returns a gr.update(...) for the status HTML component and never
    raises — network/API failures are rendered as an inline error span.
    """
    def _status(html: str):
        # All outcomes surface through the same (initially hidden) HTML slot.
        return gr.update(visible=True, value=html)

    # A report only makes sense after a completed analysis.
    if not analysis_state or not analysis_state.get("filename"):
        return _status('<span style="color:#ff7675;font-size:12px;">Please analyze a file first.</span>')

    payload = {
        "filename": analysis_state.get("filename"),
        "reported_as": (reported_as or "unsure").lower(),
        "comment": (comment or "").strip()[:500],
        "predicted_verdict": analysis_state.get("predicted_verdict"),
        "predicted_probability": analysis_state.get("predicted_probability"),
        "source_hint": "hf-space",
    }

    try:
        with _requests.Session() as session:
            resp = session.post(
                f"{API_BASE.rstrip('/')}/v1/reports",
                data={"report": json.dumps(payload)},
                timeout=10,
            )
            if resp.status_code >= 300:
                # Prefer the API's structured "detail"; fall back to raw text.
                try:
                    detail = resp.json().get("detail", resp.text[:200])
                except Exception:
                    detail = resp.text[:200]
                return _status(f'<span style="color:#ff7675;font-size:12px;">Report failed: {detail}</span>')
    except Exception as e:
        return _status(f'<span style="color:#ff7675;font-size:12px;">Report failed: {e}</span>')

    return _status('<span style="color:#2ed573;font-size:12px;">✅ Thanks! Report submitted.</span>')
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
# ============================================================
|
| 396 |
+
# Gradio UI
|
| 397 |
+
# ============================================================
|
| 398 |
+
|
| 399 |
+
def build_ui():
    """Assemble the Gradio Blocks app: theme, layout, and event wiring.

    Returns the un-launched gr.Blocks demo; the module entry point calls
    .queue() and .launch() on it.
    """
    # Dark theme in the intrect.io brand palette (orange accents on navy).
    theme = gr.themes.Base(
        primary_hue="orange",
        secondary_hue="blue",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
    ).set(
        body_background_fill="#0f0f23",
        block_background_fill="#1a1a2e",
        block_border_color="#333",
        input_background_fill="#16213e",
        button_primary_background_fill="#ffa502",
        button_primary_text_color="black",
    )

    # Hide default Gradio chrome and style the top navigation bar.
    custom_css = """
    .gradio-container { margin: 0 auto !important; }
    footer { display: none !important; }
    .gr-button-primary { border-radius: 8px !important; font-weight: 600 !important; }
    .gr-input, .gr-box { border-color: #333 !important; }
    .gr-panel { border-color: #333 !important; }
    h1, h2, h3 { font-family: 'Inter', sans-serif !important; }
    .demo-nav { display: flex; justify-content: space-between; align-items: center;
    padding: 12px 20px; border-bottom: 1px solid #333; margin: -16px -16px 16px; }
    .demo-nav a { color: #8b949e; text-decoration: none; font-size: 13px; }
    .demo-nav a:hover { color: #ffa502; }
    .demo-nav .brand { color: #ffa502; font-weight: 700; font-size: 16px; letter-spacing: 2px; text-transform: uppercase; }
    """

    with gr.Blocks(theme=theme, css=custom_css,
                   title="ArtifactNet — AI Music Forensic Detector") as demo:
        # Top navigation bar linking back to the main site.
        gr.HTML("""
        <div class="demo-nav">
        <a href="https://intrect.io" class="brand">Intrect</a>
        <div style="display:flex;gap:20px;align-items:center;">
        <a href="https://intrect.io">Home</a>
        <a href="https://dash.intrect.io">Dashboard</a>
        <a href="https://intrect.io/#pricing">Pricing</a>
        </div>
        </div>
        """)

        # Page header plus a CPU-latency expectation notice.
        gr.HTML(f"""
        <div style="text-align:center;padding:16px 0 8px;">
        <h1 style="color:white;font-size:26px;margin:0;letter-spacing:-0.5px;">
        ArtifactNet
        </h1>
        <p style="color:#6e7681;font-size:13px;margin:4px 0 0;">
        AI-Generated Music Detection — ONNX Runtime CPU
        </p>
        <div style="margin:8px auto;max-width:540px;padding:6px 12px;background:rgba(255,165,2,0.12);
        border:1px solid #ffa502;border-radius:8px;font-size:12px;color:#ffa502;">
        Running on CPU — a 4-minute track takes ~30–60 s.
        </div>
        </div>
        """)

        # --- Main input/output row: upload on the left, verdict card on the right.
        with gr.Row():
            with gr.Column(scale=1):
                # Upload-only (no microphone/YouTube sources).
                audio_input = gr.Audio(
                    label="WAV / MP3 / FLAC (max 100MB, 5 min)",
                    type="filepath",
                    sources=["upload"],
                )
                analyze_btn = gr.Button("Analyze", variant="primary", size="lg")
            with gr.Column(scale=1):
                # Pre-populated with the empty "No file" card.
                verdict_output = gr.HTML(
                    value=_verdict_html("No file", {}, False, 0, 0, False),
                    label="Verdict",
                )
                # Feedback form feeding submit_error_report().
                with gr.Accordion("Think this result is wrong?", open=False):
                    gr.HTML(
                        """<p style="color:#aaa;font-size:12px;margin:4px 0;">
                        Help us improve — anonymous feedback.
                        </p>"""
                    )
                    report_reported_as = gr.Radio(
                        choices=[
                            ("It should be AI", "ai"),
                            ("It should be Real / Human", "real"),
                            ("Unsure / Mixed", "unsure"),
                        ],
                        label="What do you think it actually is?",
                        value="ai",
                    )
                    report_comment = gr.Textbox(
                        label="Optional comment (≤500 chars)",
                        placeholder="Any context we should know?",
                        max_lines=3,
                        lines=2,
                    )
                    report_submit_btn = gr.Button("🚩 Submit report", variant="secondary", size="sm")
                    # Hidden until a submit attempt produces a status message.
                    report_status = gr.HTML(value="", visible=False)

        # --- Visualization rows.
        with gr.Row():
            spec_output = gr.Plot(label="Spectral Analysis")

        with gr.Row():
            with gr.Column(scale=2):
                timeline_output = gr.Plot(label="P(AI) Timeline")
            with gr.Column(scale=1):
                radar_output = gr.Plot(label="Forensic Features")

        with gr.Row():
            bars_output = gr.Plot(label="Feature Strength Analysis")

        # Kept hidden; receives the forensic feature explanation HTML.
        forensic_explanation_output = gr.HTML(visible=False)

        with gr.Row():
            json_output = gr.File(label="Result JSON", visible=True)

        # --- Static "About" section (f-string only for MIN_CONFIDENT_DURATION).
        with gr.Accordion("About ArtifactNet", open=False):
            gr.HTML(f"""
            <div style="color:#ccc;font-size:13px;line-height:1.6;padding:10px;">
            <h3 style="color:white;">Overview</h3>
            <p>ArtifactNet is a neural forensic detector for AI-generated music.
            It uses HPSS and 7-channel forensic features to detect generation artifacts.</p>

            <h3 style="color:white;">Pipeline</h3>
            <ol>
            <li>STFT + U-Net artifact residual</li>
            <li>HPSS (harmonic-percussive separation)</li>
            <li>7ch features (mel, H/P ratio, temporal derivatives, spectral flux)</li>
            <li>CNN classifier → per-segment P(AI)</li>
            <li>Median aggregation across segments</li>
            </ol>

            <h3 style="color:white;">Limitations</h3>
            <ul>
            <li>Short files (<{MIN_CONFIDENT_DURATION}s) have lower confidence</li>
            <li>Mono input reduces accuracy</li>
            <li>Heavily processed audio may affect results</li>
            </ul>
            <p style="color:#888;font-size:11px;margin-top:10px;">
            Research project — interpret alongside other evidence. See
            <a href="https://api.intrect.io/legal/disclaimer" style="color:#6e7681;">Disclaimer</a>.
            </p>
            </div>
            """)

        # --- Event wiring. `outputs` order must match analyze_audio's return tuple.
        analysis_state = gr.State({})
        outputs = [verdict_output, spec_output, timeline_output,
                   radar_output, bars_output, forensic_explanation_output,
                   json_output, analysis_state]

        analyze_btn.click(
            fn=analyze_audio,
            inputs=[audio_input],
            outputs=outputs,
            api_name=False,
            # Serialize inference — only one CPU analysis at a time.
            concurrency_limit=1,
            concurrency_id="gpu_inference",
        )

        report_submit_btn.click(
            fn=submit_error_report,
            inputs=[analysis_state, report_reported_as, report_comment],
            outputs=[report_status],
        )

        # Footer with legal links.
        gr.HTML("""
        <div style="text-align:center;padding:24px 0 8px;border-top:1px solid #333;margin-top:24px;">
        <p style="color:#484f58;font-size:12px;margin:0;">
        Powered by <a href="https://intrect.io" style="color:#ffa502;text-decoration:none;">Intrect</a>
        | <a href="https://dash.intrect.io" style="color:#6e7681;text-decoration:none;">Dashboard</a>
        | <a href="https://intrect.io/#pricing" style="color:#6e7681;text-decoration:none;">Pricing</a>
        </p>
        <p style="color:#484f58;font-size:11px;margin:6px 0 0;">
        <a href="https://api.intrect.io/legal/terms" style="color:#6e7681;text-decoration:none;">Terms</a>
        · <a href="https://api.intrect.io/legal/privacy" style="color:#6e7681;text-decoration:none;">Privacy</a>
        · <a href="https://api.intrect.io/legal/disclaimer" style="color:#6e7681;text-decoration:none;">Disclaimer</a>
        </p>
        <p style="color:#484f58;font-size:10px;margin:8px 0 0;font-style:italic;">
        ArtifactNet provides forensic indicators, not conclusive legal proof.
        </p>
        </div>
        """)

    return demo
|
| 578 |
+
|
| 579 |
+
|
| 580 |
+
# ============================================================
|
| 581 |
+
# Entry point
|
| 582 |
+
# ============================================================
|
| 583 |
+
|
| 584 |
+
print("[hf-spaces] downloading ONNX models from HF Hub...", flush=True)
|
| 585 |
+
load_models()
|
| 586 |
+
print("[hf-spaces] models ready (onnxruntime CPU).", flush=True)
|
| 587 |
+
|
| 588 |
+
demo = build_ui()
|
| 589 |
+
demo.queue(max_size=10, default_concurrency_limit=1)
|
| 590 |
+
|
| 591 |
+
|
| 592 |
+
if __name__ == "__main__":
|
| 593 |
+
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
CHANGED
|
@@ -1,30 +1,28 @@
|
|
| 1 |
-
# Purpose: ArtifactNet HF Spaces demo — constants
|
| 2 |
-
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
#
|
| 10 |
-
HF_MODEL_REPO = "intrect/artifactnet-models"
|
| 11 |
-
UNET_ONNX_FILENAME = "
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
# Audio constants
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
#
|
| 28 |
-
|
| 29 |
-
# ============================================================
|
| 30 |
-
E2E_BATCH_SIZE = get_params('batch')
|
|
|
|
| 1 |
+
# Purpose: ArtifactNet HF Spaces demo — constants (HF Hub paths)

"""Configuration for the HF Spaces (CPU / onnxruntime) build.

Weights are downloaded from the HF Hub repo intrect/artifactnet-models;
only the .onnx exports are fetched (the .pt checkpoints are private).
"""

# HF Hub model repo
HF_MODEL_REPO = "intrect/artifactnet-models"
UNET_ONNX_FILENAME = "production-onnx/unet_codec4.onnx"
CNN_ONNX_FILENAME = "production-onnx/cnn_v94.onnx"

# Audio constants
SR = 44100                           # sample rate expected by the models (Hz)
MAX_DURATION_SEC = 300               # hard cap on analyzed audio length
CHUNK_SEC = 4.0                      # per-segment window length (seconds)
CHUNK_SAMPLES = int(CHUNK_SEC * SR)  # window length in samples

# STFT
N_FFT = 2048
HOP_LENGTH = 512

# CNN batch size — on the Space's ~2 vCPUs, larger batches only add latency.
BATCH_SIZE = 4

# Files shorter than this (seconds) get a "Too Short" low-confidence warning.
MIN_CONFIDENT_DURATION = 60
|
|
|
|
|
|
core/__init__.py
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
# Proprietary core algorithms (IP protected)
|
| 2 |
-
|
| 3 |
-
"""Core algorithms for ArtifactNet — CONFIDENTIAL."""
|
| 4 |
-
|
| 5 |
-
from .proprietary import compute_stats, classify, get_params
|
| 6 |
-
|
| 7 |
-
__all__ = ['compute_stats', 'classify', 'get_params']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
core/__pycache__/proprietary.cpython-312.pyc
DELETED
|
Binary file (7.03 kB)
|
|
|
core/proprietary.py
DELETED
|
@@ -1,192 +0,0 @@
|
|
| 1 |
-
# CONFIDENTIAL - ArtifactNet Proprietary Algorithms
|
| 2 |
-
# Copyright (c) 2026. All rights reserved.
|
| 3 |
-
# Trade secrets and proprietary algorithms.
|
| 4 |
-
# Reverse engineering, decompilation, or disclosure is strictly prohibited.
|
| 5 |
-
|
| 6 |
-
"""Proprietary core algorithms — IP protected with runtime decryption."""
|
| 7 |
-
|
| 8 |
-
import base64
|
| 9 |
-
import json
|
| 10 |
-
import numpy as np
|
| 11 |
-
|
| 12 |
-
# Encrypted parameters (XOR + Base64) - DO NOT MODIFY
|
| 13 |
-
_ENC_P = 'AR3tX367a8ZODq4dcKFpkRJK8EYD8i6RWAW+GXKxZ9JYUcFLOvVpyFoNrhlkrWvQElDuD2ahfsNIE74PMt4mlxZMvBd8sHnKVh+8Tz31KJpYBb4VcKFpnxtHwUkp82nIWgyuHSE='
|
| 14 |
-
_ENC_T = 'IQ+wFXChe9xIE74dcrZ+3loPsBxp3A=='
|
| 15 |
-
|
| 16 |
-
# Key fragments (obfuscated distribution)
|
| 17 |
-
_K1 = [122, 63]
|
| 18 |
-
_K2 = [158, 45, 92]
|
| 19 |
-
_K3 = [129, 75, 242]
|
| 20 |
-
_K = _K1 + _K2 + _K3
|
| 21 |
-
|
| 22 |
-
# Decryption cache (computed once)
|
| 23 |
-
_cache = {}
|
| 24 |
-
|
| 25 |
-
# Obfuscated constants (decoys)
|
| 26 |
-
_MAGIC_A = 0x1F3D5A7B
|
| 27 |
-
_MAGIC_B = 0x9C8E2F41
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def _d(s, k):
|
| 31 |
-
"""Obfuscated decryption routine with anti-tampering."""
|
| 32 |
-
if s in _cache:
|
| 33 |
-
return _cache[s]
|
| 34 |
-
|
| 35 |
-
# Anti-tampering check (dummy operation)
|
| 36 |
-
if not _verify():
|
| 37 |
-
k = [x ^ 0xFF for x in k] # Corrupt key if tampered
|
| 38 |
-
|
| 39 |
-
try:
|
| 40 |
-
# Decode base64
|
| 41 |
-
b = base64.b64decode(s.encode('utf-8'))
|
| 42 |
-
r = bytearray()
|
| 43 |
-
|
| 44 |
-
# XOR decryption with key rotation
|
| 45 |
-
for i, x in enumerate(b):
|
| 46 |
-
# Obfuscated XOR (adds dummy operations)
|
| 47 |
-
decrypted_byte = x ^ k[i % len(k)]
|
| 48 |
-
# Dummy operation (no effect)
|
| 49 |
-
if i % 17 == 0:
|
| 50 |
-
decrypted_byte = (decrypted_byte ^ 0x00) & 0xFF
|
| 51 |
-
r.append(decrypted_byte)
|
| 52 |
-
|
| 53 |
-
# Parse JSON
|
| 54 |
-
v = json.loads(r.decode('utf-8'))
|
| 55 |
-
_cache[s] = v
|
| 56 |
-
return v
|
| 57 |
-
except Exception:
|
| 58 |
-
# Fallback to prevent crashes
|
| 59 |
-
return {} if isinstance(s, str) and len(s) > 50 else []
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
def get_params(key: str = None):
|
| 63 |
-
"""Get proprietary parameters (encrypted at rest, decrypted at runtime)."""
|
| 64 |
-
p = _d(_ENC_P, _K)
|
| 65 |
-
if key:
|
| 66 |
-
return p.get(key)
|
| 67 |
-
return p.copy()
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
def compute_stats(chunk_probs: list[float]) -> dict:
|
| 71 |
-
"""Proprietary distribution statistics computation.
|
| 72 |
-
|
| 73 |
-
Algorithm obfuscated with control flow complexity and encrypted thresholds.
|
| 74 |
-
"""
|
| 75 |
-
arr = np.array(chunk_probs)
|
| 76 |
-
n = len(arr)
|
| 77 |
-
|
| 78 |
-
# Handle edge case: empty array (very short audio)
|
| 79 |
-
if n == 0:
|
| 80 |
-
return {
|
| 81 |
-
"n": 0,
|
| 82 |
-
"mean": 0.5,
|
| 83 |
-
"median": 0.5,
|
| 84 |
-
"q25": 0.5,
|
| 85 |
-
"q75": 0.5,
|
| 86 |
-
"iqr": 0.0,
|
| 87 |
-
"std": 0.0,
|
| 88 |
-
"pct_high": 0.0,
|
| 89 |
-
"pct_above_50": 0.0,
|
| 90 |
-
"pct_low": 0.0,
|
| 91 |
-
"n_high": 0,
|
| 92 |
-
"n_mid": 0,
|
| 93 |
-
"n_low": 0,
|
| 94 |
-
}
|
| 95 |
-
|
| 96 |
-
# Obfuscated percentile calculation
|
| 97 |
-
q = np.percentile(arr, [25, 50, 75])
|
| 98 |
-
q25, q50, q75 = q[0], q[1], q[2]
|
| 99 |
-
|
| 100 |
-
# Decrypt thresholds (runtime decryption)
|
| 101 |
-
t = _d(_ENC_T, _K)
|
| 102 |
-
|
| 103 |
-
# Obfuscated threshold comparisons with dummy operations
|
| 104 |
-
mask_h = _h1(arr, t[0])
|
| 105 |
-
mask_m = _h2(arr, 0.5, t[0])
|
| 106 |
-
mask_l = arr < 0.5
|
| 107 |
-
mask_low = arr < t[1]
|
| 108 |
-
|
| 109 |
-
# Dummy computation (no effect, increases complexity)
|
| 110 |
-
_dummy = _calibrate_threshold(0.5, offset=0.1) if n > 5 else 0.5
|
| 111 |
-
|
| 112 |
-
# Statistical aggregation (obfuscated)
|
| 113 |
-
return {
|
| 114 |
-
"n": n,
|
| 115 |
-
"mean": float(np.nan_to_num(np.mean(arr), nan=0.5)),
|
| 116 |
-
"median": float(np.nan_to_num(q50, nan=0.5)),
|
| 117 |
-
"q25": float(np.nan_to_num(q25, nan=0.5)),
|
| 118 |
-
"q75": float(np.nan_to_num(q75, nan=0.5)),
|
| 119 |
-
"iqr": float(np.nan_to_num(q75 - q25, nan=0.0)),
|
| 120 |
-
"std": float(np.nan_to_num(np.std(arr), nan=0.0)),
|
| 121 |
-
"pct_high": float(mask_h.sum() / n) if n > 0 else 0.0,
|
| 122 |
-
"pct_above_50": float((arr >= 0.5).sum() / n) if n > 0 else 0.0,
|
| 123 |
-
"pct_low": float(mask_low.sum() / n) if n > 0 else 0.0,
|
| 124 |
-
"n_high": int(mask_h.sum()),
|
| 125 |
-
"n_mid": int(mask_m.sum()),
|
| 126 |
-
"n_low": int(mask_l.sum()),
|
| 127 |
-
}
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
def classify(stats: dict) -> str:
|
| 131 |
-
"""3-Tier distribution-based verdict (v8.0).
|
| 132 |
-
|
| 133 |
-
Encrypted threshold-based classification using segment distribution statistics.
|
| 134 |
-
"""
|
| 135 |
-
t = _d(_ENC_T, _K)
|
| 136 |
-
ph = stats["pct_high"]
|
| 137 |
-
pa = stats["pct_above_50"]
|
| 138 |
-
|
| 139 |
-
if _verify() and (ph + pa) >= 0:
|
| 140 |
-
if ph >= t[2]:
|
| 141 |
-
return "AI Generated"
|
| 142 |
-
elif pa < t[3]:
|
| 143 |
-
return "Human-Made"
|
| 144 |
-
else:
|
| 145 |
-
return "Uncertain"
|
| 146 |
-
else:
|
| 147 |
-
return "Error"
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
# Anti-tampering check (dummy function to increase complexity)
|
| 151 |
-
def _verify():
|
| 152 |
-
"""Integrity verification (obfuscated)."""
|
| 153 |
-
# XOR checksum (122 ^ 242 = 136)
|
| 154 |
-
return len(_K) == 8 and _K[0] ^ _K[-1] == 136
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
# Dummy decoy functions (increase reverse engineering cost)
|
| 158 |
-
def _calibrate_threshold(x, offset=0.0):
|
| 159 |
-
"""Decoy function - not used in actual algorithm."""
|
| 160 |
-
return x + offset * 0.01
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
def _normalize_distribution(arr):
|
| 164 |
-
"""Decoy function - not used in actual algorithm."""
|
| 165 |
-
return (arr - arr.min()) / (arr.max() - arr.min() + 1e-10)
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
def _apply_smoothing(probs, window=3):
|
| 169 |
-
"""Decoy function - not used in actual algorithm."""
|
| 170 |
-
if len(probs) < window:
|
| 171 |
-
return probs
|
| 172 |
-
return [sum(probs[max(0, i - window // 2):i + window // 2 + 1]) / window
|
| 173 |
-
for i in range(len(probs))]
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
# Obfuscated helpers (used internally)
|
| 177 |
-
def _h1(v, t):
|
| 178 |
-
"""Helper 1 (obfuscated name) - threshold comparison."""
|
| 179 |
-
return v >= t
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
def _h2(v, lo, hi):
|
| 183 |
-
"""Helper 2 (obfuscated name) - range check."""
|
| 184 |
-
return (v >= lo) & (v < hi)
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
# Memory protection: clear key fragments on module unload (Python limitation)
|
| 188 |
-
def _cleanup():
|
| 189 |
-
"""Clear sensitive data from memory (best effort)."""
|
| 190 |
-
global _K, _K1, _K2, _K3, _cache
|
| 191 |
-
_cache.clear()
|
| 192 |
-
# Note: Python doesn't guarantee memory erasure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docker-compose.youtube-proxy.yml
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
version: '3.9'
|
| 2 |
-
|
| 3 |
-
services:
|
| 4 |
-
youtube-proxy:
|
| 5 |
-
build:
|
| 6 |
-
context: .
|
| 7 |
-
dockerfile: Dockerfile.youtube-proxy
|
| 8 |
-
image: artifactnet-youtube-proxy:latest
|
| 9 |
-
container_name: artifactnet-youtube-proxy
|
| 10 |
-
restart: unless-stopped
|
| 11 |
-
environment:
|
| 12 |
-
- HOST=0.0.0.0
|
| 13 |
-
- PORT=8765
|
| 14 |
-
- LOG_LEVEL=INFO
|
| 15 |
-
- YOUTUBE_PROXY_API_KEY=${YOUTUBE_PROXY_API_KEY:-c60ba3dc9f26cfc700958983f82b997eac084743aad9f5be4db7bb625ae6dbbd}
|
| 16 |
-
ports:
|
| 17 |
-
- "0.0.0.0:8765:8765" # Accessible to cloudflared tunnel
|
| 18 |
-
healthcheck:
|
| 19 |
-
test: ["CMD", "python", "-c", "import requests; requests.get('http://localhost:8765/health')"]
|
| 20 |
-
interval: 30s
|
| 21 |
-
timeout: 10s
|
| 22 |
-
retries: 3
|
| 23 |
-
start_period: 5s
|
| 24 |
-
networks:
|
| 25 |
-
- default
|
| 26 |
-
security_opt:
|
| 27 |
-
- no-new-privileges:true
|
| 28 |
-
cap_drop:
|
| 29 |
-
- ALL
|
| 30 |
-
cap_add:
|
| 31 |
-
- NET_BIND_SERVICE
|
| 32 |
-
|
| 33 |
-
networks:
|
| 34 |
-
default:
|
| 35 |
-
name: artifactnet-network
|
| 36 |
-
driver: bridge
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
inference/audio_utils.py
CHANGED
|
@@ -1,54 +1,140 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Purpose: Audio load/resample/sliding-chunk utilities for HF Spaces
|
| 2 |
+
# Dependencies: soundfile, torch, numpy
|
| 3 |
+
|
| 4 |
+
"""HF Space 전용 — demo/ 나 vendor/ 의존성 없음.
|
| 5 |
+
|
| 6 |
+
- load_audio: soundfile 우선, 실패시 ffmpeg WAV 변환 fallback
|
| 7 |
+
- sliding_chunks: production infer.py::_sliding_chunks 와 동일한 규칙
|
| 8 |
+
· stride=CHUNK_SAMPLES (4s)
|
| 9 |
+
· 꼬리 chunk 는 actual_ratio >= 0.5 일 때만 유지
|
| 10 |
+
· 최소 1 chunk 보장 (짧은 곡도 padding)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import subprocess
|
| 16 |
+
import tempfile
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
import soundfile as sf
|
| 21 |
+
import torch
|
| 22 |
+
import torch.nn.functional as F
|
| 23 |
+
|
| 24 |
+
from config import SR, MAX_DURATION_SEC, CHUNK_SAMPLES
|
| 25 |
+
|
| 26 |
+
_NEEDS_FFMPEG = {".m4a", ".aac", ".wma", ".opus", ".mp4", ".webm"}
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _ffmpeg_to_wav(path: str) -> str | None:
    """Transcode *path* to a temporary 44.1 kHz stereo float32 WAV via ffmpeg.

    Returns the temp WAV path on success, or None if ffmpeg is missing,
    fails, or times out. On success the caller owns (and must delete) the
    returned file.

    Note: the original used tempfile.mktemp(), which is deprecated and
    race-prone, and leaked the partial output file when ffmpeg failed.
    NamedTemporaryFile(delete=False) reserves the name safely, and the
    failure path now unlinks the file.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        tmp = f.name
    try:
        r = subprocess.run(
            ["ffmpeg", "-hide_banner", "-loglevel", "error",
             "-i", str(path), "-f", "wav", "-acodec", "pcm_f32le",
             # Force stereo, model sample rate, and the global duration cap.
             "-ac", "2", "-ar", str(SR), "-t", str(MAX_DURATION_SEC),
             "-y", tmp],
            capture_output=True, timeout=30,
        )
        if r.returncode == 0:
            return tmp
    except Exception:
        # ffmpeg absent, timeout, etc. — fall through to cleanup.
        pass
    Path(tmp).unlink(missing_ok=True)
    return None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_audio(path: str) -> tuple[np.ndarray, bool]:
    """Return (audio[samples, channels] float32, is_stereo)."""
    # Containers soundfile cannot decode directly are first transcoded to a
    # temporary WAV with ffmpeg; that temp file is removed in the finally block.
    ext = Path(path).suffix.lower()
    converted = None
    if ext in _NEEDS_FFMPEG:
        converted = _ffmpeg_to_wav(path)
        if converted is None:
            raise RuntimeError(f"Failed to convert {ext} via ffmpeg")
        path = converted

    try:
        # always_2d guarantees shape (samples, channels) even for mono input.
        audio, sr = sf.read(str(path), dtype="float32", always_2d=True)

        if sr != SR:
            try:
                # Preferred resampler: torchaudio (expects channels-first,
                # hence the transposes around the call).
                import torchaudio
                t = torch.from_numpy(audio.T)
                resampler = torchaudio.transforms.Resample(sr, SR)
                audio = resampler(t).T.numpy()
            except Exception:
                # scipy polyphase fallback — lower fidelity than the
                # torchaudio path but prevents a crash when it is unavailable.
                from scipy.signal import resample_poly
                up, down = SR, sr
                audio = np.stack([
                    resample_poly(audio[:, c], up, down)
                    for c in range(audio.shape[1])
                ], axis=1).astype(np.float32)

        # Hard cap on length (MAX_DURATION_SEC) to bound CPU inference time.
        max_samples = MAX_DURATION_SEC * SR
        if len(audio) > max_samples:
            audio = audio[:max_samples]

        is_stereo = audio.shape[1] >= 2
        return audio.astype(np.float32), is_stereo
    finally:
        # Always remove the ffmpeg temp WAV, even if decoding raised.
        if converted:
            Path(converted).unlink(missing_ok=True)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def load_audio_mono_tensor(path: str) -> tuple[torch.Tensor, np.ndarray, bool]:
    """Load *path* and return (mono torch tensor, raw [samples, ch] array, is_stereo).

    Stereo input is downmixed by averaging the first two channels; mono input
    passes channel 0 through unchanged.
    """
    raw, stereo = load_audio(path)
    left = raw[:, 0]
    mono = (left + raw[:, 1]) / 2.0 if stereo and raw.shape[1] >= 2 else left
    return torch.from_numpy(mono), raw, stereo
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def sliding_chunks(wav: torch.Tensor, chunk_size: int = CHUNK_SAMPLES,
                   min_actual_ratio: float = 0.5) -> list[tuple[torch.Tensor, dict]]:
    """Slide over the whole song in chunk_size windows (stride == chunk_size),
    matching the production chunking rule.

    Returns:
        [(chunk_tensor, metadata), ...] — metadata carries start_sample,
        actual_samples, actual_ratio and rms (rms is computed on the padded
        chunk).  An empty list means the clip is shorter than half a chunk
        and the caller should report "Too Short".
    """
    total = wav.shape[0]
    out: list[tuple[torch.Tensor, dict]] = []

    # Under half a chunk (< 2 s) — signal "Too Short" with an empty result.
    if total < chunk_size // 2:
        return out

    start = 0
    while start < total:
        piece = wav[start:start + chunk_size]
        got = piece.shape[0]
        ratio = got / chunk_size
        if ratio >= min_actual_ratio:
            if got < chunk_size:
                piece = F.pad(piece, (0, chunk_size - got))
            out.append((piece, {
                "start_sample": int(start),
                "actual_samples": int(got),
                "actual_ratio": float(ratio),
                "rms": float(torch.sqrt(torch.mean(piece ** 2))),
            }))
        start += chunk_size

    if not out:
        # 2–4 second clip — guarantee at least one zero-padded chunk.
        piece = wav[:chunk_size]
        piece = F.pad(piece, (0, chunk_size - piece.shape[0]))
        out.append((piece, {
            "start_sample": 0,
            "actual_samples": int(total),
            "actual_ratio": float(total / chunk_size),
            "rms": float(torch.sqrt(torch.mean(piece ** 2))),
        }))
    return out
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
def get_audio_info(audio: np.ndarray, is_stereo: bool) -> dict:
    """Summarize a decoded clip (duration, rate, channel layout, length)."""
    n_samples = len(audio)
    return {
        "duration": n_samples / SR,
        "sr": SR,
        "channels": "Stereo" if is_stereo else "Mono",
        "samples": n_samples,
    }
|
inference/e2e_model.py
CHANGED
|
@@ -1,49 +1,293 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Purpose: ArtifactNet 7ch inference pipeline — HF Spaces (CPU, ONNX Runtime)
|
| 2 |
+
# Dependencies: onnxruntime, torch (HPSS/Mel only), huggingface_hub, scipy
|
| 3 |
+
|
| 4 |
+
"""ArtifactNet v9.4 inference — onnxruntime CPU.
|
| 5 |
+
|
| 6 |
+
UNet + CNN 은 .onnx (public-safe) 로 실행, HPSS + Mel + 7ch feature 는
|
| 7 |
+
pytorch CPU 로 처리 (가중치 없는 고정 연산이라 노출 위험 없음).
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import onnxruntime as ort
|
| 15 |
+
import torch
|
| 16 |
+
from huggingface_hub import hf_hub_download
|
| 17 |
+
from scipy import stats as sp_stats
|
| 18 |
+
|
| 19 |
+
from config import (
|
| 20 |
+
HF_MODEL_REPO, UNET_ONNX_FILENAME, CNN_ONNX_FILENAME,
|
| 21 |
+
SR, N_FFT, HOP_LENGTH, CHUNK_SAMPLES, BATCH_SIZE,
|
| 22 |
+
)
|
| 23 |
+
from .audio_utils import sliding_chunks
|
| 24 |
+
from .model import (
|
| 25 |
+
DifferentiableMel, hpss_gpu_pure, compute_forensic_features_7ch,
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
N_MELS = 128
|
| 29 |
+
|
| 30 |
+
FREQ_BANDS = [
|
| 31 |
+
("sub", 0, 250),
|
| 32 |
+
("low", 250, 2000),
|
| 33 |
+
("mid", 2000, 6000),
|
| 34 |
+
("hi_mid", 6000, 10000),
|
| 35 |
+
("hi", 10000, 16000),
|
| 36 |
+
("air", 16000, 22050),
|
| 37 |
+
]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# ============================================================
|
| 41 |
+
# Lazy singletons
|
| 42 |
+
# ============================================================
|
| 43 |
+
|
| 44 |
+
_unet_sess: ort.InferenceSession | None = None
|
| 45 |
+
_cnn_sess: ort.InferenceSession | None = None
|
| 46 |
+
_mel: DifferentiableMel | None = None
|
| 47 |
+
_stft_window: torch.Tensor | None = None
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _ort_threads() -> int:
    """Thread count for ONNX Runtime intra-op parallelism.

    HF Spaces CPU basic tier has 2 vCPUs; the ORT_THREADS environment
    variable overrides the default, falling back to 2 on a bad value.
    """
    raw = os.environ.get("ORT_THREADS", "2")
    try:
        return int(raw)
    except ValueError:
        return 2
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _resolve_onnx(filename: str, env_var: str) -> str:
    """Resolve an ONNX model path.

    A local override (ARTIFACTNET_UNET_ONNX / ARTIFACTNET_CNN_ONNX) pointing
    at an existing file wins; otherwise the model is fetched from HF Hub.
    """
    override = os.environ.get(env_var)
    if override and Path(override).is_file():
        return override
    return hf_hub_download(HF_MODEL_REPO, filename)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def load_models():
    """Initialize the ONNX sessions plus the Mel filterbank and STFT window.

    Idempotent: intended to run once after import; later calls return
    immediately when the UNet session already exists.
    """
    global _unet_sess, _cnn_sess, _mel, _stft_window
    if _unet_sess is not None:
        return

    # Local-file override via env vars, otherwise download from HF Hub.
    unet_path = _resolve_onnx(UNET_ONNX_FILENAME, "ARTIFACTNET_UNET_ONNX")
    cnn_path = _resolve_onnx(CNN_ONNX_FILENAME, "ARTIFACTNET_CNN_ONNX")

    # CPU-only sessions; thread counts sized for the Spaces basic tier.
    opts = ort.SessionOptions()
    opts.intra_op_num_threads = _ort_threads()
    opts.inter_op_num_threads = 1
    opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

    _unet_sess = ort.InferenceSession(unet_path, sess_options=opts,
                                      providers=["CPUExecutionProvider"])
    _cnn_sess = ort.InferenceSession(cnn_path, sess_options=opts,
                                     providers=["CPUExecutionProvider"])

    # Weight-free preprocessing (mel projection, STFT window) stays in PyTorch.
    _mel = DifferentiableMel(sr=SR, n_fft=N_FFT, n_mels=N_MELS)
    _mel.eval()
    _stft_window = torch.hann_window(N_FFT)

    print(f"[hf-spaces] ONNX sessions ready (intra_threads={_ort_threads()})", flush=True)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
# ============================================================
|
| 93 |
+
# Feature extraction helpers (75-dim Router + 28-dim Verdict)
|
| 94 |
+
# ============================================================
|
| 95 |
+
|
| 96 |
+
def _extract_router_verdict_features(
    all_mag, all_res, all_H, all_P, all_mask, all_mel_res, probs,
):
    """Same logic as extract_features() in infer.py, executed on CPU.

    Builds the 75-dim router feature vector and the 28-dim verdict feature
    vector from the concatenated per-chunk tensors.  The exact append order
    below defines the feature layout the downstream models were trained on —
    do not reorder.

    Args:
        all_mag / all_res / all_H / all_P: (N, 1, F, T) STFT magnitude,
            residual, harmonic and percussive stacks over all chunks.
        all_mask: (N, 1, F, T) UNet masks.
        all_mel_res: (N, 1, N_MELS, T) residual mel spectrograms.
        probs: 1-D numpy array of per-chunk CNN probabilities.
    Returns:
        (router_feat, verdict_feat): float32 arrays of length 75 and 28.
    """
    # Linear-frequency axis for band bucketing.
    freq_hz = torch.linspace(0, SR / 2, all_mag.shape[2])
    orig_total = all_mag.pow(2).mean().item() + 1e-8
    res_total = all_res.pow(2).mean().item() + 1e-8

    # Map each (name, f_lo, f_hi) band to STFT bin indices.
    band_idx = []
    for _, flo, fhi in FREQ_BANDS:
        lo = (freq_hz >= flo).nonzero(as_tuple=True)[0]
        hi = (freq_hz >= fhi).nonzero(as_tuple=True)[0]
        band_idx.append((
            lo[0].item() if len(lo) else 0,
            hi[0].item() if len(hi) else all_mag.shape[2],
        ))

    # Per-band energy shares: original, residual, and their ratio (3 x 6 bands).
    rf = []
    for i0, i1 in band_idx:
        oe = all_mag[:, :, i0:i1, :].pow(2).mean().item() / orig_total
        re = all_res[:, :, i0:i1, :].pow(2).mean().item() / res_total
        rf.extend([oe, re, re / (oe + 1e-8)])

    # Residual mel profile compressed to 32 points, zero-centered, max-normalized.
    mel_profile = all_mel_res.mean(dim=[0, 3]).squeeze().cpu().numpy()
    step = N_MELS // 32
    compressed = mel_profile[:32 * step].reshape(32, step).mean(axis=1)
    compressed = compressed - compressed.mean()
    norm = np.abs(compressed).max() + 1e-8
    rf.extend((compressed / norm).tolist())

    # Global harmonic/percussive balance.
    H_total = all_H.pow(2).mean().item() + 1e-8
    P_total = all_P.pow(2).mean().item() + 1e-8
    hp_ratio = H_total / (H_total + P_total)
    rf.append(hp_ratio)

    # Per-band H and P energy shares (2 x 6 bands).
    for i0, i1 in band_idx:
        rf.extend([
            all_H[:, :, i0:i1, :].pow(2).mean().item() / H_total,
            all_P[:, :, i0:i1, :].pow(2).mean().item() / P_total,
        ])

    # UNet mask distribution statistics.
    mask_np = all_mask.cpu().numpy().flatten()
    rf.extend([
        float(mask_np.mean()), float(mask_np.std()),
        float(np.percentile(mask_np, 10)), float(np.percentile(mask_np, 25)),
        float(np.percentile(mask_np, 75)), float(np.percentile(mask_np, 90)),
        float(np.median(mask_np)),
    ])

    # Chunk-probability distribution statistics.
    rf.extend([
        float(probs.mean()), float(probs.std()), float(np.median(probs)),
        float(np.percentile(probs, 10)), float(np.percentile(probs, 90)),
    ])

    router_feat = np.nan_to_num(np.array(rf, dtype=np.float32))

    # --- 28-dim verdict vector: 20 probability stats + 8 residual stats ---
    arr = probs.astype(np.float64)
    n = len(arr)
    cnn_20 = np.array([
        n, arr.mean(), arr.std(), np.median(arr),
        arr.min(), arr.max(), arr.max() - arr.min(),
        np.percentile(arr, 10), np.percentile(arr, 25),
        np.percentile(arr, 75), np.percentile(arr, 90),
        (arr >= 0.3).mean(), (arr >= 0.5).mean(),
        (arr >= 0.7).mean(), (arr >= 0.8).mean(), (arr >= 0.9).mean(),
        # Higher moments / deltas need enough samples to be meaningful.
        float(sp_stats.skew(arr)) if n >= 3 else 0.0,
        float(sp_stats.kurtosis(arr)) if n >= 3 else 0.0,
        float(np.diff(arr).std()) if n >= 2 else 0.0,
        float(np.abs(np.diff(arr)).max()) if n >= 2 else 0.0,
    ], dtype=np.float32)

    # High-frequency (>= 8 kHz) start bin and the "air" band indices.
    hf8k_i = (freq_hz >= 8000).nonzero(as_tuple=True)[0]
    hf8k_i = hf8k_i[0].item() if len(hf8k_i) else all_mag.shape[2]
    ai0, ai1 = band_idx[5]

    res_8 = np.array([
        all_res[:, :, hf8k_i:, :].pow(2).mean().item() / res_total,
        all_res[:, :, ai0:ai1, :].pow(2).mean().item() / res_total,
        all_H[:, :, ai0:ai1, :].pow(2).mean().item() / H_total,
        all_P[:, :, ai0:ai1, :].pow(2).mean().item() / P_total,
        float(mel_profile[-1]),
        float(mel_profile[0]),
        float(mask_np.mean()),
        float(hp_ratio),
    ], dtype=np.float32)

    verdict_feat = np.nan_to_num(np.concatenate([cnn_20, res_8]))
    return router_feat, verdict_feat
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
# ============================================================
|
| 187 |
+
# Inference
|
| 188 |
+
# ============================================================
|
| 189 |
+
|
| 190 |
+
@torch.no_grad()
def run_e2e_inference(wav_mono_tensor: torch.Tensor):
    """mono waveform -> (probs, placeholder, metadata, forensic_stats, router_feat, verdict_feat).

    ONNX Runtime CPU + pytorch HPSS/Mel.

    Returns:
        probs: per-chunk AI probability list.
        placeholder: zeros_like(wav) — the residual waveform is not
            reconstructed in this build.
        metadata: per-chunk dicts produced by sliding_chunks().
        forensic_stats: 7 min-max-normalized channel statistics (empty dict
            when the clip yields no chunks).
        router_feat / verdict_feat: 75-dim and 28-dim feature vectors
            (zero vectors when the clip yields no chunks).
    """
    if _unet_sess is None:
        load_models()

    chunk_data = sliding_chunks(wav_mono_tensor, CHUNK_SAMPLES)
    if not chunk_data:
        # Too-short clip: return empty/zero results with the expected shapes.
        return [], torch.zeros_like(wav_mono_tensor), [], {}, \
            np.zeros(75, dtype=np.float32), np.zeros(28, dtype=np.float32)
    chunks = [chunk for chunk, _ in chunk_data]
    metadata_list = [meta for _, meta in chunk_data]

    probs = []
    all_features = []
    # Per-batch tensors are accumulated for whole-song feature extraction.
    all_mag_list, all_res_list, all_H_list, all_P_list = [], [], [], []
    all_mask_list, all_mel_res_list = [], []

    for i in range(0, len(chunks), BATCH_SIZE):
        batch = torch.stack(chunks[i:i + BATCH_SIZE])  # (B, CHUNK_SAMPLES)

        # STFT (torch, CPU)
        stft = torch.stft(
            batch, N_FFT, HOP_LENGTH,
            window=_stft_window, return_complex=True)
        stft_mag = stft.abs().unsqueeze(1)  # (B, 1, F, T)

        # UNet mask via ONNX
        mask_np = _unet_sess.run(
            ["mask"],
            {"stft_mag": stft_mag.numpy().astype(np.float32)},
        )[0]
        mask = torch.from_numpy(mask_np)
        res_mag = mask * stft_mag

        # HPSS — CPU median filter (unfold + median) to keep the training
        # distribution.  librosa.decompose.hpss produces different results and
        # misleads the v9.4 CNN (see the CLAUDE.md warning).
        H_mag, P_mag = hpss_gpu_pure(res_mag)

        # Mel 3-band
        mel_res = _mel(res_mag)
        mel_H = _mel(H_mag)
        mel_P = _mel(P_mag)

        features_7ch = compute_forensic_features_7ch(mel_res, mel_H, mel_P)
        all_features.append(features_7ch)

        # CNN logit via ONNX → sigmoid (clipped for numerical stability)
        logits = _cnn_sess.run(
            ["logit"],
            {"features_7ch": features_7ch.numpy().astype(np.float32)},
        )[0]
        batch_probs = (1.0 / (1.0 + np.exp(-np.clip(logits, -30, 30)))).tolist()
        probs.extend(batch_probs)

        all_mag_list.append(stft_mag)
        all_res_list.append(res_mag)
        all_H_list.append(H_mag)
        all_P_list.append(P_mag)
        all_mask_list.append(mask)
        all_mel_res_list.append(mel_res)

    if all_features:
        # Median of per-chunk channel means, min-max-normalized across
        # channels to [0, 1] for the UI radar/bars.
        all_feat_tensor = torch.cat(all_features, dim=0)
        channel_means = all_feat_tensor.mean(dim=[2, 3])
        feature_medians = channel_means.median(dim=0).values
        feat_min = channel_means.min(dim=0).values
        feat_max = channel_means.max(dim=0).values
        feat_range = feat_max - feat_min + 1e-8
        normalized = ((feature_medians - feat_min) / feat_range).clamp(0, 1)

        forensic_stats = {
            "residual_energy": float(normalized[0]),
            "harmonic_strength": float(normalized[1]),
            "percussive_strength": float(normalized[2]),
            "temporal_delta": float(normalized[3]),
            "temporal_accel": float(normalized[4]),
            "hp_ratio": float(normalized[5]),
            "spectral_flux": float(normalized[6]),
        }
    else:
        forensic_stats = {}

    probs_arr = np.array(probs, dtype=np.float32)
    if all_mag_list:
        # Concatenate every batch so router/verdict features see the full song.
        all_mag = torch.cat(all_mag_list, dim=0)
        all_res = torch.cat(all_res_list, dim=0)
        all_H = torch.cat(all_H_list, dim=0)
        all_P = torch.cat(all_P_list, dim=0)
        all_mask = torch.cat(all_mask_list, dim=0)
        all_mel_res = torch.cat(all_mel_res_list, dim=0)

        router_feat, verdict_feat = _extract_router_verdict_features(
            all_mag, all_res, all_H, all_P, all_mask, all_mel_res, probs_arr,
        )
    else:
        router_feat = np.zeros(75, dtype=np.float32)
        verdict_feat = np.zeros(28, dtype=np.float32)

    residual_placeholder = torch.zeros_like(wav_mono_tensor)
    return probs, residual_placeholder, metadata_list, forensic_stats, router_feat, verdict_feat
|
inference/model.py
ADDED
|
@@ -0,0 +1,398 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created: 2026-03-03
|
| 2 |
+
# Purpose: ArtifactNet 7ch Forensic CNN 아키텍처 (PyTorch)
|
| 3 |
+
# Dependencies: torch, numpy
|
| 4 |
+
|
| 5 |
+
"""ArtifactNet model architecture — ArtifactUNet + 7ch Forensic CNN.
|
| 6 |
+
|
| 7 |
+
v9.0: PyTorch 7ch pipeline (replaces ONNX v8.0).
|
| 8 |
+
GPU required for HPSS median filtering.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn as nn
|
| 14 |
+
import torch.nn.functional as F
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
SR = 44100
|
| 18 |
+
N_FFT = 2048
|
| 19 |
+
HOP_LENGTH = 512
|
| 20 |
+
N_MELS = 128
|
| 21 |
+
FREQ_BINS = N_FFT // 2 + 1 # 1025
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ============================================================
|
| 25 |
+
# GatedResidualBlock
|
| 26 |
+
# ============================================================
|
| 27 |
+
|
| 28 |
+
class GatedResidualBlock(nn.Module):
    """Residual block with a GLU-style gate over a dilated 3x3 convolution.

    The input is projected down to half the channels, run through a dilated
    conv that doubles the width again, split into a content branch (tanh)
    and a gate branch (sigmoid), then projected back before the skip add.
    """

    def __init__(self, channels, dilation=1):
        super().__init__()
        mid = channels // 2
        self.proj_in = nn.Conv2d(channels, mid, 1)
        self.conv = nn.Conv2d(
            mid, mid * 2, 3,
            dilation=dilation, padding=dilation)
        self.bn = nn.BatchNorm2d(mid * 2)
        self.proj_out = nn.Conv2d(mid, channels, 1)

    def forward(self, x):
        bottleneck = F.relu(self.proj_in(x))
        pre_gate = self.bn(self.conv(bottleneck))
        content, gate = pre_gate.chunk(2, dim=1)
        return x + self.proj_out(torch.tanh(content) * torch.sigmoid(gate))
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ============================================================
|
| 49 |
+
# ConvBlock
|
| 50 |
+
# ============================================================
|
| 51 |
+
|
| 52 |
+
class ConvBlock(nn.Module):
    """Standard U-Net double convolution: two Conv-BN-ReLU stages."""

    def __init__(self, in_ch, out_ch):
        super().__init__()
        layers = [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        ]
        self.block = nn.Sequential(*layers)

    def forward(self, x):
        return self.block(x)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# ============================================================
|
| 69 |
+
# ArtifactUNet
|
| 70 |
+
# ============================================================
|
| 71 |
+
|
| 72 |
+
class ArtifactUNet(nn.Module):
    """STFT magnitude masking U-Net. mask in [0, 0.5].

    4-level encoder/decoder with a gated-residual bottleneck.  The input is
    zero-padded so the freq/time axes are multiples of 16 (four 2x poolings
    divide evenly) and the predicted mask is cropped back to the original
    size on output.
    """

    def __init__(self, base_channels=32, mask_max=0.5):
        super().__init__()
        c = base_channels
        # Upper bound of the sigmoid mask (default: remove at most 50%).
        self.mask_max = mask_max

        self.enc1 = ConvBlock(1, c)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.enc2 = ConvBlock(c, c * 2)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.enc3 = ConvBlock(c * 2, c * 4)
        self.pool3 = nn.MaxPool2d(2, 2)
        self.enc4 = ConvBlock(c * 4, c * 8)
        self.pool4 = nn.MaxPool2d(2, 2)

        # Dilated gated-residual stack: wider receptive field without pooling.
        self.bottleneck = nn.Sequential(
            GatedResidualBlock(c * 8, dilation=1),
            GatedResidualBlock(c * 8, dilation=2),
            GatedResidualBlock(c * 8, dilation=4),
        )

        self.up4 = nn.ConvTranspose2d(c * 8, c * 8, 2, stride=2)
        self.dec4 = ConvBlock(c * 16, c * 4)
        self.up3 = nn.ConvTranspose2d(c * 4, c * 4, 2, stride=2)
        self.dec3 = ConvBlock(c * 8, c * 2)
        self.up2 = nn.ConvTranspose2d(c * 2, c * 2, 2, stride=2)
        self.dec2 = ConvBlock(c * 4, c)
        self.up1 = nn.ConvTranspose2d(c, c, 2, stride=2)
        self.dec1 = ConvBlock(c * 2, c)

        self.mask_head = nn.Conv2d(c, 1, 1)

    def forward(self, x):
        # Pad freq/time up to multiples of 16 so four poolings divide evenly.
        orig_f, orig_t = x.shape[2], x.shape[3]
        pad_f = (16 - orig_f % 16) % 16
        pad_t = (16 - orig_t % 16) % 16
        if pad_f > 0 or pad_t > 0:
            x = F.pad(x, (0, pad_t, 0, pad_f))

        e1 = self.enc1(x)
        e2 = self.enc2(self.pool1(e1))
        e3 = self.enc3(self.pool2(e2))
        e4 = self.enc4(self.pool3(e3))
        b = self.bottleneck(self.pool4(e4))

        d4 = self._skip_cat(self.up4(b), e4)
        d4 = self.dec4(d4)
        d3 = self._skip_cat(self.up3(d4), e3)
        d3 = self.dec3(d3)
        d2 = self._skip_cat(self.up2(d3), e2)
        d2 = self.dec2(d2)
        d1 = self._skip_cat(self.up1(d2), e1)
        d1 = self.dec1(d1)

        # Bounded sigmoid mask, cropped back to the un-padded input shape.
        mask = torch.sigmoid(self.mask_head(d1)) * self.mask_max
        return mask[:, :, :orig_f, :orig_t]

    @staticmethod
    def _skip_cat(up, skip):
        # Align the upsampled map to the encoder skip before concatenation:
        # transposed conv can over/undershoot by a pixel on odd sizes.
        df = skip.shape[2] - up.shape[2]
        dt = skip.shape[3] - up.shape[3]
        if df > 0 or dt > 0:
            up = F.pad(up, (0, max(dt, 0), 0, max(df, 0)))
        elif df < 0 or dt < 0:
            up = up[:, :, :skip.shape[2], :skip.shape[3]]
        return torch.cat([up, skip], dim=1)
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
# ============================================================
|
| 143 |
+
# ResidualCNNNch (7-channel forensic CNN)
|
| 144 |
+
# ============================================================
|
| 145 |
+
|
| 146 |
+
class ResidualCNNNch(nn.Module):
|
| 147 |
+
"""N-channel forensic CNN. Conv-BN-ReLU-Pool structure."""
|
| 148 |
+
|
| 149 |
+
def __init__(self, in_channels=7):
|
| 150 |
+
super().__init__()
|
| 151 |
+
self.in_channels = in_channels
|
| 152 |
+
self.features = nn.Sequential(
|
| 153 |
+
nn.Conv2d(in_channels, 32, 3, padding=1),
|
| 154 |
+
nn.BatchNorm2d(32),
|
| 155 |
+
nn.ReLU(inplace=True),
|
| 156 |
+
nn.MaxPool2d(2, 2),
|
| 157 |
+
nn.Conv2d(32, 64, 3, padding=1),
|
| 158 |
+
nn.BatchNorm2d(64),
|
| 159 |
+
nn.ReLU(inplace=True),
|
| 160 |
+
nn.MaxPool2d(2, 2),
|
| 161 |
+
nn.Conv2d(64, 128, 3, padding=1),
|
| 162 |
+
nn.BatchNorm2d(128),
|
| 163 |
+
nn.ReLU(inplace=True),
|
| 164 |
+
nn.AdaptiveAvgPool2d((4, 4)),
|
| 165 |
+
)
|
| 166 |
+
self.classifier = nn.Sequential(
|
| 167 |
+
nn.Dropout(0.5),
|
| 168 |
+
nn.Linear(128 * 4 * 4, 256),
|
| 169 |
+
nn.ReLU(inplace=True),
|
| 170 |
+
nn.Dropout(0.3),
|
| 171 |
+
nn.Linear(256, 1),
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
def forward(self, x):
|
| 175 |
+
x = self.features(x)
|
| 176 |
+
x = x.view(x.size(0), -1)
|
| 177 |
+
x = self.classifier(x)
|
| 178 |
+
return x.squeeze(-1)
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
class ResidualCNN7ch(nn.Module):
|
| 182 |
+
"""7-channel CNN for v9.x SOTA pipeline.
|
| 183 |
+
4-layer Conv + GlobalAvgPool + FC. ResidualCNNNch(3-conv)보다 깊음.
|
| 184 |
+
가중치: models/cnn_v94_best.pt (v9.4 SOTA, balanced dataset)"""
|
| 185 |
+
|
| 186 |
+
def __init__(self):
|
| 187 |
+
super().__init__()
|
| 188 |
+
self.conv1 = nn.Conv2d(7, 32, 3, padding=1); self.bn1 = nn.BatchNorm2d(32); self.pool1 = nn.MaxPool2d(2)
|
| 189 |
+
self.conv2 = nn.Conv2d(32, 64, 3, padding=1); self.bn2 = nn.BatchNorm2d(64); self.pool2 = nn.MaxPool2d(2)
|
| 190 |
+
self.conv3 = nn.Conv2d(64, 128, 3, padding=1); self.bn3 = nn.BatchNorm2d(128); self.pool3 = nn.MaxPool2d(2)
|
| 191 |
+
self.conv4 = nn.Conv2d(128, 256, 3, padding=1);self.bn4 = nn.BatchNorm2d(256); self.pool4 = nn.MaxPool2d(2)
|
| 192 |
+
self.global_pool = nn.AdaptiveAvgPool2d(1)
|
| 193 |
+
self.fc1 = nn.Linear(256, 128)
|
| 194 |
+
self.dropout = nn.Dropout(0.5)
|
| 195 |
+
self.fc2 = nn.Linear(128, 1)
|
| 196 |
+
|
| 197 |
+
def forward(self, x):
|
| 198 |
+
"""x: (B, 7, N_MELS, T) → (B,) logits"""
|
| 199 |
+
x = self.pool1(F.relu(self.bn1(self.conv1(x))))
|
| 200 |
+
x = self.pool2(F.relu(self.bn2(self.conv2(x))))
|
| 201 |
+
x = self.pool3(F.relu(self.bn3(self.conv3(x))))
|
| 202 |
+
x = self.pool4(F.relu(self.bn4(self.conv4(x))))
|
| 203 |
+
x = self.global_pool(x).view(x.size(0), -1)
|
| 204 |
+
return self.fc2(F.relu(self.fc1(x))).view(-1)
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
# ============================================================
|
| 208 |
+
# DifferentiableMel
|
| 209 |
+
# ============================================================
|
| 210 |
+
|
| 211 |
+
class DifferentiableMel(nn.Module):
    """STFT magnitude -> normalized log-mel (dB) spectrogram.

    The triangular mel filterbank is precomputed once in __init__ and stored
    as a non-trainable buffer, so forward is a single einsum plus dB scaling
    and per-sample standardization.
    """

    def __init__(self, sr=44100, n_fft=2048, n_mels=128, top_db=80.0):
        """
        Args:
            sr: sample rate that places the mel triangle corners.
            n_fft: FFT size; the filterbank covers n_fft // 2 + 1 linear bins.
            n_mels: number of mel bands.
            top_db: dynamic-range floor below each sample's peak, in dB.
        """
        super().__init__()
        n_freqs = n_fft // 2 + 1
        fb = self._create_mel_fb(n_freqs, n_mels, 0.0, sr / 2, sr)
        self.register_buffer('fb', fb)
        self.top_db = top_db

    @staticmethod
    def _create_mel_fb(n_freqs, n_mels, f_min, f_max, sr):
        """Build a (n_freqs, n_mels) triangular mel filterbank.

        Vectorized with numpy broadcasting instead of the previous
        O(n_freqs * n_mels) Python double loop (~131k iterations at the
        default sizes); the elementwise formulas and zero-width guards are
        unchanged, so the result is numerically identical.
        """
        def hz_to_mel(f):
            return 2595.0 * np.log10(1.0 + f / 700.0)

        def mel_to_hz(m):
            return 700.0 * (10.0 ** (m / 2595.0) - 1.0)

        mel_pts = np.linspace(hz_to_mel(f_min), hz_to_mel(f_max), n_mels + 2)
        hz_pts = mel_to_hz(mel_pts)
        freqs = np.linspace(0, sr / 2, n_freqs)

        # Triangle corner frequencies per band, broadcast against the bins.
        lo = hz_pts[:-2][None, :]    # (1, n_mels) left edge
        mid = hz_pts[1:-1][None, :]  # peak
        hi = hz_pts[2:][None, :]     # right edge
        f = freqs[:, None]           # (n_freqs, 1)

        # Degenerate (zero-width) segments contribute 0, exactly as in the
        # original per-bin loop; the denominator is patched to 1 to avoid
        # a division warning where the condition is False anyway.
        rise_w = np.where(mid - lo > 0, mid - lo, 1.0)
        fall_w = np.where(hi - mid > 0, hi - mid, 1.0)
        rise = np.where((f >= lo) & (f <= mid) & (mid - lo > 0),
                        (f - lo) / rise_w, 0.0)
        fall = np.where((f > mid) & (f <= hi) & (hi - mid > 0),
                        (hi - f) / fall_w, 0.0)
        # The rising and falling supports are disjoint (split at `mid`),
        # so their sum selects exactly one branch per entry.
        fb = (rise + fall).astype(np.float32)
        return torch.from_numpy(fb)

    def forward(self, stft_mag):
        """(B, 1, F, T) magnitude -> (B, 1, N_MELS, T) normalized log-mel."""
        x = stft_mag.squeeze(1)
        power = x ** 2
        mel = torch.einsum('fm,bft->bmt', self.fb, power)
        mel_db = 10.0 * torch.log10(torch.clamp(mel, min=1e-10))
        # Clamp to a top_db window below each sample's own peak.
        max_val = mel_db.amax(dim=(-2, -1), keepdim=True)
        mel_db = torch.clamp(mel_db, min=max_val - self.top_db)
        # Per-sample standardization (zero mean, unit std).
        mean = mel_db.mean(dim=(-2, -1), keepdim=True)
        std = mel_db.std(dim=(-2, -1), keepdim=True)
        mel_norm = (mel_db - mean) / (std + 1e-9)
        return mel_norm.unsqueeze(1)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
# ============================================================
|
| 260 |
+
# CPU HPSS (librosa)
|
| 261 |
+
# ============================================================
|
| 262 |
+
|
| 263 |
+
def hpss_cpu(mag):
    """HPSS via librosa on CPU. mag: (B, 1, F, T) tensor -> H_mag, P_mag tensors.

    Each batch item goes numpy -> librosa.decompose.hpss -> back to tensor.
    Demo-only CPU pipeline; the training-time GPU HPSS lives in
    train_nch_cnn_020303.py.
    """
    import librosa

    device = mag.device
    frames = mag.squeeze(1).cpu().numpy()  # (B, F, T)

    pairs = [librosa.decompose.hpss(frame, kernel_size=31) for frame in frames]
    H_stack = np.stack([h for h, _ in pairs])
    P_stack = np.stack([p for _, p in pairs])

    H_mag = torch.from_numpy(H_stack).unsqueeze(1).to(device)  # (B, 1, F, T)
    P_mag = torch.from_numpy(P_stack).unsqueeze(1).to(device)
    return H_mag, P_mag
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# ============================================================
|
| 287 |
+
# GPU/MPS HPSS (순수 PyTorch — unfold + median, Triton 불필요)
|
| 288 |
+
# ============================================================
|
| 289 |
+
|
| 290 |
+
def _gpu_median_filter_2d(x, kernel_size, dim):
    """Median filter along one axis using reflect padding + unfold + median.

    Fast on CUDA; on MPS torch's median is extremely slow, so callers should
    prefer _avg_filter_2d there.

    Args:
        x: (B, F, T) tensor
        kernel_size: odd window length
        dim: 1 = frequency axis (percussive extraction),
             2 = time axis (harmonic extraction)
    """
    half = kernel_size // 2
    if dim == 2:
        windows = F.pad(x, (half, half), mode='reflect').unfold(2, kernel_size, 1)
    else:
        windows = F.pad(x, (0, 0, half, half), mode='reflect').unfold(1, kernel_size, 1)
    return windows.median(dim=-1).values
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
def _avg_filter_2d(x, kernel_size, dim):
    """avg_pool-based smoothing — an MPS-friendly stand-in for the median
    filter (~400x faster there).

    Not identical to a true median, but a sufficient approximation for HPSS
    Wiener masking: the H/P split depends on relative, not absolute,
    magnitudes, so the accuracy impact is negligible.

    Args:
        x: (B, F, T) tensor
        kernel_size: odd window length
        dim: 1 = frequency axis, 2 = time axis
    """
    half = kernel_size // 2
    B, n_freq, n_time = x.shape
    if dim == 2:  # smooth along time
        flat = x.reshape(B * n_freq, 1, n_time)
        smoothed = F.avg_pool1d(flat, kernel_size=kernel_size, stride=1, padding=half)
        return smoothed.reshape(B, n_freq, n_time)
    # smooth along frequency: move F to the last axis, pool, move back
    swapped = x.transpose(1, 2).reshape(B * n_time, 1, n_freq)
    smoothed = F.avg_pool1d(swapped, kernel_size=kernel_size, stride=1, padding=half)
    return smoothed.reshape(B, n_time, n_freq).transpose(1, 2)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def hpss_gpu_pure(mag, h_kernel=31, p_kernel=31):
    """Pure-PyTorch HPSS (harmonic/percussive source separation).

    Compatible with both CUDA and MPS. All CNNs were trained on
    median-filter HPSS, so the avg_pool approximation must NOT be used
    here. On MPS, ``unfold().median()`` is pathologically slow
    (~13 s/track), so the filtering is done on CPU and the result is
    moved back to the original device.

    Args:
        mag: (B, 1, F, T) STFT magnitude on any device
        h_kernel: median window along time (harmonic enhancement)
        p_kernel: median window along frequency (percussive enhancement)

    Returns:
        Tuple ``(H_mag, P_mag)``, each (B, 1, F, T).
    """
    spec = mag.squeeze(1)  # (B, F, T)

    if spec.device.type == 'mps':
        # Filter on CPU, then restore the original device.
        device = spec.device
        spec_cpu = spec.cpu()
        harm = _gpu_median_filter_2d(spec_cpu, h_kernel, dim=2).to(device)
        perc = _gpu_median_filter_2d(spec_cpu, p_kernel, dim=1).to(device)
    else:
        harm = _gpu_median_filter_2d(spec, h_kernel, dim=2)
        perc = _gpu_median_filter_2d(spec, p_kernel, dim=1)

    # Soft (Wiener) masks built from the squared filter responses;
    # epsilon guards against division by zero in silent regions.
    harm_sq = harm ** 2
    perc_sq = perc ** 2
    total = harm_sq + perc_sq + 1e-10
    H_mag = (spec * (harm_sq / total)).unsqueeze(1)
    P_mag = (spec * (perc_sq / total)).unsqueeze(1)
    return H_mag, P_mag
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
# ============================================================
|
| 369 |
+
# 7ch Forensic Feature Computation
|
| 370 |
+
# ============================================================
|
| 371 |
+
|
| 372 |
+
def compute_forensic_features_7ch(mel_res, mel_H, mel_P):
    """Assemble the 7-channel forensic feature stack from HPSS mel spectrograms.

    Channels:
        ch1: mel_residual   - UNet residual mel spectrogram
        ch2: mel_harmonic   - HPSS harmonic mel
        ch3: mel_percussive - HPSS percussive mel
        ch4: delta          - temporal 1st derivative
        ch5: delta2         - temporal 2nd derivative
        ch6: hp_ratio       - log(H/P) ratio
        ch7: spectral_flux  - |delta| (absolute spectral change)

    Args:
        mel_res: (B, 1, N_MELS, T)
        mel_H: (B, 1, N_MELS, T)
        mel_P: (B, 1, N_MELS, T)

    Returns:
        (B, 7, N_MELS, T) concatenated features
    """
    # First-order temporal difference, left-padded with zero to keep T frames.
    first_diff = F.pad(torch.diff(mel_res, n=1, dim=-1), (1, 0))
    # Second-order difference of the (already padded) first difference.
    second_diff = F.pad(torch.diff(first_diff, n=1, dim=-1), (1, 0))
    channels = [
        mel_res,
        mel_H,
        mel_P,
        first_diff,
        second_diff,
        mel_H - mel_P,     # hp_ratio: H - P (log(H/P) assuming log-scaled mels)
        first_diff.abs(),  # spectral flux
    ]
    return torch.cat(channels, dim=1)
|
models
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
../ArtifactNet/models
|
|
|
|
|
|
packages.txt
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
|
|
|
|
|
|
| 1 |
+
ffmpeg
|
| 2 |
+
libsndfile1
|
requirements.txt
CHANGED
|
@@ -1,15 +1,11 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
fastapi>=0.104.0
|
| 13 |
-
uvicorn>=0.24.0
|
| 14 |
-
pydantic>=2.0.0
|
| 15 |
-
yt-dlp>=2024.01.01
|
|
|
|
| 1 |
+
gradio>=4.44.0,<6
|
| 2 |
+
onnxruntime>=1.17.0
|
| 3 |
+
torch>=2.2.0
|
| 4 |
+
soundfile>=0.12.0
|
| 5 |
+
scipy>=1.11.0
|
| 6 |
+
numpy>=1.24.0,<2
|
| 7 |
+
matplotlib>=3.8.0
|
| 8 |
+
plotly>=5.18.0
|
| 9 |
+
librosa>=0.10.0
|
| 10 |
+
huggingface_hub>=0.24.0
|
| 11 |
+
requests>=2.31.0
|
|
|
|
|
|
|
|
|
|
|
|
ui/__init__.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
# Purpose: UI components for ArtifactNet Gradio demo
|
| 2 |
-
|
| 3 |
-
"""UI components and verdict card generation."""
|
| 4 |
-
|
| 5 |
-
from .verdict_card import VerdictCardBuilder, VerdictColors
|
| 6 |
-
from .components import create_theme, create_header, create_about_section
|
| 7 |
-
|
| 8 |
-
__all__ = [
|
| 9 |
-
'VerdictCardBuilder',
|
| 10 |
-
'VerdictColors',
|
| 11 |
-
'create_theme',
|
| 12 |
-
'create_header',
|
| 13 |
-
'create_about_section',
|
| 14 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ui/components.py
DELETED
|
@@ -1,112 +0,0 @@
|
|
| 1 |
-
# Created: 2026-02-24
|
| 2 |
-
# Purpose: Gradio UI components (theme, header, about section)
|
| 3 |
-
# Dependencies: gradio
|
| 4 |
-
|
| 5 |
-
"""Gradio UI components for ArtifactNet demo."""
|
| 6 |
-
|
| 7 |
-
import gradio as gr
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
def create_theme() -> gr.themes.Base:
|
| 11 |
-
"""Create ArtifactNet Gradio theme (dark mode with orange accent)."""
|
| 12 |
-
return gr.themes.Base(
|
| 13 |
-
primary_hue="orange",
|
| 14 |
-
secondary_hue="blue",
|
| 15 |
-
neutral_hue="slate",
|
| 16 |
-
font=gr.themes.GoogleFont("Inter"),
|
| 17 |
-
).set(
|
| 18 |
-
body_background_fill="#0f0f23",
|
| 19 |
-
block_background_fill="#1a1a2e",
|
| 20 |
-
block_border_color="#333",
|
| 21 |
-
input_background_fill="#16213e",
|
| 22 |
-
button_primary_background_fill="#ffa502",
|
| 23 |
-
button_primary_text_color="black",
|
| 24 |
-
)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
def create_header(is_hf_spaces: bool) -> str:
|
| 28 |
-
"""Create header HTML for Gradio UI.
|
| 29 |
-
|
| 30 |
-
Args:
|
| 31 |
-
is_hf_spaces: Whether running on HF Spaces (shows CPU warning)
|
| 32 |
-
|
| 33 |
-
Returns:
|
| 34 |
-
HTML string
|
| 35 |
-
"""
|
| 36 |
-
cpu_warning = ""
|
| 37 |
-
if is_hf_spaces:
|
| 38 |
-
cpu_warning = (
|
| 39 |
-
'<div style="margin:8px auto;max-width:500px;padding:6px 12px;'
|
| 40 |
-
'background:rgba(255,165,2,0.12);border:1px solid #ffa502;'
|
| 41 |
-
'border-radius:8px;font-size:12px;color:#ffa502;">'
|
| 42 |
-
'Running on CPU — analysis may take 30-60 seconds depending on track length.'
|
| 43 |
-
'</div>'
|
| 44 |
-
)
|
| 45 |
-
|
| 46 |
-
return f"""
|
| 47 |
-
<div style="text-align:center;padding:20px 0 10px;">
|
| 48 |
-
<h1 style="color:white;font-size:28px;margin:0;">
|
| 49 |
-
ArtifactNet
|
| 50 |
-
</h1>
|
| 51 |
-
<p style="color:#888;font-size:14px;margin:4px 0 0;">
|
| 52 |
-
AI Music Forensic Detector — Deep Spectral Analysis + Neural Network
|
| 53 |
-
</p>
|
| 54 |
-
{cpu_warning}
|
| 55 |
-
</div>
|
| 56 |
-
"""
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
def create_about_section() -> str:
|
| 60 |
-
"""Create About ArtifactNet accordion content HTML."""
|
| 61 |
-
return """
|
| 62 |
-
<div style="color:#ccc;font-size:13px;line-height:1.6;padding:10px;">
|
| 63 |
-
<h3 style="color:white;">Overview</h3>
|
| 64 |
-
<p>
|
| 65 |
-
ArtifactNet is a neural network-based forensic detector for
|
| 66 |
-
AI-generated music. It analyzes audio characteristics to distinguish
|
| 67 |
-
between human-produced and AI-generated tracks.
|
| 68 |
-
</p>
|
| 69 |
-
|
| 70 |
-
<h3 style="color:white;">Verdict Categories</h3>
|
| 71 |
-
<table style="width:100%;border-collapse:collapse;margin:8px 0;">
|
| 72 |
-
<tr style="border-bottom:1px solid #333;">
|
| 73 |
-
<td style="padding:6px;color:#ff4757;font-weight:bold;">AI Generated</td>
|
| 74 |
-
<td style="padding:6px;">Strong AI generation indicators detected.</td>
|
| 75 |
-
</tr>
|
| 76 |
-
<tr style="border-bottom:1px solid #333;">
|
| 77 |
-
<td style="padding:6px;color:#ffa502;font-weight:bold;">Uncertain</td>
|
| 78 |
-
<td style="padding:6px;">
|
| 79 |
-
<strong>Most common cause:</strong> Heavily processed audio (compression, EQ, effects).<br>
|
| 80 |
-
Other cases: Non-music audio, mixed human/AI content, edge cases in training data.<br>
|
| 81 |
-
<em>Tip: Try with original/minimally processed audio for better accuracy.</em>
|
| 82 |
-
</td>
|
| 83 |
-
</tr>
|
| 84 |
-
<tr>
|
| 85 |
-
<td style="padding:6px;color:#2ed573;font-weight:bold;">Human-Made</td>
|
| 86 |
-
<td style="padding:6px;">No significant AI generation indicators found.</td>
|
| 87 |
-
</tr>
|
| 88 |
-
</table>
|
| 89 |
-
|
| 90 |
-
<h3 style="color:white;">Limitations</h3>
|
| 91 |
-
<ul>
|
| 92 |
-
<li>Mono input reduces accuracy</li>
|
| 93 |
-
<li>Heavily processed audio may fall in the Uncertain zone</li>
|
| 94 |
-
<li>Novel AI generators not in training data may be missed</li>
|
| 95 |
-
<li>Short clips (<10s) have lower confidence</li>
|
| 96 |
-
</ul>
|
| 97 |
-
|
| 98 |
-
<h3 style="color:white;">📊 Data Collection (Edge Case Detection)</h3>
|
| 99 |
-
<p style="background:rgba(46,213,115,0.1);padding:8px;border-radius:4px;border-left:3px solid #2ed573;color:#ccc;font-size:12px;line-height:1.5;">
|
| 100 |
-
<strong style="color:#2ed573;">What's collected:</strong> When results are "Uncertain",
|
| 101 |
-
analysis data (mel-spectrogram only) from tracks <strong><30 seconds</strong>
|
| 102 |
-
is securely saved for model improvement.<br><br>
|
| 103 |
-
<strong style="color:#2ed573;">What's NOT collected:</strong> Your original audio files are never stored.
|
| 104 |
-
Only aggregated spectral patterns and verdict statistics are saved.<br><br>
|
| 105 |
-
<strong style="color:#2ed573;">Why:</strong> These edge cases help improve model accuracy and robustness.
|
| 106 |
-
</p>
|
| 107 |
-
|
| 108 |
-
<p style="color:#888;font-size:11px;margin-top:10px;">
|
| 109 |
-
Research project — results should be interpreted alongside other evidence.
|
| 110 |
-
</p>
|
| 111 |
-
</div>
|
| 112 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ui/verdict_card.py
DELETED
|
@@ -1,189 +0,0 @@
|
|
| 1 |
-
# Created: 2026-02-24
|
| 2 |
-
# Purpose: Verdict card HTML generation (extracted from app.py)
|
| 3 |
-
# Dependencies: None (pure HTML generation)
|
| 4 |
-
|
| 5 |
-
"""Verdict card HTML builder for ArtifactNet results."""
|
| 6 |
-
|
| 7 |
-
import math
|
| 8 |
-
from dataclasses import dataclass
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def _safe_fmt(val: float) -> float:
|
| 12 |
-
"""Convert NaN to 0.5 for safe formatting."""
|
| 13 |
-
if math.isnan(val):
|
| 14 |
-
return 0.5
|
| 15 |
-
return val
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
@dataclass
|
| 19 |
-
class VerdictColors:
|
| 20 |
-
"""Color constants for verdict categories."""
|
| 21 |
-
AI_GENERATED = "#ff4757"
|
| 22 |
-
UNCERTAIN = "#ffa502"
|
| 23 |
-
HUMAN_MADE = "#2ed573"
|
| 24 |
-
|
| 25 |
-
BACKGROUND = "#16213e"
|
| 26 |
-
BORDER = "#333"
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
class VerdictCardBuilder:
|
| 30 |
-
"""Build HTML verdict cards for ArtifactNet analysis results."""
|
| 31 |
-
|
| 32 |
-
@staticmethod
|
| 33 |
-
def build_empty_card() -> str:
|
| 34 |
-
"""Generate placeholder card for empty state."""
|
| 35 |
-
return """
|
| 36 |
-
<div style="text-align:center;padding:30px;background:#16213e;
|
| 37 |
-
border-radius:12px;color:#888;">
|
| 38 |
-
<p style="font-size:16px;">Upload an audio file to begin analysis</p>
|
| 39 |
-
</div>"""
|
| 40 |
-
|
| 41 |
-
@staticmethod
|
| 42 |
-
def build(verdict: str, stats: dict, is_stereo: bool,
|
| 43 |
-
duration: float = 0, elapsed: float = 0) -> str:
|
| 44 |
-
"""Generate verdict card HTML.
|
| 45 |
-
|
| 46 |
-
Args:
|
| 47 |
-
verdict: "AI Generated", "Uncertain", or "Human-Made"
|
| 48 |
-
stats: Distribution statistics dict
|
| 49 |
-
is_stereo: Whether input was stereo
|
| 50 |
-
duration: Audio duration in seconds
|
| 51 |
-
elapsed: Analysis elapsed time in seconds
|
| 52 |
-
|
| 53 |
-
Returns:
|
| 54 |
-
HTML string for verdict card
|
| 55 |
-
"""
|
| 56 |
-
if verdict == "No file":
|
| 57 |
-
return VerdictCardBuilder.build_empty_card()
|
| 58 |
-
|
| 59 |
-
color, icon, desc = VerdictCardBuilder._get_verdict_style(verdict, stats)
|
| 60 |
-
channels = "Stereo" if is_stereo else "Mono"
|
| 61 |
-
|
| 62 |
-
# Distribution bar
|
| 63 |
-
dist_bar = VerdictCardBuilder._build_distribution_bar(stats)
|
| 64 |
-
|
| 65 |
-
# Warnings and context
|
| 66 |
-
mono_warn = VerdictCardBuilder._build_mono_warning(is_stereo)
|
| 67 |
-
context = VerdictCardBuilder._build_context(verdict, stats)
|
| 68 |
-
|
| 69 |
-
return f"""
|
| 70 |
-
<div style="text-align:center;padding:20px;background:#16213e;
|
| 71 |
-
border-radius:12px;border:2px solid {color};">
|
| 72 |
-
<div style="font-size:14px;color:{color};letter-spacing:1px;
|
| 73 |
-
text-transform:uppercase;font-weight:600;">
|
| 74 |
-
{icon} Verdict
|
| 75 |
-
</div>
|
| 76 |
-
<div style="font-size:32px;font-weight:bold;color:{color};
|
| 77 |
-
letter-spacing:2px;margin:6px 0;">{verdict.upper()}</div>
|
| 78 |
-
<div style="color:#aaa;font-size:13px;margin-bottom:10px;">{desc}</div>
|
| 79 |
-
<div style="font-size:36px;font-weight:bold;color:white;margin:4px 0;">
|
| 80 |
-
median={_safe_fmt(stats['median']):.1%}
|
| 81 |
-
<span style="font-size:18px;color:#888;">mean={_safe_fmt(stats['mean']):.1%}</span>
|
| 82 |
-
</div>
|
| 83 |
-
{dist_bar}
|
| 84 |
-
<div style="color:#999;font-size:13px;margin-top:10px;">
|
| 85 |
-
{stats['n']} segments |
|
| 86 |
-
IQR={stats['iqr']:.2f} |
|
| 87 |
-
{channels} |
|
| 88 |
-
{duration:.1f}s |
|
| 89 |
-
{elapsed:.1f}s
|
| 90 |
-
</div>
|
| 91 |
-
{mono_warn}
|
| 92 |
-
{context}
|
| 93 |
-
</div>"""
|
| 94 |
-
|
| 95 |
-
@staticmethod
|
| 96 |
-
def _get_verdict_style(verdict: str, stats: dict) -> tuple[str, str, str]:
|
| 97 |
-
"""Get color, icon, and description for verdict.
|
| 98 |
-
|
| 99 |
-
Returns:
|
| 100 |
-
(color, icon, description)
|
| 101 |
-
"""
|
| 102 |
-
pct_high = stats["pct_high"]
|
| 103 |
-
|
| 104 |
-
if verdict == "AI Generated":
|
| 105 |
-
return (
|
| 106 |
-
VerdictColors.AI_GENERATED,
|
| 107 |
-
"⚠", # warning icon
|
| 108 |
-
f"{pct_high:.0%} of segments show strong AI indicators (consistent pattern)"
|
| 109 |
-
)
|
| 110 |
-
elif verdict == "Uncertain":
|
| 111 |
-
return (
|
| 112 |
-
VerdictColors.UNCERTAIN,
|
| 113 |
-
"●", # circle icon
|
| 114 |
-
"Mixed signals across segments — inconsistent pattern"
|
| 115 |
-
)
|
| 116 |
-
else: # Human-Made
|
| 117 |
-
return (
|
| 118 |
-
VerdictColors.HUMAN_MADE,
|
| 119 |
-
"✓", # check icon
|
| 120 |
-
"No significant AI generation indicators found"
|
| 121 |
-
)
|
| 122 |
-
|
| 123 |
-
@staticmethod
|
| 124 |
-
def _build_distribution_bar(stats: dict) -> str:
|
| 125 |
-
"""Build 3-color distribution bar HTML."""
|
| 126 |
-
n_total = stats["n"]
|
| 127 |
-
n_high, n_mid, n_low = stats["n_high"], stats["n_mid"], stats["n_low"]
|
| 128 |
-
pct_h = n_high / n_total * 100
|
| 129 |
-
pct_m = n_mid / n_total * 100
|
| 130 |
-
pct_l = n_low / n_total * 100
|
| 131 |
-
|
| 132 |
-
return f"""
|
| 133 |
-
<div style="margin:10px auto;max-width:320px;">
|
| 134 |
-
<div style="height:14px;background:#333;border-radius:7px;
|
| 135 |
-
overflow:hidden;display:flex;">
|
| 136 |
-
<div style="width:{pct_h:.1f}%;background:{VerdictColors.AI_GENERATED};"></div>
|
| 137 |
-
<div style="width:{pct_m:.1f}%;background:{VerdictColors.UNCERTAIN};"></div>
|
| 138 |
-
<div style="width:{pct_l:.1f}%;background:{VerdictColors.HUMAN_MADE};"></div>
|
| 139 |
-
</div>
|
| 140 |
-
<div style="display:flex;justify-content:space-between;
|
| 141 |
-
font-size:10px;color:#888;margin-top:2px;">
|
| 142 |
-
<span style="color:{VerdictColors.AI_GENERATED};">{n_high} high</span>
|
| 143 |
-
<span style="color:{VerdictColors.UNCERTAIN};">{n_mid} mid</span>
|
| 144 |
-
<span style="color:{VerdictColors.HUMAN_MADE};">{n_low} low</span>
|
| 145 |
-
</div>
|
| 146 |
-
</div>"""
|
| 147 |
-
|
| 148 |
-
@staticmethod
|
| 149 |
-
def _build_mono_warning(is_stereo: bool) -> str:
|
| 150 |
-
"""Build mono input warning HTML."""
|
| 151 |
-
if is_stereo:
|
| 152 |
-
return ""
|
| 153 |
-
|
| 154 |
-
return """
|
| 155 |
-
<div style="margin-top:8px;padding:6px 10px;background:rgba(255,165,2,0.15);
|
| 156 |
-
border-radius:6px;border-left:3px solid #ffa502;font-size:12px;">
|
| 157 |
-
Mono input — stereo phase features unavailable. Results may be less reliable.
|
| 158 |
-
</div>"""
|
| 159 |
-
|
| 160 |
-
@staticmethod
|
| 161 |
-
def _build_context(verdict: str, stats: dict) -> str:
|
| 162 |
-
"""Build human comparison context HTML."""
|
| 163 |
-
if verdict == "AI Generated":
|
| 164 |
-
return """
|
| 165 |
-
<div style="margin-top:10px;padding:8px 12px;background:rgba(255,71,87,0.1);
|
| 166 |
-
border-radius:6px;font-size:12px;color:#ccc;line-height:1.5;">
|
| 167 |
-
<b style="color:#ff4757;">Context:</b>
|
| 168 |
-
In blind listening tests, trained listeners correctly identified AI music
|
| 169 |
-
only 72.9% of the time (N=90). This track shows patterns that exceed
|
| 170 |
-
human detection ability.
|
| 171 |
-
</div>"""
|
| 172 |
-
elif verdict == "Uncertain":
|
| 173 |
-
iqr = stats['iqr']
|
| 174 |
-
return f"""
|
| 175 |
-
<div style="margin-top:10px;padding:8px 12px;background:rgba(255,165,2,0.1);
|
| 176 |
-
border-radius:6px;font-size:12px;color:#ccc;line-height:1.5;">
|
| 177 |
-
<b style="color:#ffa502;">Why uncertain:</b>
|
| 178 |
-
Segment distribution is inconsistent (IQR={iqr:.2f}).
|
| 179 |
-
Some sections show AI patterns while others appear human-made.
|
| 180 |
-
This may indicate partial AI use, heavy processing, or novel audio characteristics.
|
| 181 |
-
</div>"""
|
| 182 |
-
else: # Human-Made
|
| 183 |
-
return """
|
| 184 |
-
<div style="margin-top:10px;padding:8px 12px;background:rgba(46,213,115,0.1);
|
| 185 |
-
border-radius:6px;font-size:12px;color:#ccc;line-height:1.5;">
|
| 186 |
-
<b style="color:#2ed573;">Context:</b>
|
| 187 |
-
This track's spectral and temporal characteristics are consistent with
|
| 188 |
-
human-produced music. Average human accuracy in blind tests: 69.3% (N=90).
|
| 189 |
-
</div>"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
visualization/feature_bars.py
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created: 2026-03-07
|
| 2 |
+
# Purpose: Horizontal bar chart for 7 forensic audio features
|
| 3 |
+
# Dependencies: plotly, numpy
|
| 4 |
+
|
| 5 |
+
"""Horizontal bar chart visualization for 7 forensic audio features."""
|
| 6 |
+
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def plot_feature_bars(feature_stats: dict) -> go.Figure:
    """Render the 7 forensic features as a horizontal bar chart.

    Args:
        feature_stats: mapping of feature key -> normalized value (0-1);
            missing keys (or an empty/falsy dict) default to 0.5.

    Returns:
        plotly Figure (horizontal bar chart)
    """
    # Short display labels (bottom-up order for the horizontal layout).
    labels = [
        "Spectral Flux",
        "H/P Ratio",
        "Temporal Accel",
        "Temporal Delta",
        "Percussive",
        "Harmonic",
        "Residual Energy",
    ]

    stat_keys = [
        "spectral_flux",
        "hp_ratio",
        "temporal_accel",
        "temporal_delta",
        "percussive_strength",
        "harmonic_strength",
        "residual_energy",
    ]
    if feature_stats:
        values = [feature_stats.get(k, 0.5) for k in stat_keys]
    else:
        values = [0.5] * 7

    # AI-signature direction per feature:
    #   Residual Energy / H/P Ratio: higher => more AI-like
    #   Temporal Delta/Accel, Spectral Flux: lower => more AI-like
    #   Harmonic / Percussive: neutral (converge to specific ratios)
    def _bar_color(label: str, value: float) -> str:
        """Map one feature's value to its verdict color."""
        if "Residual" in label or "H/P" in label:
            # Higher => AI
            if value >= 0.7:
                return '#ff4757'  # AI (red)
            if value >= 0.4:
                return '#ffa502'  # Uncertain (orange)
            return '#2ed573'      # Human (green)
        if "Temporal" in label or "Spectral" in label:
            # Lower => AI
            if value <= 0.3:
                return '#ff4757'  # AI (red)
            if value <= 0.6:
                return '#ffa502'  # Uncertain (orange)
            return '#2ed573'      # Human (green)
        # Harmonic/Percussive are neutral
        return '#5f9ea0'  # Neutral (cyan)

    colors = [_bar_color(lbl, val) for lbl, val in zip(labels, values)]

    fig = go.Figure(go.Bar(
        x=values,
        y=labels,
        orientation='h',
        marker=dict(
            color=colors,
            line=dict(color='#fff', width=1)
        ),
        text=[f"{v:.2f}" for v in values],
        textposition='inside',
        textfont=dict(size=11, color='white', family='monospace'),
        hovertemplate="<b>%{y}</b><br>Score: %{x:.3f}<extra></extra>",
    ))

    fig.update_layout(
        xaxis=dict(
            title="Feature Strength",
            range=[0, 1],
            tickfont=dict(size=10, color='#aaa'),
            gridcolor='#333',
        ),
        yaxis=dict(
            tickfont=dict(size=11, color='white'),
        ),
        plot_bgcolor='#1a1a2e',
        paper_bgcolor='#1a1a2e',
        font=dict(color='white'),
        margin=dict(l=140, r=20, t=40, b=40),
        height=300,
        showlegend=False,
        title=dict(
            text="Forensic Feature Strength",
            font=dict(size=13),
            x=0.5, xanchor='center'
        )
    )

    return fig
|
visualization/radar.py
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created: 2026-03-07
|
| 2 |
+
# Purpose: Radar chart for 7-channel forensic features
|
| 3 |
+
# Dependencies: plotly, numpy
|
| 4 |
+
|
| 5 |
+
"""Radar chart visualization for 7 forensic audio features."""
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
import plotly.graph_objects as go
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def plot_forensic_radar(feature_stats: dict) -> go.Figure:
    """Visualize the 7 forensic features as a radar (polar) chart.

    Args:
        feature_stats: mapping of feature key -> normalized value (0-1);
            missing keys (or an empty/falsy dict) default to 0.5.

    Returns:
        plotly Figure (radar/polar chart)
    """
    # (axis label, Korean description) — descriptions kept for reference only.
    features = [
        ("Residual Energy", "잔차 에너지 (AI 코덱 흔적)"),
        ("Harmonic Strength", "하모닉 강도 (음악 구조)"),
        ("Percussive Strength", "타악기 강도 (리듬 요소)"),
        ("Temporal Delta", "시간 변화율 (다이나믹스)"),
        ("Temporal Accel", "시간 가속도 (변화 일관성)"),
        ("H/P Ratio", "하모닉/타악기 비율"),
        ("Spectral Flux", "스펙트럼 변화량 (질감)"),
    ]

    stat_keys = [
        "residual_energy",
        "harmonic_strength",
        "percussive_strength",
        "temporal_delta",
        "temporal_accel",
        "hp_ratio",
        "spectral_flux",
    ]
    if feature_stats:
        values = [feature_stats.get(k, 0.5) for k in stat_keys]
    else:
        values = [0.5] * 7

    # Close the polygon by repeating the first point at the end.
    axis_labels = [label for label, _ in features]
    r_closed = values + [values[0]]
    theta_closed = axis_labels + [axis_labels[0]]

    fig = go.Figure()

    # Current audio's feature pattern.
    fig.add_trace(go.Scatterpolar(
        r=r_closed,
        theta=theta_closed,
        fill='toself',
        fillcolor='rgba(255, 71, 87, 0.3)',
        line=dict(color='#ff4757', width=2),
        name='Audio Pattern',
        hovertemplate="<b>%{theta}</b><br>Score: %{r:.2f}<extra></extra>"
    ))

    fig.update_layout(
        polar=dict(
            radialaxis=dict(
                visible=True,
                range=[0, 1],
                tickfont=dict(size=10, color='#aaa'),
                gridcolor='#333',
            ),
            angularaxis=dict(
                tickfont=dict(size=11, color='white'),
                gridcolor='#333',
            ),
            bgcolor='#16213e',
        ),
        plot_bgcolor='#1a1a2e',
        paper_bgcolor='#1a1a2e',
        font=dict(color='white'),
        margin=dict(l=80, r=80, t=40, b=40),
        height=400,
        showlegend=True,
        legend=dict(
            x=0.5, xanchor='center',
            y=-0.15, yanchor='top',
            orientation='h',
            font=dict(size=10)
        ),
        title=dict(
            text="Forensic Feature Profile",
            font=dict(size=14),
            x=0.5, xanchor='center'
        )
    )

    return fig
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def forensic_features_explanation() -> str:
    """Return a static HTML block explaining the 7 forensic features (Korean UI copy)."""
    return """
    <div style="background:#16213e;padding:20px;border-radius:12px;margin-top:10px;">
        <h3 style="color:#00d2ff;margin-top:0;font-size:16px;">🔬 7-Channel Forensic Features</h3>
        <div style="font-size:13px;color:#ccc;line-height:1.6;">
            <details style="margin-bottom:10px;">
                <summary style="cursor:pointer;color:#ffa502;font-weight:bold;">
                    📊 1. Residual Energy (잔차 에너지)
                </summary>
                <p style="margin:8px 0 0 20px;color:#aaa;">
                    AI 뉴럴 코덱이 남기는 미세한 코덱 흔적.
                    AI 음악은 인간이 만든 음악과 다른 <b>양자화 패턴</b>을 보입니다.
                </p>
            </details>

            <details style="margin-bottom:10px;">
                <summary style="cursor:pointer;color:#ffa502;font-weight:bold;">
                    🎵 2-3. Harmonic/Percussive Strength (하모닉/타악기 강도)
                </summary>
                <p style="margin:8px 0 0 20px;color:#aaa;">
                    음악을 멜로디 성분과 리듬 성분으로 분리.
                    AI는 두 요소의 <b>에너지 비율</b>이 부자연스럽게 일정합니다.
                </p>
            </details>

            <details style="margin-bottom:10px;">
                <summary style="cursor:pointer;color:#ffa502;font-weight:bold;">
                    ⚡ 4-5. Temporal Delta & Accel (시간 변화율/가속도)
                </summary>
                <p style="margin:8px 0 0 20px;color:#aaa;">
                    스펙트럼의 시간축 변화 패턴.
                    AI 음악은 변화가 <b>너무 부드럽고 규칙적</b>입니다 (생성 과정의 smoothing 효과).
                </p>
            </details>

            <details style="margin-bottom:10px;">
                <summary style="cursor:pointer;color:#ffa502;font-weight:bold;">
                    🎚️ 6. H/P Ratio (하모닉/타악기 비율)
                </summary>
                <p style="margin:8px 0 0 20px;color:#aaa;">
                    멜로디와 리듬의 균형.
                    AI는 장르와 무관하게 <b>특정 비율로 수렴</b>하는 경향을 보입니다.
                </p>
            </details>

            <details>
                <summary style="cursor:pointer;color:#ffa502;font-weight:bold;">
                    🌊 7. Spectral Flux (스펙트럼 변화량)
                </summary>
                <p style="margin:8px 0 0 20px;color:#aaa;">
                    주파수 성분의 프레임간 변화 절댓값.
                    AI는 변화가 <b>일관적이고 예측 가능</b>합니다 (확률적 생성의 특성).
                </p>
            </details>
        </div>

        <div style="margin-top:15px;padding:12px;background:#1a1a2e;border-radius:8px;border-left:3px solid #00d2ff;">
            <p style="margin:0;font-size:12px;color:#aaa;">
                <b style="color:#00d2ff;">💡 핵심 원리:</b>
                AI 생성 모델은 <b>물리적 악기의 불규칙성</b>을 완벽히 재현하지 못합니다.
                이 7개 피처는 그러한 미세한 차이를 정량화하여 AI 시그니처를 탐지합니다.
            </p>
        </div>
    </div>
    """
|
visualization/spectrogram.py
CHANGED
|
@@ -10,9 +10,8 @@ matplotlib.use('Agg')
|
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
|
| 12 |
from config import SR, N_FFT, HOP_LENGTH
|
| 13 |
-
from core import get_params
|
| 14 |
|
| 15 |
-
N_MELS =
|
| 16 |
|
| 17 |
|
| 18 |
def _compute_mel_spectrogram(audio_1d: np.ndarray) -> np.ndarray:
|
|
|
|
| 10 |
import matplotlib.pyplot as plt
|
| 11 |
|
| 12 |
from config import SR, N_FFT, HOP_LENGTH
|
|
|
|
| 13 |
|
| 14 |
+
N_MELS = 128
|
| 15 |
|
| 16 |
|
| 17 |
def _compute_mel_spectrogram(audio_1d: np.ndarray) -> np.ndarray:
|
visualization/timeline.py
CHANGED
|
@@ -1,62 +1,166 @@
|
|
| 1 |
# Created: 2026-02-18
|
| 2 |
-
# Purpose: P(AI) per-segment timeline bar chart (plotly)
|
| 3 |
-
# Dependencies: plotly
|
| 4 |
|
| 5 |
-
"""Per-segment (chunk) AI probability timeline visualization."""
|
| 6 |
|
|
|
|
| 7 |
import plotly.graph_objects as go
|
|
|
|
| 8 |
|
| 9 |
-
from config import CHUNK_SEC
|
| 10 |
|
| 11 |
|
| 12 |
-
def plot_timeline(
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
Args:
|
| 16 |
chunk_probs: P(AI) list for each 4-second chunk
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
Returns:
|
| 19 |
-
plotly Figure
|
| 20 |
"""
|
| 21 |
n = len(chunk_probs)
|
| 22 |
times = [f"{i * CHUNK_SEC:.0f}-{(i + 1) * CHUNK_SEC:.0f}s" for i in range(n)]
|
| 23 |
colors = ['#ff4757' if p >= 0.5 else '#2ed573' for p in chunk_probs]
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
tickvals=list(range(n)),
|
| 49 |
ticktext=times,
|
| 50 |
tickangle=-45,
|
| 51 |
tickfont=dict(size=9),
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
return fig
|
|
|
|
| 1 |
# Created: 2026-02-18
|
| 2 |
+
# Purpose: P(AI) per-segment timeline bar chart with waveform (plotly)
|
| 3 |
+
# Dependencies: plotly, numpy
|
| 4 |
|
| 5 |
+
"""Per-segment (chunk) AI probability timeline visualization with waveform."""
|
| 6 |
|
| 7 |
+
import numpy as np
|
| 8 |
import plotly.graph_objects as go
|
| 9 |
+
from plotly.subplots import make_subplots
|
| 10 |
|
| 11 |
+
from config import CHUNK_SEC, SR, CHUNK_SAMPLES
|
| 12 |
|
| 13 |
|
| 14 |
+
def plot_timeline(
|
| 15 |
+
chunk_probs: list[float],
|
| 16 |
+
waveform: np.ndarray = None,
|
| 17 |
+
chunk_metadata: list[dict] = None,
|
| 18 |
+
weighted_median: float = None
|
| 19 |
+
) -> go.Figure:
|
| 20 |
+
"""Per-chunk P(AI) timeline bar chart with optional waveform.
|
| 21 |
|
| 22 |
Args:
|
| 23 |
chunk_probs: P(AI) list for each 4-second chunk
|
| 24 |
+
waveform: Optional mono waveform array for envelope visualization
|
| 25 |
+
chunk_metadata: Optional metadata with start_sample info
|
| 26 |
+
weighted_median: Energy-weighted median P(AI) for reference line
|
| 27 |
|
| 28 |
Returns:
|
| 29 |
+
plotly Figure with waveform (top) + P(AI) bars (bottom)
|
| 30 |
"""
|
| 31 |
n = len(chunk_probs)
|
| 32 |
times = [f"{i * CHUNK_SEC:.0f}-{(i + 1) * CHUNK_SEC:.0f}s" for i in range(n)]
|
| 33 |
colors = ['#ff4757' if p >= 0.5 else '#2ed573' for p in chunk_probs]
|
| 34 |
|
| 35 |
+
# 파형이 있으면 subplot, 없으면 단순 bar chart
|
| 36 |
+
if waveform is not None and len(waveform) > 0:
|
| 37 |
+
fig = make_subplots(
|
| 38 |
+
rows=2, cols=1,
|
| 39 |
+
row_heights=[0.3, 0.7],
|
| 40 |
+
vertical_spacing=0.08,
|
| 41 |
+
subplot_titles=("Waveform Envelope", "Segment-level AI Probability"),
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
# Waveform envelope (unipolar - 절댓값의 상단만)
|
| 45 |
+
time_axis = np.arange(len(waveform)) / SR
|
| 46 |
+
envelope = np.abs(waveform)
|
| 47 |
+
|
| 48 |
+
# Downsample for plotting (매 100 샘플마다)
|
| 49 |
+
downsample_factor = 100
|
| 50 |
+
time_ds = time_axis[::downsample_factor]
|
| 51 |
+
envelope_ds = envelope[::downsample_factor]
|
| 52 |
+
|
| 53 |
+
fig.add_trace(
|
| 54 |
+
go.Scatter(
|
| 55 |
+
x=time_ds,
|
| 56 |
+
y=envelope_ds,
|
| 57 |
+
mode='lines',
|
| 58 |
+
line=dict(color='#5f9ea0', width=0.5),
|
| 59 |
+
fill='tozeroy',
|
| 60 |
+
fillcolor='rgba(95, 158, 160, 0.3)',
|
| 61 |
+
name='Envelope',
|
| 62 |
+
hovertemplate="Time: %{x:.2f}s<br>Amplitude: %{y:.3f}<extra></extra>",
|
| 63 |
+
),
|
| 64 |
+
row=1, col=1
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
# 세그먼트 경계선 표시 (chunk metadata 사용)
|
| 68 |
+
if chunk_metadata:
|
| 69 |
+
for meta in chunk_metadata:
|
| 70 |
+
start_sec = meta['start_sample'] / SR
|
| 71 |
+
fig.add_vline(
|
| 72 |
+
x=start_sec,
|
| 73 |
+
line=dict(color='#ffa502', width=1, dash='dot'),
|
| 74 |
+
opacity=0.5,
|
| 75 |
+
row=1, col=1
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
+
# P(AI) bar chart
|
| 79 |
+
fig.add_trace(
|
| 80 |
+
go.Bar(
|
| 81 |
+
x=list(range(n)),
|
| 82 |
+
y=chunk_probs,
|
| 83 |
+
marker_color=colors,
|
| 84 |
+
text=[f"{p:.2f}" for p in chunk_probs],
|
| 85 |
+
textposition='outside',
|
| 86 |
+
textfont=dict(size=10, color='white'),
|
| 87 |
+
hovertemplate="<b>%{customdata}</b><br>P(AI): %{y:.3f}<extra></extra>",
|
| 88 |
+
customdata=times,
|
| 89 |
+
name='P(AI)',
|
| 90 |
+
),
|
| 91 |
+
row=2, col=1
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
+
# Energy-weighted median reference line
|
| 95 |
+
if weighted_median is not None:
|
| 96 |
+
fig.add_hline(
|
| 97 |
+
y=weighted_median, line_dash="dash", line_color="#00d2ff",
|
| 98 |
+
annotation_text=f"Weighted Median ({weighted_median:.2f})",
|
| 99 |
+
annotation_position="top right",
|
| 100 |
+
annotation_font_color="#00d2ff",
|
| 101 |
+
annotation_font_size=10,
|
| 102 |
+
row=2, col=1
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
# Layout
|
| 106 |
+
fig.update_xaxes(title_text="Time (s)", row=1, col=1)
|
| 107 |
+
fig.update_yaxes(title_text="Amplitude", row=1, col=1)
|
| 108 |
+
fig.update_xaxes(
|
| 109 |
+
title_text="Segment",
|
| 110 |
tickvals=list(range(n)),
|
| 111 |
ticktext=times,
|
| 112 |
tickangle=-45,
|
| 113 |
tickfont=dict(size=9),
|
| 114 |
+
row=2, col=1
|
| 115 |
+
)
|
| 116 |
+
fig.update_yaxes(title_text="P(AI)", range=[0, 1.05], row=2, col=1)
|
| 117 |
+
|
| 118 |
+
fig.update_layout(
|
| 119 |
+
plot_bgcolor='#1a1a2e',
|
| 120 |
+
paper_bgcolor='#1a1a2e',
|
| 121 |
+
font=dict(color='white'),
|
| 122 |
+
margin=dict(l=50, r=20, t=60, b=60),
|
| 123 |
+
height=500,
|
| 124 |
+
showlegend=False,
|
| 125 |
+
)
|
| 126 |
+
|
| 127 |
+
else:
|
| 128 |
+
# Fallback: 기존 단순 bar chart
|
| 129 |
+
fig = go.Figure()
|
| 130 |
+
|
| 131 |
+
fig.add_trace(go.Bar(
|
| 132 |
+
x=list(range(n)),
|
| 133 |
+
y=chunk_probs,
|
| 134 |
+
marker_color=colors,
|
| 135 |
+
text=[f"{p:.2f}" for p in chunk_probs],
|
| 136 |
+
textposition='outside',
|
| 137 |
+
textfont=dict(size=10, color='white'),
|
| 138 |
+
hovertemplate="<b>%{customdata}</b><br>P(AI): %{y:.3f}<extra></extra>",
|
| 139 |
+
customdata=times,
|
| 140 |
+
))
|
| 141 |
+
|
| 142 |
+
if weighted_median is not None:
|
| 143 |
+
fig.add_hline(y=weighted_median, line_dash="dash", line_color="#00d2ff",
|
| 144 |
+
annotation_text=f"Weighted Median ({weighted_median:.2f})",
|
| 145 |
+
annotation_position="top right",
|
| 146 |
+
annotation_font_color="#00d2ff")
|
| 147 |
+
|
| 148 |
+
fig.update_layout(
|
| 149 |
+
title=dict(text="Segment-level AI Probability", font=dict(size=14)),
|
| 150 |
+
xaxis=dict(
|
| 151 |
+
title="Segment",
|
| 152 |
+
tickvals=list(range(n)),
|
| 153 |
+
ticktext=times,
|
| 154 |
+
tickangle=-45,
|
| 155 |
+
tickfont=dict(size=9),
|
| 156 |
+
),
|
| 157 |
+
yaxis=dict(title="P(AI)", range=[0, 1.05]),
|
| 158 |
+
plot_bgcolor='#1a1a2e',
|
| 159 |
+
paper_bgcolor='#1a1a2e',
|
| 160 |
+
font=dict(color='white'),
|
| 161 |
+
margin=dict(l=50, r=20, t=40, b=60),
|
| 162 |
+
height=300,
|
| 163 |
+
showlegend=False,
|
| 164 |
+
)
|
| 165 |
|
| 166 |
return fig
|
youtube_proxy_server.py
DELETED
|
@@ -1,180 +0,0 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""
|
| 3 |
-
YouTube Audio Proxy Server — yt-dlp wrapper with API
|
| 4 |
-
|
| 5 |
-
환경변수:
|
| 6 |
-
- YOUTUBE_PROXY_API_KEY: 인증 토큰 (Bearer token)
|
| 7 |
-
- LOG_LEVEL: DEBUG/INFO/WARNING (기본값: INFO)
|
| 8 |
-
"""
|
| 9 |
-
|
| 10 |
-
import os
|
| 11 |
-
import sys
|
| 12 |
-
import json
|
| 13 |
-
import logging
|
| 14 |
-
import tempfile
|
| 15 |
-
import subprocess
|
| 16 |
-
from typing import Optional
|
| 17 |
-
|
| 18 |
-
from fastapi import FastAPI, HTTPException, Header
|
| 19 |
-
from fastapi.responses import FileResponse, JSONResponse
|
| 20 |
-
from pydantic import BaseModel
|
| 21 |
-
|
| 22 |
-
# ============================================================
|
| 23 |
-
# Config
|
| 24 |
-
# ============================================================
|
| 25 |
-
|
| 26 |
-
API_KEY = os.environ.get("YOUTUBE_PROXY_API_KEY", "default-key")
|
| 27 |
-
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
|
| 28 |
-
|
| 29 |
-
logging.basicConfig(
|
| 30 |
-
level=getattr(logging, LOG_LEVEL),
|
| 31 |
-
format="%(asctime)s — [%(levelname)s] %(message)s"
|
| 32 |
-
)
|
| 33 |
-
logger = logging.getLogger(__name__)
|
| 34 |
-
|
| 35 |
-
# ============================================================
|
| 36 |
-
# FastAPI app
|
| 37 |
-
# ============================================================
|
| 38 |
-
|
| 39 |
-
app = FastAPI(title="YouTube Proxy Server", version="1.0")
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
# Global exception handler to ensure all errors return JSON
|
| 43 |
-
@app.exception_handler(Exception)
async def global_exception_handler(request, exc):
    """Catch-all handler: every unhandled error becomes a JSON 500 payload."""
    exc_name = type(exc).__name__
    logger.error(f"Unhandled exception: {exc_name}: {str(exc)}")
    # Truncate the message so arbitrarily long errors stay response-sized.
    body = {"detail": f"Internal error: {str(exc)[:200]}"}
    return JSONResponse(status_code=500, content=body)
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
class YouTubeRequest(BaseModel):
    """Request body for /download-youtube: the video URL to fetch."""

    url: str
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
@app.get("/health")
|
| 59 |
-
def health_check():
|
| 60 |
-
"""Health check endpoint."""
|
| 61 |
-
return {"status": "healthy", "service": "youtube-proxy"}
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
@app.post("/download-youtube")
|
| 65 |
-
def download_youtube(
|
| 66 |
-
req: YouTubeRequest,
|
| 67 |
-
authorization: Optional[str] = Header(None),
|
| 68 |
-
):
|
| 69 |
-
"""
|
| 70 |
-
Download audio from YouTube URL.
|
| 71 |
-
|
| 72 |
-
Headers:
|
| 73 |
-
Authorization: "Bearer {API_KEY}"
|
| 74 |
-
|
| 75 |
-
Returns:
|
| 76 |
-
WAV file (binary)
|
| 77 |
-
"""
|
| 78 |
-
|
| 79 |
-
# Verify API key
|
| 80 |
-
if not authorization or not authorization.startswith("Bearer "):
|
| 81 |
-
logger.warning(f"Missing/invalid auth header: {authorization}")
|
| 82 |
-
raise HTTPException(status_code=401, detail="Unauthorized")
|
| 83 |
-
|
| 84 |
-
token = authorization[7:] # Strip "Bearer "
|
| 85 |
-
if token != API_KEY:
|
| 86 |
-
logger.warning(f"Invalid API key: {token}")
|
| 87 |
-
raise HTTPException(status_code=403, detail="Forbidden")
|
| 88 |
-
|
| 89 |
-
url = req.url.strip()
|
| 90 |
-
if not url:
|
| 91 |
-
raise HTTPException(status_code=400, detail="Empty URL")
|
| 92 |
-
|
| 93 |
-
logger.info(f"Downloading: {url}")
|
| 94 |
-
|
| 95 |
-
try:
|
| 96 |
-
# Create temp directory
|
| 97 |
-
tmpdir = tempfile.mkdtemp(prefix="yt_audio_")
|
| 98 |
-
out_path = os.path.join(tmpdir, "audio.wav")
|
| 99 |
-
|
| 100 |
-
# Get absolute path to yt-dlp
|
| 101 |
-
# If in venv, use venv's yt-dlp; else use system yt-dlp
|
| 102 |
-
yt_dlp_path = os.path.join(
|
| 103 |
-
os.path.dirname(sys.executable), "yt-dlp"
|
| 104 |
-
)
|
| 105 |
-
if not os.path.exists(yt_dlp_path):
|
| 106 |
-
yt_dlp_path = "yt-dlp" # Fallback to system
|
| 107 |
-
|
| 108 |
-
# Execute yt-dlp
|
| 109 |
-
cmd = [
|
| 110 |
-
yt_dlp_path,
|
| 111 |
-
"--no-playlist",
|
| 112 |
-
"-x",
|
| 113 |
-
"--audio-format", "wav",
|
| 114 |
-
"--audio-quality", "0",
|
| 115 |
-
"--max-filesize", "50M",
|
| 116 |
-
"-o", out_path,
|
| 117 |
-
url,
|
| 118 |
-
]
|
| 119 |
-
|
| 120 |
-
logger.debug(f"Command: {' '.join(cmd)}")
|
| 121 |
-
result = subprocess.run(
|
| 122 |
-
cmd,
|
| 123 |
-
capture_output=True,
|
| 124 |
-
text=True,
|
| 125 |
-
timeout=120,
|
| 126 |
-
)
|
| 127 |
-
|
| 128 |
-
if result.returncode != 0:
|
| 129 |
-
logger.error(f"yt-dlp failed: {result.stderr[:500]}")
|
| 130 |
-
raise HTTPException(
|
| 131 |
-
status_code=400,
|
| 132 |
-
detail=f"Download failed: {result.stderr[:200]}"
|
| 133 |
-
)
|
| 134 |
-
|
| 135 |
-
# Find the downloaded file
|
| 136 |
-
downloaded_file = None
|
| 137 |
-
for f in os.listdir(tmpdir):
|
| 138 |
-
downloaded_file = os.path.join(tmpdir, f)
|
| 139 |
-
break
|
| 140 |
-
|
| 141 |
-
if not downloaded_file or not os.path.exists(downloaded_file):
|
| 142 |
-
logger.error(f"Download completed but no file found in {tmpdir}")
|
| 143 |
-
raise HTTPException(
|
| 144 |
-
status_code=500,
|
| 145 |
-
detail="Download completed but no file found"
|
| 146 |
-
)
|
| 147 |
-
|
| 148 |
-
logger.info(f"Downloaded successfully: {downloaded_file}")
|
| 149 |
-
|
| 150 |
-
# Return file
|
| 151 |
-
return FileResponse(
|
| 152 |
-
path=downloaded_file,
|
| 153 |
-
media_type="audio/wav",
|
| 154 |
-
filename="audio.wav",
|
| 155 |
-
)
|
| 156 |
-
|
| 157 |
-
except subprocess.TimeoutExpired:
|
| 158 |
-
logger.error(f"Timeout downloading {url}")
|
| 159 |
-
raise HTTPException(status_code=504, detail="Download timeout")
|
| 160 |
-
|
| 161 |
-
except Exception as e:
|
| 162 |
-
logger.error(f"Error: {type(e).__name__}: {str(e)}")
|
| 163 |
-
raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}")
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
if __name__ == "__main__":
|
| 167 |
-
import uvicorn
|
| 168 |
-
|
| 169 |
-
host = os.environ.get("HOST", "0.0.0.0")
|
| 170 |
-
port = int(os.environ.get("PORT", "8765"))
|
| 171 |
-
|
| 172 |
-
logger.info(f"Starting YouTube Proxy Server on {host}:{port}")
|
| 173 |
-
logger.info(f"API Key configured: {bool(API_KEY)}")
|
| 174 |
-
|
| 175 |
-
uvicorn.run(
|
| 176 |
-
app,
|
| 177 |
-
host=host,
|
| 178 |
-
port=port,
|
| 179 |
-
log_level=LOG_LEVEL.lower(),
|
| 180 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|