Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- .gitignore +41 -0
- README.md +71 -8
- app.py +725 -0
- audiobook_generator.py +499 -0
- backends/espeech_backend.py +135 -0
- epub_processing_pipeline.py +425 -0
- epub_processor.py +401 -0
- requirements.txt +31 -0
- samples/001/sample.text +3 -0
- synth_batch.py +86 -0
- utils/audio.py +35 -0
- utils/text.py +40 -0
.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
.Python
|
| 7 |
+
env/
|
| 8 |
+
venv/
|
| 9 |
+
.venv/
|
| 10 |
+
pip-log.txt
|
| 11 |
+
pip-delete-this-directory.txt
|
| 12 |
+
.tox/
|
| 13 |
+
.coverage
|
| 14 |
+
.coverage.*
|
| 15 |
+
.cache
|
| 16 |
+
nosetests.xml
|
| 17 |
+
coverage.xml
|
| 18 |
+
*.cover
|
| 19 |
+
*.log
|
| 20 |
+
.git
|
| 21 |
+
.mypy_cache
|
| 22 |
+
.pytest_cache
|
| 23 |
+
.hypothesis
|
| 24 |
+
|
| 25 |
+
# Audio files
|
| 26 |
+
temp_audio/
|
| 27 |
+
output/
|
| 28 |
+
*.wav
|
| 29 |
+
*.mp3
|
| 30 |
+
*.m4a
|
| 31 |
+
|
| 32 |
+
# Model cache
|
| 33 |
+
.cache/
|
| 34 |
+
models/
|
| 35 |
+
checkpoints/
|
| 36 |
+
|
| 37 |
+
# Temporary files
|
| 38 |
+
*.tmp
|
| 39 |
+
*.temp
|
| 40 |
+
.DS_Store
|
| 41 |
+
Thumbs.db
|
README.md
CHANGED
|
@@ -1,14 +1,77 @@
|
|
| 1 |
---
|
| 2 |
-
title: Audiobook
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
license:
|
| 11 |
-
short_description:
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Russian Audiobook Studio
|
| 3 |
+
emoji: 🎧
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
short_description: Create Russian audiobooks from EPUB files with advanced TTS
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# 🎧 Russian Audiobook Studio
|
| 15 |
+
|
| 16 |
+
Transform your Russian EPUB books into professional-quality audiobooks using advanced Text-to-Speech technology with automatic accent placement and voice customization.
|
| 17 |
+
|
| 18 |
+
## ✨ Features
|
| 19 |
+
|
| 20 |
+
- **📚 EPUB Processing**: Upload EPUB files and automatically extract chapters
|
| 21 |
+
- **🎯 Chapter Selection**: Choose specific chapters to process with checkbox interface
|
| 22 |
+
- **🎵 Advanced TTS**: High-quality Russian TTS with automatic accent placement using ruaccent
|
| 23 |
+
- **🎛️ Voice Controls**: Customize voice clarity, variation, and volume levels
|
| 24 |
+
- **🎧 Audio Preview**: Built-in audio players for chapter previews
|
| 25 |
+
- **📥 Download Options**: Download individual chapters or complete audiobook
|
| 26 |
+
- **📱 Responsive Design**: Works on desktop and mobile devices
|
| 27 |
+
- **♿ Accessibility**: Full keyboard navigation and screen reader support
|
| 28 |
+
|
| 29 |
+
## 🚀 How to Use
|
| 30 |
+
|
| 31 |
+
1. **Upload EPUB**: Click "Choose File" and select your Russian EPUB book
|
| 32 |
+
2. **Validate**: Click "Validate EPUB" to extract chapters and book information
|
| 33 |
+
3. **Select Chapters**: Choose which chapters to process using the checkbox interface
|
| 34 |
+
4. **Adjust Settings**: Customize voice parameters in Advanced Settings
|
| 35 |
+
5. **Process**: Click "Process Selected Chapters" to generate audiobook
|
| 36 |
+
6. **Preview & Download**: Listen to previews and download your audiobook
|
| 37 |
+
|
| 38 |
+
## ⚙️ Advanced Settings
|
| 39 |
+
|
| 40 |
+
- **Speech Speed**: Adjust the speed of the synthesized speech (0.6x to 1.4x)
|
| 41 |
+
- **Quality Steps**: Higher values = better quality, longer processing time
|
| 42 |
+
- **Voice Clarity**: Control how closely voice follows reference (1.0-4.0)
|
| 43 |
+
- **Voice Variation**: Control natural voice variation (-2.0 to 1.0)
|
| 44 |
+
- **Volume Level**: Target volume level for generated audio
|
| 45 |
+
|
| 46 |
+
## 🎯 Technical Features
|
| 47 |
+
|
| 48 |
+
- **Automatic Accent Placement**: Uses ruaccent library for perfect Russian pronunciation
|
| 49 |
+
- **Professional Audio Quality**: 24kHz, 16-bit, mono WAV output
|
| 50 |
+
- **Real-time Progress**: Live updates during processing with cancellation support
|
| 51 |
+
- **Error Handling**: Robust error handling with detailed status messages
|
| 52 |
+
- **File Management**: Automatic file serving and download functionality
|
| 53 |
+
|
| 54 |
+
## 📋 Requirements
|
| 55 |
+
|
| 56 |
+
- Russian EPUB files with proper text encoding
|
| 57 |
+
- Reference audio sample (6-12 seconds) for voice cloning
|
| 58 |
+
- Reference text (transcript of the reference audio)
|
| 59 |
+
|
| 60 |
+
## 🔧 Technical Stack
|
| 61 |
+
|
| 62 |
+
- **TTS Engine**: ESpeech-TTS-1_RL-V2 (F5-TTS)
|
| 63 |
+
- **Accent Processing**: ruaccent for automatic Russian stress placement
|
| 64 |
+
- **Audio Processing**: librosa, soundfile, pydub
|
| 65 |
+
- **Web Interface**: Gradio with responsive design
|
| 66 |
+
- **EPUB Processing**: ebooklib for book parsing
|
| 67 |
+
|
| 68 |
+
## 📝 Notes
|
| 69 |
+
|
| 70 |
+
- Processing time depends on chapter length and quality settings
|
| 71 |
+
- Higher quality settings require more processing time
|
| 72 |
+
- All audio files are temporarily stored for preview and download
|
| 73 |
+
- The system automatically handles Russian text normalization and accent placement
|
| 74 |
+
|
| 75 |
+
## 🎉 Ready to Create Audiobooks?
|
| 76 |
+
|
| 77 |
+
Upload your Russian EPUB file and start creating professional-quality audiobooks in minutes!
|
app.py
ADDED
|
@@ -0,0 +1,725 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import os
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import numpy as np
|
| 5 |
+
import soundfile as sf
|
| 6 |
+
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent, maybe_ruaccent_advanced
|
| 7 |
+
from utils.audio import crossfade_concat, normalize_lufs, save_wav
|
| 8 |
+
from backends.espeech_backend import EspeechBackend
|
| 9 |
+
from epub_processor import EpubProcessor
|
| 10 |
+
from epub_processing_pipeline import EpubProcessingPipeline, ProcessingSettings
|
| 11 |
+
|
| 12 |
+
DEFAULT_MODEL = os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2")
|
| 13 |
+
|
| 14 |
+
backend = EspeechBackend(model_id=DEFAULT_MODEL)
|
| 15 |
+
epub_processor = EpubProcessor()
|
| 16 |
+
processing_pipeline = EpubProcessingPipeline(epub_processor, backend)
|
| 17 |
+
|
| 18 |
+
# Global processing state
|
| 19 |
+
processing_cancelled = False
|
| 20 |
+
|
| 21 |
+
# Load default reference text from local sample
|
| 22 |
+
def load_default_ref_text():
    """Return the bundled reference transcript, or "" when the file is missing.

    Reads ``samples/001/sample.text`` (relative to the working directory) as
    UTF-8 and strips surrounding whitespace.
    """
    sample_path = "samples/001/sample.text"
    try:
        handle = open(sample_path, "r", encoding="utf-8")
    except FileNotFoundError:
        # No bundled sample: callers get an empty reference text.
        return ""
    with handle:
        transcript = handle.read()
    return transcript.strip()
|
| 28 |
+
|
| 29 |
+
# EPUB upload validation function
|
| 30 |
+
def _epub_validation_failure(upload_status, reason, processing_status):
    """Return the 9-item result tuple for a failed EPUB validation."""
    return (
        upload_status,
        f"❌ **Validation:** {reason}",
        processing_status,
        f"**Error Details:** {reason}",
        None, None, None, [], [],
    )


def validate_epub_upload(file_path):
    """Validate an uploaded EPUB file and describe its chapters.

    Args:
        file_path: Path of the uploaded file; falsy when nothing was uploaded.

    Returns:
        A 9-tuple ``(upload_status, validation_status, processing_status,
        status_message, chapters, book_title, book_author, chapter_choices,
        default_selection)``. Every code path returns exactly nine items; on
        failure the chapter/title/author slots are ``None`` and the two
        selection slots are empty lists.
    """
    if not file_path:
        return (
            "📁 **Upload Status:** No file uploaded",
            "⏳ **Validation:** Waiting for file upload",
            "⏳ **Processing:** Not started",
            # Status-message slot: the original omitted it here, so this path
            # returned 8 values while all other paths returned 9.
            "",
            None, None, None, [], [],
        )

    # UI boundary: any failure is reported through the status fields instead
    # of being raised to the caller.
    try:
        result = epub_processor.process_epub_upload(file_path)

        if not result.is_valid:
            return _epub_validation_failure(
                "❌ **Upload Status:** EPUB file validation failed",
                result.error_message,
                "⏳ **Processing:** Cannot proceed due to validation error",
            )

        # Per-chapter display entries plus running totals for the summary.
        chapter_list = []
        total_words = 0
        total_duration = 0.0
        for number, chapter in enumerate(result.chapters, start=1):
            total_words += chapter.word_count
            total_duration += chapter.estimated_duration
            chapter_list.append(
                f"**{number}. {chapter.title}**\n"
                f"📊 Words: {chapter.word_count:,} | ⏱️ Est. Duration: {chapter.estimated_duration:.1f} min\n"
                f"📄 Preview: {chapter.preview}\n"
                f"📁 File: {chapter.file_name}"
            )
        chapter_display = "\n\n".join(chapter_list)

        status_message = f"""**📚 Book Information:**
**Title:** {result.book_title}
**Author:** {result.book_author}
**Chapters:** {result.total_chapters}
**Total Words:** {total_words:,}
**Estimated Total Duration:** {total_duration:.1f} minutes

**📖 Chapter Details:**
{chapter_display}"""

        chapter_choices, default_selection = create_chapter_selection_interface(result.chapters)

        return (
            "✅ **Upload Status:** EPUB file uploaded successfully",
            "✅ **Validation:** EPUB file is valid and ready for processing",
            f"🚀 **Processing:** Ready to process {result.total_chapters} chapters",
            status_message,
            result.chapters,
            result.book_title,
            result.book_author,
            chapter_choices,
            default_selection,
        )
    except Exception as e:
        return _epub_validation_failure(
            "❌ **Upload Status:** Error processing file",
            str(e),
            "⏳ **Processing:** Cannot proceed due to error",
        )
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def stop_epub_processing():
    """Set the module-level cancellation flag and return the stop notice."""
    global processing_cancelled
    stop_notice = "🛑 **Processing Stopped:** User requested to stop processing"
    # process_epub_book reads this flag to abandon the current run.
    processing_cancelled = True
    return stop_notice
|
| 127 |
+
|
| 128 |
+
def create_chapter_selection_interface(chapters):
    """Build the checkbox labels for *chapters* and the default selection.

    Returns:
        ``(choices, default_selection)``. Labels are ``"<n>. <title>"`` with a
        1-based ``n``. Every chapter is selected by default, so both items are
        the same list object; with no chapters both are empty lists.
    """
    if not chapters:
        return [], []

    labels = [
        f"{number}. {chapter.title}"
        for number, chapter in enumerate(chapters, start=1)
    ]
    return labels, labels
|
| 143 |
+
|
| 144 |
+
def get_selected_chapters(chapters, selected_choices):
    """Map selected checkbox labels back to their chapter objects.

    Each label is expected to look like ``"<n>. <title>"`` with a 1-based
    ``n``. Labels whose prefix is not an integer, or whose number is out of
    range, are ignored. Returns the matching chapters in selection order.
    """
    if not (chapters and selected_choices):
        return []

    picked = []
    total = len(chapters)
    for label in selected_choices:
        number_text = label.split('.')[0]
        try:
            position = int(number_text) - 1  # labels are 1-based
        except (ValueError, IndexError):
            continue
        if 0 <= position < total:
            picked.append(chapters[position])
    return picked
|
| 161 |
+
|
| 162 |
+
def update_chapter_selection_interface(chapters, choices, default_selection):
    """Return five ``gr.update`` objects for the chapter-selection widgets.

    When *chapters* is ``None`` everything is hidden and the checkbox group is
    cleared. Otherwise the checkbox group is filled with *choices* (value
    *default_selection*) and a selection summary is shown.

    NOTE(review): which components receive each update depends on the event
    wiring, which is not visible here; the fourth update presumably targets
    the select/deselect buttons - confirm against the caller.
    """
    if chapters is None:
        return (
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(choices=[], value=[], visible=False),
            gr.update(visible=False),  # Hide selection buttons
            gr.update(value="", visible=False),
        )

    summary = get_selection_summary(default_selection, len(chapters))
    show_first = gr.update(visible=True)
    hide_second = gr.update(visible=False)
    checkbox_update = gr.update(choices=choices, value=default_selection, visible=True)
    buttons_update = gr.update(visible=True)  # Show selection buttons
    summary_update = gr.update(value=f"📊 **Selection:** {summary}", visible=True)
    return (show_first, hide_second, checkbox_update, buttons_update, summary_update)
|
| 181 |
+
|
| 182 |
+
def save_chapter_selection_state(selected_choices, book_title, book_author):
    """Pass the current chapter selection through unchanged.

    *book_title* and *book_author* are accepted but not used; nothing is
    persisted by this function itself.
    """
    # The value is returned as-is so it can be stored by the caller.
    return selected_choices
|
| 187 |
+
|
| 188 |
+
def restore_chapter_selection_state(chapters, book_title, book_author):
    """Return the selection to show for *chapters*.

    No previous selection is looked up: when *chapters* is not ``None`` every
    chapter label is returned (all selected); otherwise an empty list.
    *book_title* and *book_author* are currently unused.
    """
    if chapters is None:
        return []

    # Only the label list is needed; the default-selection item is discarded.
    all_labels, _default = create_chapter_selection_interface(chapters)
    return all_labels
|
| 199 |
+
|
| 200 |
+
def get_selection_summary(selected_choices, total_chapters):
    """Describe how many of *total_chapters* are in *selected_choices*."""
    if not selected_choices:
        return "No chapters selected"

    chosen = len(selected_choices)
    if chosen != total_chapters:
        return f"{chosen} of {total_chapters} chapters selected"
    return f"All {total_chapters} chapters selected"
|
| 210 |
+
|
| 211 |
+
def select_all_chapters(chapter_choices):
    """Return every available choice, or an empty list when there are none."""
    return chapter_choices or []
|
| 214 |
+
|
| 215 |
+
def deselect_all_chapters():
    """Return an empty selection (a new empty list on every call)."""
    nothing_selected = list()
    return nothing_selected
|
| 218 |
+
|
| 219 |
+
def create_chapter_preview_players(processing_results, book_title, exported_files=None):
    """Collect per-chapter preview metadata.

    Despite the name, this returns plain dicts, not player widgets.

    Args:
        processing_results: Object exposing a ``chapters`` iterable; anything
            falsy or lacking that attribute yields an empty list.
        book_title: Used only to build the fallback audio path.
        exported_files: Optional mapping ``"chapter_<i>" -> path``.

    Returns:
        One dict per chapter with keys ``title``, ``file_path``, ``duration``
        and ``word_count``.
    """
    if not processing_results or not hasattr(processing_results, 'chapters'):
        return []

    # Spaces become underscores; ':', ',' and '.' are dropped from file names.
    title_table = str.maketrans({' ': '_', ':': None, ',': None, '.': None})

    previews = []
    for index, chapter in enumerate(processing_results.chapters):
        key = f"chapter_{index}"
        if exported_files and key in exported_files:
            path = exported_files[key]
        else:
            # No exported path known: fall back to the expected export name.
            safe_title = chapter.title.translate(title_table)
            path = f"output/{book_title.replace(' ', '_')}/chapter_{index:03d}_{safe_title}.wav"
        previews.append({
            'title': chapter.title,
            'file_path': path,
            'duration': chapter.estimated_duration,
            'word_count': chapter.word_count,
        })
    return previews
|
| 243 |
+
|
| 244 |
+
def create_preview_ui_html(players):
    """Wrap pre-rendered player HTML fragments in the preview container.

    Args:
        players: Iterable of HTML strings; they are concatenated as-is (no
            escaping is applied here).

    Returns:
        The container markup with embedded responsive CSS, or a short
        placeholder paragraph when *players* is empty.
    """
    if not players:
        return "<p>No chapters available for preview.</p>"

    players_markup = ''.join(players)
    # Doubled braces below are literal CSS braces inside the f-string.
    return f"""
<div class="chapter-preview-container" style="margin: 20px 0; padding: 20px; border: 2px solid #e1e8ed; border-radius: 12px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);" role="region" aria-label="Chapter previews section">
<h3 style="margin: 0 0 15px 0; color: #2c3e50; font-size: 24px; text-align: center;" id="previews-heading">🎵 Chapter Previews</h3>
<p style="color: #666; margin-bottom: 20px; text-align: center; font-size: 14px;" aria-describedby="previews-heading">
Listen to individual chapters before downloading the complete audiobook.
Each player includes full controls for play, pause, seeking, and volume adjustment.
</p>
<div class="players-list" style="max-height: 600px; overflow-y: auto; padding-right: 10px;" role="list" aria-label="List of chapter audio players">
{players_markup}
</div>
<style>
@media (max-width: 768px) {{
.chapter-preview-container {{
padding: 15px !important;
margin: 10px 0 !important;
}}
.chapter-preview-container h3 {{
font-size: 20px !important;
}}
.chapter-preview-container p {{
font-size: 12px !important;
}}
.players-list {{
max-height: 400px !important;
}}
}}
@media (max-width: 480px) {{
.chapter-preview-container {{
padding: 10px !important;
}}
.chapter-preview-container h3 {{
font-size: 18px !important;
}}
.players-list {{
max-height: 300px !important;
}}
}}
</style>
</div>
"""
|
| 290 |
+
|
| 291 |
+
def get_audio_file_url(file_path):
    """Return the path to hand over for serving an audio file.

    No URL conversion takes place: the path is returned unchanged whether it
    is absolute or relative (the original branched on ``os.path.isabs`` but
    both branches returned the same value). A falsy *file_path* yields "".
    """
    if not file_path:
        return ""
    return file_path
|
| 304 |
+
|
| 305 |
+
def copy_file_for_serving(file_path, target_dir="temp_audio"):
    """Copy *file_path* into *target_dir* and return the copy's path.

    Args:
        file_path: Source file; falsy or non-existent paths yield ``None``.
        target_dir: Destination directory, created when missing.

    Returns:
        ``os.path.join(target_dir, basename)`` on success, or ``None`` when
        the directory cannot be created or the copy fails (the error is
        printed). If the source already is the target file, its path is
        returned instead of being treated as a failure.

    Note:
        Files are stored under their base name only, so two sources with the
        same file name overwrite each other inside *target_dir*.
    """
    if not file_path or not os.path.exists(file_path):
        return None

    import shutil  # local import, as in the original block

    target_path = os.path.join(target_dir, os.path.basename(file_path))
    try:
        os.makedirs(target_dir, exist_ok=True)
        shutil.copy2(file_path, target_path)
    except shutil.SameFileError:
        # Source is already the served copy; nothing to do.
        return target_path
    except OSError as e:
        print(f"Error copying file {file_path} to {target_path}: {e}")
        return None
    return target_path
|
| 324 |
+
|
| 325 |
+
def _epub_processing_failure(message):
    """Return the 5-item result tuple used when no audio was produced."""
    return message, None, None, None, ""


def process_epub_book(chapters, selected_choices, book_title, book_author, speed, nfe_steps, crossfade_ms, seed, target_rms, cfg_strength, sway_coef):
    """Synthesize the selected chapters of an EPUB book and export the audio.

    Args:
        chapters: All chapters of the validated book.
        selected_choices: Checkbox labels (``"<n>. <title>"``) picked by the user.
        book_title: Used for the output directory and download file name.
        book_author: Currently unused.
        speed, nfe_steps, crossfade_ms, target_rms, cfg_strength, sway_coef:
            Forwarded to ``ProcessingSettings``.
        seed: Forwarded as the seed when greater than 0, otherwise ``None``.

    Returns:
        A 5-tuple ``(status_message, download_info, complete_audiobook_path,
        first_chapter_preview_path, chapter_info_text)``. Every path returns
        five items (the original early returns had only four); on error or
        cancellation the middle three are ``None`` and the last is "".
    """
    global processing_cancelled
    processing_cancelled = False  # Reset cancellation flag

    if not chapters:
        return _epub_processing_failure("❌ **Error:** No chapters available")

    selected_chapters = get_selected_chapters(chapters, selected_choices)
    if not selected_chapters:
        return _epub_processing_failure("❌ **Error:** No chapters selected for processing")

    # UI boundary: failures are reported in the status message, not raised.
    try:
        from html import escape

        settings = ProcessingSettings(
            speed=speed,
            nfe_steps=nfe_steps,
            crossfade_ms=crossfade_ms,
            seed=seed if seed > 0 else None,
            target_rms=target_rms,
            cfg_strength=cfg_strength,
            sway_sampling_coef=sway_coef
        )

        progress_messages = []
        status_emojis = {
            'pending': '⏳',
            'processing': '🔄',
            'completed': '✅',
            'error': '❌',
            'cancelled': '🛑'
        }

        def progress_callback(chapter_index, status, message):
            # Stop logging once the user has cancelled.
            if processing_cancelled:
                return
            if chapter_index < len(selected_chapters):
                # Show the chapter title rather than its index for clarity.
                chapter_title = selected_chapters[chapter_index].title
                status_emoji = status_emojis.get(status.lower(), '📝')
                progress_messages.append(f"{status_emoji} **{chapter_title}**: {status.title()} - {message}")
            else:
                progress_messages.append(f"📝 Chapter {chapter_index + 1}: {status} - {message}")

        def cancellation_check():
            return processing_cancelled

        results = processing_pipeline.process_book(selected_chapters, settings, progress_callback, cancellation_check=cancellation_check)

        if processing_cancelled:
            return _epub_processing_failure("🛑 **Processing Cancelled:** User stopped the processing")

        stats = processing_pipeline.get_processing_statistics(results)

        completion_emoji = "🎉" if stats.failed_chapters == 0 else "⚠️"
        # Only the last 10 log messages are shown. (The original kept this
        # remark as a comment inside the f-string, so it leaked into the text.)
        log_tail = "\n".join(progress_messages[-10:])
        status_message = f"""{completion_emoji} **Processing Complete!**

**📊 Statistics:**
- **Selected Chapters:** {len(selected_chapters)} of {len(chapters)} total chapters
- **✅ Completed:** {stats.completed_chapters}
- **❌ Failed:** {stats.failed_chapters}
- **⏱️ Total Processing Time:** {stats.total_processing_time:.1f} seconds
- **📈 Average Time per Chapter:** {stats.average_processing_time:.1f} seconds
- **📝 Total Words:** {stats.total_word_count:,}
- **🎵 Estimated Duration:** {stats.total_audio_duration:.1f} minutes

**📋 Processing Log:**
{log_tail}

**🎯 Next Steps:**
- Listen to individual chapter previews below
- Download the complete audiobook when ready
- Check processing log for any issues
"""

        # NOTE(review): book_title comes from EPUB metadata and is used in a
        # filesystem path with only spaces replaced - confirm it cannot
        # contain path separators or "..".
        safe_book_name = book_title.replace(' ', '_')
        output_dir = f"output/{safe_book_name}"
        exported_files = processing_pipeline.export_audiobook(results, f"{output_dir}/audiobook.wav")
        chapter_file_count = sum(1 for key in exported_files if key.startswith('chapter'))

        chapter_audio_file = None
        chapter_info_text = ""
        if results and len(results) > 0:
            # The first chapter's audio is copied so it can be previewed.
            if "chapter_0" in exported_files:
                chapter_audio_file = copy_file_for_serving(exported_files["chapter_0"])

            exported_chapters = [
                (chapter, exported_files[f"chapter_{i}"])
                for i, chapter in enumerate(selected_chapters)
                if f"chapter_{i}" in exported_files
            ]
            if exported_chapters:
                chapter_lines = "\n".join(
                    f"**📖 {chapter.title}** - {chapter.estimated_duration:.1f} min ({chapter.word_count:,} words)"
                    for chapter, _path in exported_chapters
                )
                chapter_info_text = f"""**🎵 Chapter Previews:**
{chapter_lines}

**📥 Individual Chapter Downloads:**
"""
                for chapter, chapter_file in exported_chapters:
                    chapter_info_text += f"- [{chapter.title}]({chapter_file})\n"

        # The complete audiobook is copied for serving as well.
        complete_audiobook_served = None
        complete_audiobook_original = exported_files.get('complete')
        if complete_audiobook_original:
            complete_audiobook_served = copy_file_for_serving(complete_audiobook_original)

        if complete_audiobook_served:
            # The title is untrusted text placed in an HTML attribute: escape it.
            download_name = escape(f"{safe_book_name}_complete_audiobook.wav", quote=True)
            download_info = f"""**📥 Download Files:**
<div style="margin: 10px 0;">
<a href="{complete_audiobook_served}" download="{download_name}"
style="display: inline-block; padding: 12px 24px; background: #28a745; color: white; text-decoration: none; border-radius: 6px; font-size: 16px; font-weight: bold;"
role="button" aria-label="Download complete audiobook">
📥 Download Complete Audiobook
</a>
</div>
- **Individual Chapters:** {chapter_file_count} files available below
"""
        else:
            download_info = f"""**📥 Download Files:**
- **Individual Chapters:** {chapter_file_count} files available below
"""

        return status_message, download_info, complete_audiobook_served, chapter_audio_file, chapter_info_text

    except Exception as e:
        return _epub_processing_failure(f"❌ **Processing Error:** {str(e)}")
|
| 473 |
+
|
| 474 |
+
def synthesize_ui(text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed):
    """Synthesize a preview for manually entered text, paragraph by paragraph.

    The text is normalized, split into paragraphs, optionally accented, and
    each paragraph is sent to the TTS backend. The resulting pieces are
    cross-faded together, loudness-normalized, saved to ``out_preview.wav``
    and returned for playback.

    Args:
        text: Text to synthesize (may contain several paragraphs).
        ref_audio: Reference voice sample. Either a file path (str) or a
            ``(sample_rate, np.ndarray)`` tuple as produced by a Gradio audio
            component; ``None`` selects the bundled default sample.
        ref_text: Transcript of the reference audio (may be empty/None).
        speed: Speech speed multiplier, converted to ``float``.
        nfe_steps: Number of NFE steps, converted to ``int``.
        crossfade_ms: Crossfade length between paragraphs in milliseconds.
        seed: Seed forwarded to the backend as ``int``; ``None`` is passed
            through unchanged. NOTE(review): the UI labels 0 as "auto" —
            presumably the backend interprets 0 that way; confirm.

    Returns:
        Tuple ``((sample_rate, audio), status_message)`` on success, or
        ``(None, message)`` when there is nothing to synthesize.
    """
    if not text or text.strip() == "":
        return None, "Введите текст."

    # Prepare paragraphs, with optional auto-accents for Russian.
    paras = [maybe_ruaccent(p) for p in split_into_paragraphs(normalize_text(text))]
    if not paras:
        # Normalization/splitting may leave nothing to read; without this guard
        # the crossfade step below would run on an empty list with sr=None.
        return None, "Введите текст."

    # Use the local sample as the default reference if no usable audio is given.
    ref_path = "samples/001/sample.mp3"
    if isinstance(ref_audio, str):
        if ref_audio:
            ref_path = ref_audio
    elif isinstance(ref_audio, tuple) and isinstance(ref_audio[1], np.ndarray):
        # Gradio may hand over (sr, samples); persist it so the backend can
        # read it from disk. Only switch the path once the file is written.
        ref_path = "ref_tmp.wav"
        sf.write(ref_path, ref_audio[1], ref_audio[0])

    # Generate audio per paragraph.
    pieces = []
    sr = None
    rng_seed = int(seed) if seed is not None else None
    for para in paras:
        audio, sr = backend.synthesize(
            text=para,
            ref_audio_path=ref_path,
            ref_text=ref_text or "",
            speed=float(speed),
            nfe_steps=int(nfe_steps),
            seed=rng_seed,
        )
        pieces.append(audio)

    # Crossfade only when there is more than one piece to join.
    if len(pieces) == 1:
        final = pieces[0]
    else:
        final = crossfade_concat(pieces, crossfade_ms=int(crossfade_ms), sample_rate=sr)

    # Target loudness (for personal listening; tweak as you like).
    final = normalize_lufs(final, sr, target_lufs=-20.0)

    out_path = "out_preview.wav"
    save_wav(out_path, final, sr)
    return (sr, final), f"Готово: {len(pieces)} фрагм., длительность ~{len(final)/sr:.1f}с. Сохранено: {out_path}"
|
| 520 |
+
|
| 521 |
+
def _selection_summary_update(selected_choices, chapters):
    """Return the Markdown update describing how many chapters are selected."""
    total = len(chapters) if chapters else 0
    return gr.update(
        value=f"📊 **Selection:** {get_selection_summary(selected_choices, total)}"
    )


# NOTE(review): the nesting of the layout contexts below was reconstructed from
# the section comments; confirm the placement of the "Voice & Tone" controls
# and of the sliders inside the manual-tab row against the intended layout.
with gr.Blocks(title="RU Audiobook Studio") as demo:
    gr.Markdown("# RU Audiobook Studio — ESpeech TTS\nГенерируйте главы аудиокниг с готовыми голосами (zero/one‑shot).")

    # EPUB Upload Section
    with gr.Tab("📚 EPUB Book Processing"):
        gr.Markdown("## Upload EPUB Book\nUpload an EPUB file to create a complete audiobook with automatic chapter detection.")

        # Main content area with responsive layout
        with gr.Row(equal_height=True):
            # Left column - Upload and Settings (responsive)
            with gr.Column(scale=1, min_width=350):
                # File Upload Section
                with gr.Group():
                    gr.Markdown("### 📁 File Upload")
                    epub_upload = gr.File(
                        label="Upload EPUB File",
                        file_types=[".epub"],
                        file_count="single",
                        height=100
                    )
                    epub_validate_btn = gr.Button("🔍 Validate EPUB", variant="secondary", size="lg", elem_id="validate-epub-btn")

                # Processing Settings Section
                with gr.Group():
                    gr.Markdown("### ⚙️ Processing Settings")
                    with gr.Accordion("Advanced Settings", open=False):
                        epub_speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speech Speed", info="Adjust the speed of speech generation (0.6 = slower, 1.4 = faster)")
                        epub_nfe_steps = gr.Slider(12, 96, value=48, step=1, label="Quality Steps (NFE)", info="Higher values produce better quality but take longer to process")
                        epub_crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms)", info="Smooth transition between audio segments")
                        epub_seed = gr.Number(value=0, label="Seed (0 = auto)", info="Random seed for reproducible results (0 = random)")

                        # Voice and tone parameters
                        gr.Markdown("### 🎵 Voice & Tone Settings")
                        epub_target_rms = gr.Slider(0.05, 0.3, value=0.1, step=0.01, label="Volume Level (RMS)", info="Target volume level for generated audio (0.05 = quiet, 0.3 = loud)")
                        epub_cfg_strength = gr.Slider(1.0, 4.0, value=2.0, step=0.1, label="Voice Clarity", info="Controls how closely the voice follows the reference (1.0 = more variation, 4.0 = more consistent)")
                        epub_sway_coef = gr.Slider(-2.0, 1.0, value=-1.0, step=0.1, label="Voice Variation", info="Controls natural voice variation (-2.0 = more monotone, 1.0 = more expressive)")

            # Middle column - Chapter Selection and Processing (responsive)
            with gr.Column(scale=2, min_width=500):
                # Chapter Selection Section
                with gr.Group():
                    gr.Markdown("### 📖 Chapter Selection")
                    epub_chapter_selection = gr.CheckboxGroup(
                        label="Select Chapters to Process",
                        choices=[],
                        value=[],
                        visible=False,
                        info="All chapters are selected by default. Uncheck chapters you don't want to process.",
                        elem_id="chapter-selection"
                    )

                    # Selection control buttons
                    with gr.Row(visible=False) as epub_selection_buttons:
                        epub_select_all_btn = gr.Button("✅ Select All", variant="secondary", size="sm")
                        epub_deselect_all_btn = gr.Button("❌ Deselect All", variant="secondary", size="sm")

                    epub_selection_summary = gr.Markdown("", visible=False)

                # Processing Section
                with gr.Group():
                    gr.Markdown("### 🚀 Processing")
                    with gr.Row():
                        epub_process_btn = gr.Button("🚀 Process Selected Chapters", variant="primary", visible=False, elem_id="process-btn")
                        epub_stop_btn = gr.Button("🛑 Stop Processing", variant="stop", visible=False, elem_id="stop-btn")

            # Right column - Status and Results (responsive)
            with gr.Column(scale=3, min_width=500):
                # Status Section
                with gr.Group():
                    gr.Markdown("### 📊 Status & Results")
                    epub_status = gr.Markdown("📁 **Upload Status:** Ready to upload EPUB file")
                    # NOTE(review): gr.Progress is normally used as a default
                    # argument of an event handler rather than instantiated in
                    # the layout; this object is never wired to any event below
                    # — confirm whether it is needed at all.
                    epub_progress = gr.Progress()
                    epub_validation_status = gr.Markdown("⏳ **Validation:** Waiting for file upload")
                    epub_processing_status = gr.Markdown("⏳ **Processing:** Not started")
                    epub_details = gr.Markdown("")

                # Results Section
                with gr.Group():
                    gr.Markdown("### 🎵 Results")
                    epub_download_audio = gr.Audio(label="Download Complete Audiobook", visible=False)
                    epub_chapter_audio = gr.Audio(label="Chapter Audio Preview", visible=False)
                    epub_chapter_info = gr.Markdown("", visible=False)

        # State management (hidden)
        epub_chapters = gr.State()  # Store chapter data
        epub_book_title = gr.State()  # Store book title
        epub_book_author = gr.State()  # Store book author
        epub_chapter_choices = gr.State()  # Store chapter choices for selection
        epub_selected_choices = gr.State()  # Store selected chapter choices

    # Manual Text Processing Section
    with gr.Tab("✏️ Manual Text Processing"):
        gr.Markdown("## Manual Text Input\nEnter text manually for quick processing and testing.")

        with gr.Row():
            with gr.Column(scale=3):
                text = gr.Textbox(lines=12, label="Текст (глава/абзацы)")
                ref_text = gr.Textbox(lines=3, label="Reference text (текст из референса)", value=load_default_ref_text())
                ref_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Reference audio (6–12 с, опционально - используется samples/001/sample.mp3 по умолчанию)")
                with gr.Row():
                    speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speed")
                    nfe_steps = gr.Slider(12, 96, value=48, step=1, label="NFE steps")
                crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms) между абзацами")
                seed = gr.Number(value=0, label="Seed (0 = авто)")
                btn = gr.Button("Synthesize", variant="primary")
            with gr.Column(scale=2):
                audio_out = gr.Audio(label="Предпрослушка", autoplay=False)
                status = gr.Markdown()

    # Event handlers for manual text processing
    btn.click(
        synthesize_ui,
        inputs=[text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed],
        outputs=[audio_out, status]
    )

    # Event handlers for EPUB processing: validate, then refresh the selection UI
    epub_validate_btn.click(
        validate_epub_upload,
        inputs=[epub_upload],
        outputs=[epub_status, epub_validation_status, epub_processing_status, epub_details, epub_chapters, epub_book_title, epub_book_author, epub_chapter_choices, epub_selected_choices]
    ).then(
        update_chapter_selection_interface,
        inputs=[epub_chapters, epub_chapter_choices, epub_selected_choices],
        outputs=[epub_process_btn, epub_stop_btn, epub_chapter_selection, epub_selection_buttons, epub_selection_summary]
    )

    # EPUB processing event handler: swap Process/Stop buttons around the run,
    # then reveal whichever result widgets actually received a value.
    epub_process_btn.click(
        lambda: (gr.update(visible=False), gr.update(visible=True)),
        outputs=[epub_process_btn, epub_stop_btn]
    ).then(
        process_epub_book,
        inputs=[epub_chapters, epub_chapter_selection, epub_book_title, epub_book_author, epub_speed, epub_nfe_steps, epub_crossfade_ms, epub_seed, epub_target_rms, epub_cfg_strength, epub_sway_coef],
        outputs=[epub_processing_status, epub_details, epub_download_audio, epub_chapter_audio, epub_chapter_info]
    ).then(
        lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[epub_process_btn, epub_stop_btn]
    ).then(
        lambda audio_file, info_text, download_audio: (
            gr.update(visible=bool(audio_file)),
            # info_text may be None if the component has no value yet.
            gr.update(visible=bool((info_text or "").strip())),
            gr.update(visible=bool(download_audio), value=download_audio)
        ),
        inputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio],
        outputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio]
    )

    # Chapter selection change handler
    epub_chapter_selection.change(
        save_chapter_selection_state,
        inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
        outputs=[epub_selected_choices]
    ).then(
        _selection_summary_update,
        inputs=[epub_selected_choices, epub_chapters],
        outputs=[epub_selection_summary]
    )

    # Select All button handler
    epub_select_all_btn.click(
        select_all_chapters,
        inputs=[epub_chapter_choices],
        outputs=[epub_chapter_selection]
    ).then(
        save_chapter_selection_state,
        inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
        outputs=[epub_selected_choices]
    ).then(
        _selection_summary_update,
        inputs=[epub_selected_choices, epub_chapters],
        outputs=[epub_selection_summary]
    )

    # Deselect All button handler
    epub_deselect_all_btn.click(
        deselect_all_chapters,
        outputs=[epub_chapter_selection]
    ).then(
        save_chapter_selection_state,
        inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
        outputs=[epub_selected_choices]
    ).then(
        _selection_summary_update,
        inputs=[epub_selected_choices, epub_chapters],
        outputs=[epub_selection_summary]
    )

    # EPUB stop processing event handler
    epub_stop_btn.click(
        stop_epub_processing,
        outputs=[epub_processing_status]
    ).then(
        lambda: (gr.update(visible=True), gr.update(visible=False)),
        outputs=[epub_process_btn, epub_stop_btn]
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
audiobook_generator.py
ADDED
|
@@ -0,0 +1,499 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Audiobook package generation module for Russian Audiobook Studio.
|
| 4 |
+
Creates high-quality audiobook packages with metadata, playlists, and proper file structure.
|
| 5 |
+
"""
|
| 6 |
+
import os
|
| 7 |
+
import json
|
| 8 |
+
import zipfile
|
| 9 |
+
import re
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from typing import List, Dict, Any, Optional, Tuple
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
import numpy as np
|
| 14 |
+
import soundfile as sf
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
|
| 17 |
+
# Import mutagen for metadata handling
|
| 18 |
+
try:
|
| 19 |
+
from mutagen import File as MutagenFile
|
| 20 |
+
from mutagen.id3 import ID3, TIT2, TPE1, TALB, TCON, TDRC, TRCK
|
| 21 |
+
from mutagen.mp3 import MP3
|
| 22 |
+
MUTAGEN_AVAILABLE = True
|
| 23 |
+
except ImportError:
|
| 24 |
+
MUTAGEN_AVAILABLE = False
|
| 25 |
+
|
| 26 |
+
from epub_processing_pipeline import ChapterProcessingResult
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class AudiobookMetadata:
    """Metadata for an audiobook package.

    Plain data container; it carries no behavior of its own.
    """
    # Book title.
    title: str
    # Book author.
    author: str
    # Number of chapters in the package.
    total_chapters: int
    # Total duration of the audiobook (presumably minutes, matching the
    # per-chapter estimates used elsewhere in this module — TODO confirm).
    total_duration: float
    # Sum of the word counts of all chapters.
    total_word_count: int
    # One dictionary of per-chapter information per chapter.
    chapters: List[Dict[str, Any]]
    # Creation timestamp stored as a string.
    creation_date: str
    # Metadata format version.
    version: str = "1.0"
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class AudiobookGenerator:
|
| 43 |
+
"""Generator for creating audiobook packages with metadata and proper structure."""
|
| 44 |
+
|
| 45 |
+
def __init__(self):
|
| 46 |
+
"""Initialize the audiobook generator."""
|
| 47 |
+
self.target_sample_rate = 24000 # 24kHz
|
| 48 |
+
self.target_bit_depth = 16 # 16-bit
|
| 49 |
+
self.target_channels = 1 # Mono
|
| 50 |
+
self.version = "1.0"
|
| 51 |
+
|
| 52 |
+
def sanitize_filename(self, filename: str, max_length: int = 100) -> str:
|
| 53 |
+
"""
|
| 54 |
+
Sanitize filename for cross-platform compatibility.
|
| 55 |
+
|
| 56 |
+
Args:
|
| 57 |
+
filename: Original filename
|
| 58 |
+
max_length: Maximum length for the filename
|
| 59 |
+
|
| 60 |
+
Returns:
|
| 61 |
+
Sanitized filename
|
| 62 |
+
"""
|
| 63 |
+
# Remove or replace problematic characters
|
| 64 |
+
sanitized = re.sub(r'[<>:"/\\|?*]', '_', filename)
|
| 65 |
+
sanitized = re.sub(r'[^\w\s\-_\.]', '_', sanitized)
|
| 66 |
+
sanitized = re.sub(r'\s+', '_', sanitized) # Replace spaces with underscores
|
| 67 |
+
sanitized = re.sub(r'_+', '_', sanitized) # Replace multiple underscores with single
|
| 68 |
+
|
| 69 |
+
# Remove leading/trailing underscores and dots
|
| 70 |
+
sanitized = sanitized.strip('_.')
|
| 71 |
+
|
| 72 |
+
# Truncate if too long
|
| 73 |
+
if len(sanitized) > max_length:
|
| 74 |
+
sanitized = sanitized[:max_length].rstrip('_.')
|
| 75 |
+
|
| 76 |
+
return sanitized
|
| 77 |
+
|
| 78 |
+
def save_high_quality_audio(
|
| 79 |
+
self,
|
| 80 |
+
audio_data: np.ndarray,
|
| 81 |
+
sample_rate: int,
|
| 82 |
+
output_path: str
|
| 83 |
+
) -> None:
|
| 84 |
+
"""
|
| 85 |
+
Save audio data as high-quality WAV file (24kHz, 16-bit, mono).
|
| 86 |
+
|
| 87 |
+
Args:
|
| 88 |
+
audio_data: Audio data as numpy array
|
| 89 |
+
sample_rate: Original sample rate
|
| 90 |
+
output_path: Path to save the audio file
|
| 91 |
+
|
| 92 |
+
Raises:
|
| 93 |
+
ValueError: If audio data or sample rate is invalid
|
| 94 |
+
"""
|
| 95 |
+
if audio_data is None:
|
| 96 |
+
raise ValueError("Invalid audio data")
|
| 97 |
+
|
| 98 |
+
if sample_rate <= 0:
|
| 99 |
+
raise ValueError("Invalid sample rate")
|
| 100 |
+
|
| 101 |
+
# Ensure mono audio
|
| 102 |
+
if len(audio_data.shape) > 1:
|
| 103 |
+
audio_data = np.mean(audio_data, axis=1)
|
| 104 |
+
|
| 105 |
+
# Resample to target sample rate if needed
|
| 106 |
+
if sample_rate != self.target_sample_rate:
|
| 107 |
+
audio_data = self._resample_audio(audio_data, sample_rate, self.target_sample_rate)
|
| 108 |
+
|
| 109 |
+
# Ensure 16-bit depth
|
| 110 |
+
if audio_data.dtype != np.int16:
|
| 111 |
+
# Convert to 16-bit
|
| 112 |
+
audio_data = (audio_data * 32767).astype(np.int16)
|
| 113 |
+
|
| 114 |
+
# Create output directory if it doesn't exist
|
| 115 |
+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 116 |
+
|
| 117 |
+
# Save as WAV file
|
| 118 |
+
sf.write(output_path, audio_data, self.target_sample_rate, subtype='PCM_16')
|
| 119 |
+
|
| 120 |
+
def _resample_audio(self, audio_data: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
|
| 121 |
+
"""
|
| 122 |
+
Resample audio data to target sample rate.
|
| 123 |
+
|
| 124 |
+
Args:
|
| 125 |
+
audio_data: Original audio data
|
| 126 |
+
orig_sr: Original sample rate
|
| 127 |
+
target_sr: Target sample rate
|
| 128 |
+
|
| 129 |
+
Returns:
|
| 130 |
+
Resampled audio data
|
| 131 |
+
"""
|
| 132 |
+
if orig_sr == target_sr:
|
| 133 |
+
return audio_data
|
| 134 |
+
|
| 135 |
+
# Simple linear interpolation resampling
|
| 136 |
+
# For production use, consider using librosa or scipy.signal.resample
|
| 137 |
+
ratio = target_sr / orig_sr
|
| 138 |
+
new_length = int(len(audio_data) * ratio)
|
| 139 |
+
|
| 140 |
+
# Create new time indices
|
| 141 |
+
old_indices = np.linspace(0, len(audio_data) - 1, len(audio_data))
|
| 142 |
+
new_indices = np.linspace(0, len(audio_data) - 1, new_length)
|
| 143 |
+
|
| 144 |
+
# Interpolate
|
| 145 |
+
resampled = np.interp(new_indices, old_indices, audio_data)
|
| 146 |
+
|
| 147 |
+
return resampled.astype(audio_data.dtype)
|
| 148 |
+
|
| 149 |
+
def create_metadata(
|
| 150 |
+
self,
|
| 151 |
+
title: str,
|
| 152 |
+
author: str,
|
| 153 |
+
chapters: List[Any],
|
| 154 |
+
total_duration: float
|
| 155 |
+
) -> Dict[str, Any]:
|
| 156 |
+
"""
|
| 157 |
+
Create comprehensive metadata for the audiobook.
|
| 158 |
+
|
| 159 |
+
Args:
|
| 160 |
+
title: Audiobook title
|
| 161 |
+
author: Audiobook author
|
| 162 |
+
chapters: List of chapter objects
|
| 163 |
+
total_duration: Total duration in minutes
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
Dictionary containing metadata
|
| 167 |
+
"""
|
| 168 |
+
total_word_count = sum(getattr(chapter, 'word_count', 0) for chapter in chapters)
|
| 169 |
+
|
| 170 |
+
metadata = {
|
| 171 |
+
'title': title,
|
| 172 |
+
'artist': author,
|
| 173 |
+
'album': title,
|
| 174 |
+
'genre': 'Audiobook',
|
| 175 |
+
'date': datetime.now().strftime('%Y-%m-%d'),
|
| 176 |
+
'total_chapters': len(chapters),
|
| 177 |
+
'total_duration': total_duration,
|
| 178 |
+
'total_word_count': total_word_count,
|
| 179 |
+
'chapters': []
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
# Add chapter metadata
|
| 183 |
+
for i, chapter in enumerate(chapters):
|
| 184 |
+
chapter_meta = {
|
| 185 |
+
'title': getattr(chapter, 'title', f'Chapter {i+1}'),
|
| 186 |
+
'order': i + 1,
|
| 187 |
+
'word_count': getattr(chapter, 'word_count', 0),
|
| 188 |
+
'estimated_duration': getattr(chapter, 'estimated_duration', 0.0),
|
| 189 |
+
'file_name': f"chapter_{i+1:03d}_{self.sanitize_filename(getattr(chapter, 'title', f'Chapter_{i+1}'))}.wav"
|
| 190 |
+
}
|
| 191 |
+
metadata['chapters'].append(chapter_meta)
|
| 192 |
+
|
| 193 |
+
return metadata
|
| 194 |
+
|
| 195 |
+
def generate_m3u_playlist(
|
| 196 |
+
self,
|
| 197 |
+
processing_results: List[ChapterProcessingResult],
|
| 198 |
+
output_path: str
|
| 199 |
+
) -> None:
|
| 200 |
+
"""
|
| 201 |
+
Generate M3U playlist file for chapter navigation.
|
| 202 |
+
|
| 203 |
+
Args:
|
| 204 |
+
processing_results: List of chapter processing results
|
| 205 |
+
output_path: Path to save the playlist file
|
| 206 |
+
"""
|
| 207 |
+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 208 |
+
|
| 209 |
+
with open(output_path, 'w', encoding='utf-8') as f:
|
| 210 |
+
f.write("#EXTM3U\n")
|
| 211 |
+
f.write(f"#EXTINF:-1,Complete Audiobook\n")
|
| 212 |
+
f.write("audio/complete_audiobook.wav\n\n")
|
| 213 |
+
|
| 214 |
+
for result in processing_results:
|
| 215 |
+
if result.status == "completed":
|
| 216 |
+
duration_seconds = int(result.estimated_duration * 60)
|
| 217 |
+
sanitized_title = self.sanitize_filename(result.chapter_title)
|
| 218 |
+
filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
|
| 219 |
+
|
| 220 |
+
f.write(f"#EXTINF:{duration_seconds},{result.chapter_title}\n")
|
| 221 |
+
f.write(f"audio/{filename}\n")
|
| 222 |
+
|
| 223 |
+
def create_metadata_json(
|
| 224 |
+
self,
|
| 225 |
+
processing_results: List[ChapterProcessingResult],
|
| 226 |
+
title: str,
|
| 227 |
+
author: str,
|
| 228 |
+
output_path: str
|
| 229 |
+
) -> None:
|
| 230 |
+
"""
|
| 231 |
+
Create JSON metadata file.
|
| 232 |
+
|
| 233 |
+
Args:
|
| 234 |
+
processing_results: List of chapter processing results
|
| 235 |
+
title: Audiobook title
|
| 236 |
+
author: Audiobook author
|
| 237 |
+
output_path: Path to save the metadata file
|
| 238 |
+
"""
|
| 239 |
+
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
| 240 |
+
|
| 241 |
+
# Calculate totals
|
| 242 |
+
total_duration = sum(result.estimated_duration for result in processing_results)
|
| 243 |
+
total_word_count = sum(result.word_count for result in processing_results)
|
| 244 |
+
|
| 245 |
+
# Create metadata
|
| 246 |
+
metadata = {
|
| 247 |
+
'title': title,
|
| 248 |
+
'author': author,
|
| 249 |
+
'total_chapters': len(processing_results),
|
| 250 |
+
'total_duration': total_duration,
|
| 251 |
+
'total_word_count': total_word_count,
|
| 252 |
+
'creation_date': datetime.now().isoformat(),
|
| 253 |
+
'version': self.version,
|
| 254 |
+
'chapters': []
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
# Add chapter information
|
| 258 |
+
for result in processing_results:
|
| 259 |
+
if result.status == "completed":
|
| 260 |
+
sanitized_title = self.sanitize_filename(result.chapter_title)
|
| 261 |
+
filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
|
| 262 |
+
|
| 263 |
+
chapter_info = {
|
| 264 |
+
'title': result.chapter_title,
|
| 265 |
+
'order': result.chapter_index + 1,
|
| 266 |
+
'word_count': result.word_count,
|
| 267 |
+
'estimated_duration': result.estimated_duration,
|
| 268 |
+
'file_name': filename,
|
| 269 |
+
'processing_time': result.processing_time,
|
| 270 |
+
'sample_rate': result.sample_rate
|
| 271 |
+
}
|
| 272 |
+
metadata['chapters'].append(chapter_info)
|
| 273 |
+
|
| 274 |
+
# Save metadata
|
| 275 |
+
with open(output_path, 'w', encoding='utf-8') as f:
|
| 276 |
+
json.dump(metadata, f, indent=2, ensure_ascii=False)
|
| 277 |
+
|
| 278 |
+
def generate_individual_chapters(
|
| 279 |
+
self,
|
| 280 |
+
processing_results: List[ChapterProcessingResult],
|
| 281 |
+
output_dir: str
|
| 282 |
+
) -> List[str]:
|
| 283 |
+
"""
|
| 284 |
+
Generate individual chapter audio files.
|
| 285 |
+
|
| 286 |
+
Args:
|
| 287 |
+
processing_results: List of chapter processing results
|
| 288 |
+
output_dir: Directory to save chapter files
|
| 289 |
+
|
| 290 |
+
Returns:
|
| 291 |
+
List of paths to generated chapter files
|
| 292 |
+
"""
|
| 293 |
+
if not processing_results:
|
| 294 |
+
raise ValueError("No chapters to process")
|
| 295 |
+
|
| 296 |
+
os.makedirs(output_dir, exist_ok=True)
|
| 297 |
+
chapter_files = []
|
| 298 |
+
|
| 299 |
+
for result in processing_results:
|
| 300 |
+
if result.status == "completed" and result.audio_data is not None:
|
| 301 |
+
sanitized_title = self.sanitize_filename(result.chapter_title)
|
| 302 |
+
filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
|
| 303 |
+
file_path = os.path.join(output_dir, filename)
|
| 304 |
+
|
| 305 |
+
self.save_high_quality_audio(
|
| 306 |
+
result.audio_data,
|
| 307 |
+
result.sample_rate,
|
| 308 |
+
file_path
|
| 309 |
+
)
|
| 310 |
+
chapter_files.append(file_path)
|
| 311 |
+
|
| 312 |
+
return chapter_files
|
| 313 |
+
|
| 314 |
+
def generate_complete_audiobook(
|
| 315 |
+
self,
|
| 316 |
+
processing_results: List[ChapterProcessingResult],
|
| 317 |
+
output_path: str
|
| 318 |
+
) -> str:
|
| 319 |
+
"""
|
| 320 |
+
Generate complete audiobook by concatenating all chapters.
|
| 321 |
+
|
| 322 |
+
Args:
|
| 323 |
+
processing_results: List of chapter processing results
|
| 324 |
+
output_path: Path to save the complete audiobook
|
| 325 |
+
|
| 326 |
+
Returns:
|
| 327 |
+
Path to the generated complete audiobook file
|
| 328 |
+
"""
|
| 329 |
+
if not processing_results:
|
| 330 |
+
raise ValueError("No chapters to process")
|
| 331 |
+
|
| 332 |
+
# Collect valid audio data
|
| 333 |
+
valid_audio = []
|
| 334 |
+
sample_rate = None
|
| 335 |
+
|
| 336 |
+
for result in processing_results:
|
| 337 |
+
if result.status == "completed" and result.audio_data is not None:
|
| 338 |
+
valid_audio.append(result.audio_data)
|
| 339 |
+
if sample_rate is None:
|
| 340 |
+
sample_rate = result.sample_rate
|
| 341 |
+
|
| 342 |
+
if not valid_audio:
|
| 343 |
+
raise ValueError("No valid audio data found")
|
| 344 |
+
|
| 345 |
+
# Concatenate audio
|
| 346 |
+
from utils.audio import crossfade_concat
|
| 347 |
+
# Use smaller crossfade for short audio segments
|
| 348 |
+
crossfade_ms = min(150, int(min(len(audio) for audio in valid_audio) / sample_rate * 1000 * 0.5))
|
| 349 |
+
complete_audio = crossfade_concat(valid_audio, crossfade_ms=crossfade_ms, sample_rate=sample_rate)
|
| 350 |
+
|
| 351 |
+
# Save complete audiobook
|
| 352 |
+
self.save_high_quality_audio(complete_audio, sample_rate, output_path)
|
| 353 |
+
|
| 354 |
+
return output_path
|
| 355 |
+
|
| 356 |
+
def generate_audiobook_package(
|
| 357 |
+
self,
|
| 358 |
+
processing_results: List[ChapterProcessingResult],
|
| 359 |
+
title: str,
|
| 360 |
+
author: str,
|
| 361 |
+
output_dir: str
|
| 362 |
+
) -> Dict[str, str]:
|
| 363 |
+
"""
|
| 364 |
+
Generate complete audiobook package with all files and metadata.
|
| 365 |
+
|
| 366 |
+
Args:
|
| 367 |
+
processing_results: List of chapter processing results
|
| 368 |
+
title: Audiobook title
|
| 369 |
+
author: Audiobook author
|
| 370 |
+
output_dir: Directory to create the package
|
| 371 |
+
|
| 372 |
+
Returns:
|
| 373 |
+
Dictionary with paths to generated files
|
| 374 |
+
"""
|
| 375 |
+
if processing_results is None:
|
| 376 |
+
raise ValueError("Processing results cannot be None")
|
| 377 |
+
if not processing_results:
|
| 378 |
+
raise ValueError("No chapters to process")
|
| 379 |
+
|
| 380 |
+
# Create directory structure
|
| 381 |
+
sanitized_title = self.sanitize_filename(title)
|
| 382 |
+
package_dir = os.path.join(output_dir, sanitized_title)
|
| 383 |
+
audio_dir = os.path.join(package_dir, "audio")
|
| 384 |
+
metadata_dir = os.path.join(package_dir, "metadata")
|
| 385 |
+
|
| 386 |
+
os.makedirs(audio_dir, exist_ok=True)
|
| 387 |
+
os.makedirs(metadata_dir, exist_ok=True)
|
| 388 |
+
|
| 389 |
+
generated_files = {}
|
| 390 |
+
|
| 391 |
+
# Generate individual chapter files
|
| 392 |
+
chapter_files = self.generate_individual_chapters(processing_results, audio_dir)
|
| 393 |
+
generated_files['chapters'] = chapter_files
|
| 394 |
+
|
| 395 |
+
# Generate complete audiobook
|
| 396 |
+
complete_path = os.path.join(audio_dir, "complete_audiobook.wav")
|
| 397 |
+
self.generate_complete_audiobook(processing_results, complete_path)
|
| 398 |
+
generated_files['complete'] = complete_path
|
| 399 |
+
|
| 400 |
+
# Generate metadata files
|
| 401 |
+
metadata_json_path = os.path.join(metadata_dir, "metadata.json")
|
| 402 |
+
self.create_metadata_json(processing_results, title, author, metadata_json_path)
|
| 403 |
+
generated_files['metadata_json'] = metadata_json_path
|
| 404 |
+
|
| 405 |
+
# Generate M3U playlist
|
| 406 |
+
playlist_path = os.path.join(metadata_dir, "playlist.m3u")
|
| 407 |
+
self.generate_m3u_playlist(processing_results, playlist_path)
|
| 408 |
+
generated_files['playlist'] = playlist_path
|
| 409 |
+
|
| 410 |
+
return generated_files
|
| 411 |
+
|
| 412 |
+
def create_complete_package_zip(
    self,
    processing_results: List[ChapterProcessingResult],
    title: str,
    author: str,
    output_dir: str
) -> str:
    """
    Build the audiobook package on disk and bundle it into a ZIP archive.

    The package tree is first written by ``generate_audiobook_package``;
    every file found under ``<output_dir>/<sanitized title>`` is then added
    to ``<output_dir>/<sanitized title>_audiobook.zip``. Archive member
    names are relative to ``output_dir``, so they keep the package folder
    as their top-level directory.

    Args:
        processing_results: List of chapter processing results
        title: Audiobook title
        author: Audiobook author
        output_dir: Directory that receives both the package and the ZIP

    Returns:
        Path to the created ZIP file
    """
    # Only the files written to disk matter here; the returned mapping is unused.
    self.generate_audiobook_package(processing_results, title, author, output_dir)

    safe_title = self.sanitize_filename(title)
    package_root = os.path.join(output_dir, safe_title)
    archive_path = os.path.join(output_dir, f"{safe_title}_audiobook.zip")

    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for current_dir, _subdirs, filenames in os.walk(package_root):
            for filename in filenames:
                source = os.path.join(current_dir, filename)
                archive.write(source, os.path.relpath(source, output_dir))

    return archive_path
|
| 450 |
+
|
| 451 |
+
def estimate_package_size(self, processing_results: List[ChapterProcessingResult]) -> int:
    """
    Estimate the size of the audiobook package in bytes.

    Only results whose status is "completed" and that carry audio data are
    counted. Each one contributes an uncompressed 16-bit WAV estimate based
    on its ``estimated_duration`` (multiplied by 60 to obtain seconds), the
    generator's target sample rate and its target channel count. A flat 10%
    is added on top for metadata, playlist and ZIP overhead.

    Args:
        processing_results: List of chapter processing results

    Returns:
        Estimated package size in bytes (0 for an empty or missing list)
    """
    if not processing_results:
        return 0

    def wav_bytes(entry) -> int:
        # sample_rate * seconds * 2 bytes per 16-bit sample * channels
        seconds = entry.estimated_duration * 60
        return int(self.target_sample_rate * seconds * 2 * self.target_channels)

    audio_bytes = sum(
        wav_bytes(entry)
        for entry in processing_results
        if entry.status == "completed" and entry.audio_data is not None
    )

    # 10% overhead for metadata, playlist, and ZIP container
    return int(audio_bytes + audio_bytes * 0.1)
|
| 477 |
+
|
| 478 |
+
def add_audio_metadata(self, audio_path: str, metadata: Dict[str, Any]) -> None:
    """
    Add metadata to audio files using mutagen (if available).

    This is currently a placeholder: nothing is written to the file in any
    case. The mutagen availability check is kept so that the future tagging
    code has an obvious place to live.

    Args:
        audio_path: Path to the audio file
        metadata: Metadata dictionary
    """
    if not MUTAGEN_AVAILABLE:
        return  # Skip if mutagen is not available

    # For WAV files, ID3 tags cannot be added directly; tagging is reserved
    # for future MP3 support. The former `try: pass / except: pass` wrapper
    # was dead code (nothing inside it could raise) and has been removed.
    return
|
| 496 |
+
|
| 497 |
+
def cleanup(self):
    """Release temporary resources (currently there is nothing to release)."""
    return None
|
backends/espeech_backend.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# backends/espeech_backend.py
|
| 2 |
+
# Полная интеграция ESpeech/ESpeech-TTS-1_RL-V2 (F5-TTS) для инференса.
|
| 3 |
+
# Основано на коде из model card: загрузка весов, препроцессинг референса,
|
| 4 |
+
# вызов infer_process и возврат (wave, sample_rate).
|
| 5 |
+
from __future__ import annotations
|
| 6 |
+
from typing import Tuple, Optional
|
| 7 |
+
import os
|
| 8 |
+
import gc
|
| 9 |
+
import numpy as np
|
| 10 |
+
import torch
|
| 11 |
+
import torchaudio
|
| 12 |
+
|
| 13 |
+
# Force CPU usage on macOS to avoid MPS issues.
# NOTE: this runs at import time and patches torch process-wide: after this
# module is imported on a machine where MPS is available, every caller of
# torch.backends.mps.is_available() / is_built() sees False.
# NOTE(review): the page this was recovered from lost indentation; the two
# overrides below are assumed to sit inside the `if` (they share its intent
# and its hasattr guard) - confirm against the original file.
if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
    # Ask PyTorch to fall back to CPU for ops that MPS does not implement.
    os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
    # Disable MPS to force CPU usage
    torch.backends.mps.is_available = lambda: False
    torch.backends.mps.is_built = lambda: False
|
| 19 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 20 |
+
|
| 21 |
+
# F5-TTS imports (как в карточке модели)
|
| 22 |
+
from f5_tts.infer.utils_infer import (
|
| 23 |
+
infer_process,
|
| 24 |
+
load_model,
|
| 25 |
+
load_vocoder,
|
| 26 |
+
preprocess_ref_audio_text,
|
| 27 |
+
)
|
| 28 |
+
from f5_tts.model import DiT
|
| 29 |
+
|
| 30 |
+
# Model configuration taken from the model card; passed to load_model()
# together with the DiT class when the checkpoint is loaded.
MODEL_CFG = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
|
| 32 |
+
|
| 33 |
+
class EspeechBackend:
    """
    Inference wrapper around the ESpeech F5-TTS checkpoint.

    On construction the checkpoint and vocabulary are fetched from the
    Hugging Face Hub, the model and vocoder are loaded, and both are moved
    to CUDA when available (CPU otherwise). ``synthesize`` then turns text
    plus a reference recording into a float32 waveform.
    """

    def __init__(self, model_id: str = "ESpeech/ESpeech-TTS-1_RL-V2"):
        """
        Create the backend and load the model immediately.

        Args:
            model_id: Hugging Face repository that hosts the checkpoint
                and the vocabulary file
        """
        self.model_id = model_id
        self.model_file = "espeech_tts_rlv2.pt"
        self.vocab_file = "vocab.txt"
        # Only CUDA or CPU are ever selected; MPS is deliberately not used.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")
        self.model = None
        self.vocoder = None
        self._ensure_loaded()

    def _download(self, repo: str, filename: str) -> str:
        """
        Return a local path to ``filename`` from the Hub repository ``repo``.

        A single-file download is tried first. If that fails for any
        reason, the whole repository snapshot is downloaded into a local
        ``cache_<repo>`` directory and the file is looked up there.

        Raises:
            FileNotFoundError: If the snapshot does not contain the file
        """
        try:
            return hf_hub_download(repo_id=repo, filename=filename)
        except Exception:
            # Fallback path: fetch the complete snapshot instead.
            local_dir = f"cache_{repo.replace('/', '_')}"
            snap_dir = snapshot_download(repo_id=repo, local_dir=local_dir)
            path = os.path.join(snap_dir, filename)
            if not os.path.exists(path):
                raise FileNotFoundError(f"{filename} not found in snapshot {snap_dir}")
            return path

    def _ensure_loaded(self):
        """
        Load the model and vocoder unless both are already loaded.

        The method is idempotent: callers that invoke it defensively (the
        constructor already calls it once) no longer trigger a second
        download check and a second full load of the weights.
        """
        if self.model is not None and self.vocoder is not None:
            return

        # Fetch the model weights and the vocabulary.
        model_path = self._download(self.model_id, self.model_file)
        vocab_path = self._download(self.model_id, self.vocab_file)
        # Initialise the model and the vocoder.
        self.model = load_model(DiT, MODEL_CFG, model_path, vocab_file=vocab_path)
        self.vocoder = load_vocoder()
        # Move both to the selected device.
        try:
            self.model.to(self.device)
            self.vocoder.to(self.device)
        except Exception as e:
            # Fallback to CPU if device transfer fails
            print(f"Warning: Failed to move model to {self.device}, falling back to CPU: {e}")
            self.device = torch.device("cpu")
            self.model.to(self.device)
            self.vocoder.to(self.device)

    def synthesize(
        self,
        text: str,
        ref_audio_path: Optional[str],
        ref_text: str,
        speed: float = 1.0,
        nfe_steps: int = 48,
        seed: Optional[int] = None,
        cross_fade_sec: float = 0.15,
        target_rms: float = 0.1,
        cfg_strength: float = 2.0,
        sway_sampling_coef: float = -1.0,
    ) -> Tuple[np.ndarray, int]:
        """
        Synthesize ``text`` in the voice of the reference recording.

        Args:
            text: Text to speak; must contain non-whitespace characters
            ref_audio_path: Path to an existing reference audio file
            ref_text: Transcript of that same reference audio
            speed: Forwarded to ``infer_process`` as ``speed``
            nfe_steps: Forwarded to ``infer_process`` as ``nfe_step``
            seed: If given, passed to ``torch.manual_seed`` before inference
            cross_fade_sec: Forwarded as ``cross_fade_duration``
            target_rms: Forwarded as ``target_rms``
            cfg_strength: Forwarded as ``cfg_strength``
            sway_sampling_coef: Forwarded as ``sway_sampling_coef``

        Returns:
            ``(audio, sample_rate)`` where ``audio`` is a float32 array
            clipped to [-1, 1]. The audio is expected to be mono; this
            method does not downmix.

        Raises:
            ValueError: If ``text`` or ``ref_text`` is empty or blank
            FileNotFoundError: If ``ref_audio_path`` is missing or does
                not exist
        """
        if not text or not text.strip():
            raise ValueError("Пустой текст для синтеза.")
        if not ref_audio_path or not os.path.exists(ref_audio_path):
            raise FileNotFoundError("Укажите путь к reference audio (6–12 с).")
        if not ref_text or not ref_text.strip():
            raise ValueError("Укажите reference text (транскрипт того же reference audio).")

        if seed is not None:
            torch.manual_seed(int(seed))

        # Prepare the reference (per the original author's note, the helper
        # performs resampling / mono conversion itself).
        ref_audio_proc, ref_text_proc = preprocess_ref_audio_text(ref_audio_path, ref_text)

        # Main inference call, with the extra voice-quality parameters.
        final_wave, final_sample_rate, _ = infer_process(
            ref_audio_proc,
            ref_text_proc,
            text,
            self.model,
            self.vocoder,
            cross_fade_duration=float(cross_fade_sec),
            nfe_step=int(nfe_steps),
            speed=float(speed),
            target_rms=float(target_rms),
            cfg_strength=float(cfg_strength),
            sway_sampling_coef=float(sway_sampling_coef),
        )

        # Defensively coerce dtype and range.
        wav = np.asarray(final_wave, dtype=np.float32)
        wav = np.clip(wav, -1.0, 1.0)
        sr = int(final_sample_rate)

        # Free CUDA memory between calls (long books). Garbage is collected
        # first so that tensors kept alive only by reference cycles are
        # returned to the allocator before its cache is emptied.
        if self.device.type == "cuda":
            try:
                gc.collect()
                torch.cuda.empty_cache()
            except Exception:
                # Best-effort cleanup: never fail a finished synthesis here.
                pass

        return wav, sr
|
epub_processing_pipeline.py
ADDED
|
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
EPUB processing pipeline for Russian Audiobook Studio.
|
| 4 |
+
Integrates EPUB chapter detection with ESpeechBackend for TTS processing.
|
| 5 |
+
"""
|
| 6 |
+
import os
|
| 7 |
+
import time
|
| 8 |
+
import gc
|
| 9 |
+
from typing import List, Optional, Callable, Dict, Any, Tuple
|
| 10 |
+
from dataclasses import dataclass
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import numpy as np
|
| 13 |
+
import soundfile as sf
|
| 14 |
+
|
| 15 |
+
from epub_processor import EpubProcessor, Chapter
|
| 16 |
+
from backends.espeech_backend import EspeechBackend
|
| 17 |
+
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
|
| 18 |
+
from utils.audio import crossfade_concat, normalize_lufs, save_wav
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass
class ProcessingSettings:
    """
    Settings for EPUB processing pipeline.

    When ``ref_text`` is left empty, the transcript stored in
    ``samples/001/sample.text`` (relative to the current working directory)
    is loaded as the default; if that file cannot be read, ``ref_text``
    stays empty.
    """
    speed: float = 1.0              # forwarded to the TTS backend as `speed`
    nfe_steps: int = 48             # forwarded to the TTS backend as `nfe_steps`
    crossfade_ms: int = 150         # crossfade between audio pieces, in milliseconds
    target_lufs: float = -20.0      # loudness target handed to normalize_lufs
    seed: Optional[int] = None      # optional seed forwarded to the TTS backend
    ref_audio_path: str = "samples/001/sample.mp3"
    ref_text: str = ""
    # Voice and tone parameters
    target_rms: float = 0.1
    cfg_strength: float = 2.0
    sway_sampling_coef: float = -1.0

    def __post_init__(self):
        """Load default reference text if not provided."""
        if self.ref_text:
            return
        try:
            with open("samples/001/sample.text", "r", encoding="utf-8") as f:
                self.ref_text = f.read().strip()
        except OSError:
            # Covers a missing file as well as an unreadable one (permission
            # denied, path is a directory, ...): building the settings must
            # not crash just because the optional default is unavailable.
            self.ref_text = ""
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@dataclass
class ChapterProcessingResult:
    """Result of processing a single chapter."""
    chapter_index: int  # the pipeline fills this from Chapter.order
    chapter_title: str
    status: str  # pending, processing, completed, error, cancelled
    audio_data: Optional[np.ndarray] = None  # set by the pipeline only for completed chapters
    sample_rate: Optional[int] = None  # sample rate reported by the TTS backend
    processing_time: float = 0.0  # wall-clock seconds (difference of time.time() values)
    error_message: Optional[str] = None  # set for error and cancelled results
    word_count: int = 0  # copied from the source Chapter
    estimated_duration: float = 0.0  # copied from the source Chapter (documented there as minutes)
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
@dataclass
class ProcessingStatistics:
    """Statistics for book processing."""
    total_chapters: int = 0  # number of results that were summarised
    completed_chapters: int = 0  # results whose status is "completed"
    failed_chapters: int = 0  # results whose status is "error"
    total_processing_time: float = 0.0  # sum of per-chapter processing_time
    average_processing_time: float = 0.0  # total_processing_time / total_chapters
    # Sum of the chapters' *estimated* durations, not measured audio length.
    total_audio_duration: float = 0.0
    total_word_count: int = 0
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
class EpubProcessingPipeline:
    """Main pipeline for processing EPUB books into audiobooks."""

    # Characters replaced by "_" when a chapter title becomes part of a file
    # name: the space (as before), path separators, characters reserved on
    # common filesystems, and ASCII control characters.
    _UNSAFE_FILENAME_CHARS = str.maketrans(
        {ch: "_" for ch in ' /\\:*?"<>|' + "".join(map(chr, range(32)))}
    )

    def __init__(self, epub_processor: EpubProcessor, tts_backend: EspeechBackend):
        """
        Initialize the processing pipeline.

        Args:
            epub_processor: EPUB processor for chapter extraction
            tts_backend: TTS backend for audio synthesis
        """
        self.epub_processor = epub_processor
        self.tts_backend = tts_backend
        self.processing_settings = ProcessingSettings()

        # The backend loads its weights in its own constructor. Only trigger
        # a load when that has not happened, so the model is not loaded twice.
        if (
            getattr(tts_backend, "model", None) is None
            or getattr(tts_backend, "vocoder", None) is None
        ):
            self.tts_backend._ensure_loaded()

    @staticmethod
    def _safe_filename_part(text: str) -> str:
        """Return ``text`` with spaces and filesystem-unsafe characters replaced by ``_``."""
        return text.translate(EpubProcessingPipeline._UNSAFE_FILENAME_CHARS)

    @staticmethod
    def _build_result(chapter, status: str, processing_time: float, **extra) -> ChapterProcessingResult:
        """Create a result for ``chapter`` carrying the fields common to every outcome."""
        return ChapterProcessingResult(
            chapter_index=chapter.order,
            chapter_title=chapter.title,
            status=status,
            processing_time=processing_time,
            word_count=chapter.word_count,
            estimated_duration=chapter.estimated_duration,
            **extra,
        )

    def process_chapter(
        self,
        chapter: Chapter,
        settings: ProcessingSettings,
        progress_callback: Optional[Callable[[int, str, str], None]] = None,
        cancellation_check: Optional[Callable[[], bool]] = None
    ) -> ChapterProcessingResult:
        """
        Process a single chapter into audio.

        The chapter text is normalized, split into paragraphs, synthesized
        paragraph by paragraph, joined with a crossfade and loudness
        normalized. Failures never propagate: they are reported through a
        result with status "error".

        Args:
            chapter: Chapter to process
            settings: Processing settings
            progress_callback: Optional callback for progress updates,
                called as ``(chapter.order, status, message)``
            cancellation_check: Optional callable polled before the chapter
                and before every paragraph; a true value stops processing

        Returns:
            ChapterProcessingResult with processing status and audio data
            (status is "completed", "error" or "cancelled")
        """
        start_time = time.time()

        if progress_callback:
            progress_callback(chapter.order, "processing", f"Processing {chapter.title}")

        # Check for cancellation before starting
        if cancellation_check and cancellation_check():
            return self._build_result(
                chapter, "cancelled", 0.0,
                error_message="Processing cancelled by user",
            )

        try:
            # Normalize and prepare text
            normalized_text = normalize_text(chapter.content)
            paragraphs = split_into_paragraphs(normalized_text)
            paragraphs = [maybe_ruaccent(p) for p in paragraphs]

            if not paragraphs:
                raise ValueError("No text content to process")

            audio_pieces = []
            sample_rate = None

            for i, paragraph in enumerate(paragraphs):
                if not paragraph.strip():
                    continue

                # Check for cancellation before each paragraph
                if cancellation_check and cancellation_check():
                    return self._build_result(
                        chapter, "cancelled", time.time() - start_time,
                        error_message="Processing cancelled by user",
                    )

                # Synthesize audio for paragraph with enhanced voice parameters
                audio, sr = self.tts_backend.synthesize(
                    text=paragraph,
                    ref_audio_path=settings.ref_audio_path,
                    ref_text=settings.ref_text,
                    speed=settings.speed,
                    nfe_steps=settings.nfe_steps,
                    seed=settings.seed,
                    cross_fade_sec=settings.crossfade_ms / 1000.0,
                    target_rms=settings.target_rms,
                    cfg_strength=settings.cfg_strength,
                    sway_sampling_coef=settings.sway_sampling_coef
                )

                audio_pieces.append(audio)
                sample_rate = sr

                # Periodic memory cleanup (paragraph indexes 0, 5, 10, ...)
                if i % 5 == 0:
                    gc.collect()

            if not audio_pieces:
                raise ValueError("No audio generated")

            # Concatenate audio pieces with crossfade
            final_audio = crossfade_concat(
                audio_pieces,
                crossfade_ms=settings.crossfade_ms,
                sample_rate=sample_rate
            )

            # Normalize audio levels
            final_audio = normalize_lufs(
                final_audio,
                sample_rate,
                target_lufs=settings.target_lufs
            )

            processing_time = time.time() - start_time

            if progress_callback:
                progress_callback(chapter.order, "completed", f"Completed {chapter.title}")

            return self._build_result(
                chapter, "completed", processing_time,
                audio_data=final_audio,
                sample_rate=sample_rate,
            )

        except Exception as e:
            # Boundary handler: any failure becomes an "error" result so that
            # one bad chapter does not abort the whole book.
            processing_time = time.time() - start_time
            error_msg = f"Error processing {chapter.title}: {str(e)}"

            if progress_callback:
                progress_callback(chapter.order, "error", error_msg)

            return self._build_result(
                chapter, "error", processing_time,
                error_message=error_msg,
            )

    def process_chapter_with_retry(
        self,
        chapter: Chapter,
        settings: ProcessingSettings,
        max_retries: int = 2,
        progress_callback: Optional[Callable[[int, str, str], None]] = None,
        cancellation_check: Optional[Callable[[], bool]] = None
    ) -> ChapterProcessingResult:
        """
        Process a chapter with retry mechanism for failed attempts.

        A "completed" or "cancelled" result is returned immediately; only
        "error" results are retried. Cancellation is therefore never
        retried, delayed by the retry pause, or reported as a failure.

        Args:
            chapter: Chapter to process
            settings: Processing settings
            max_retries: Maximum number of retry attempts (negative values
                are treated as 0, so at least one attempt is always made)
            progress_callback: Optional callback for progress updates
            cancellation_check: Optional callable forwarded to
                ``process_chapter``

        Returns:
            ChapterProcessingResult with processing status
        """
        last_result = None

        for attempt in range(max(0, max_retries) + 1):
            if attempt > 0:
                if progress_callback:
                    progress_callback(chapter.order, "processing", f"Retry {attempt} for {chapter.title}")
                time.sleep(1)  # Brief pause before retry

            result = self.process_chapter(chapter, settings, progress_callback, cancellation_check)
            last_result = result

            if result.status in ("completed", "cancelled"):
                return result

        # All retries failed
        if progress_callback:
            progress_callback(chapter.order, "error", f"Failed after {max_retries} retries")

        return last_result

    def process_book(
        self,
        chapters: List[Chapter],
        settings: ProcessingSettings,
        progress_callback: Optional[Callable[[int, str, str], None]] = None,
        max_retries: int = 2,
        cancellation_check: Optional[Callable[[], bool]] = None
    ) -> List[ChapterProcessingResult]:
        """
        Process an entire book with multiple chapters.

        Chapters are processed in the given order. Processing stops as soon
        as cancellation is requested, either before a chapter starts or
        when a chapter comes back with status "cancelled".

        Args:
            chapters: List of chapters to process
            settings: Processing settings
            progress_callback: Optional callback for progress updates
            max_retries: Maximum retries per chapter
            cancellation_check: Optional callable polled before each chapter
                and forwarded to the per-chapter processing

        Returns:
            List of ChapterProcessingResult objects for the chapters that
            were attempted
        """
        results = []

        for chapter in chapters:
            # Check for cancellation before each chapter
            if cancellation_check and cancellation_check():
                break

            # Update chapter status
            self.epub_processor.update_chapter_status(chapters, chapter.order, "processing")

            # Process chapter with retry
            result = self.process_chapter_with_retry(
                chapter,
                settings,
                max_retries,
                progress_callback,
                cancellation_check
            )

            # Update chapter status based on result
            if result.status == "completed":
                self.epub_processor.update_chapter_status(chapters, chapter.order, "completed")
            else:
                self.epub_processor.update_chapter_status(
                    chapters,
                    chapter.order,
                    "error",
                    result.error_message
                )

            results.append(result)

            # Memory cleanup after each chapter
            gc.collect()

            # A cancelled chapter ends the run; do not start further chapters.
            if result.status == "cancelled":
                break

        return results

    def concatenate_chapter_audio(
        self,
        results: List[ChapterProcessingResult],
        crossfade_ms: int = 150
    ) -> Optional[np.ndarray]:
        """
        Concatenate audio from multiple chapter results.

        Only results with status "completed" and audio data are used. The
        sample rate handed to the crossfade is the first non-None rate among
        them; the rates of the remaining chapters are not checked.

        Args:
            results: List of ChapterProcessingResult objects
            crossfade_ms: Crossfade between chapters in milliseconds
                (defaults to the previously hard-coded 150)

        Returns:
            Concatenated audio array or None if no valid audio
        """
        valid_audio = []
        sample_rate = None

        for result in results:
            if result.status == "completed" and result.audio_data is not None:
                valid_audio.append(result.audio_data)
                if sample_rate is None:
                    sample_rate = result.sample_rate

        if not valid_audio:
            return None

        # Concatenate with crossfade
        return crossfade_concat(valid_audio, crossfade_ms=crossfade_ms, sample_rate=sample_rate)

    def export_audiobook(
        self,
        results: List[ChapterProcessingResult],
        output_path: str,
        export_individual_chapters: bool = True
    ) -> Dict[str, str]:
        """
        Export processed audiobook to files.

        Args:
            results: List of ChapterProcessingResult objects
            output_path: Path of the complete audiobook file; individual
                chapter files are written next to it
            export_individual_chapters: Whether to export individual chapter files

        Returns:
            Dictionary with paths to exported files: one
            ``chapter_<index>`` entry per exported chapter and a
            ``complete`` entry when any audio was available
        """
        output_dir = Path(output_path).parent
        output_dir.mkdir(parents=True, exist_ok=True)

        exported_files = {}
        completed = [
            r for r in results
            if r.status == "completed" and r.audio_data is not None
        ]

        # Export individual chapters
        if export_individual_chapters:
            for result in completed:
                # Titles may contain path separators or reserved characters.
                safe_title = self._safe_filename_part(result.chapter_title)
                chapter_filename = f"chapter_{result.chapter_index:03d}_{safe_title}.wav"
                chapter_path = output_dir / chapter_filename

                save_wav(str(chapter_path), result.audio_data, result.sample_rate)
                exported_files[f"chapter_{result.chapter_index}"] = str(chapter_path)

        # Export complete audiobook
        concatenated_audio = self.concatenate_chapter_audio(results)
        if concatenated_audio is not None:
            # Determined independently of the per-chapter export, so the
            # complete file gets a real sample rate even when
            # export_individual_chapters is False.
            sample_rate = next(
                (r.sample_rate for r in completed if r.sample_rate is not None),
                None,
            )
            # Use the exact output path specified by the user
            complete_path = Path(output_path)
            save_wav(str(complete_path), concatenated_audio, sample_rate)
            exported_files["complete"] = str(complete_path)

        return exported_files

    def get_processing_statistics(self, results: List[ChapterProcessingResult]) -> ProcessingStatistics:
        """
        Get processing statistics from results.

        Totals cover every result regardless of status; the duration total
        is the sum of the chapters' estimated durations.

        Args:
            results: List of ChapterProcessingResult objects

        Returns:
            ProcessingStatistics object
        """
        total_chapters = len(results)
        completed_chapters = sum(1 for r in results if r.status == "completed")
        failed_chapters = sum(1 for r in results if r.status == "error")
        total_processing_time = sum(r.processing_time for r in results)
        total_word_count = sum(r.word_count for r in results)
        total_audio_duration = sum(r.estimated_duration for r in results)

        average_processing_time = total_processing_time / total_chapters if total_chapters > 0 else 0.0

        return ProcessingStatistics(
            total_chapters=total_chapters,
            completed_chapters=completed_chapters,
            failed_chapters=failed_chapters,
            total_processing_time=total_processing_time,
            average_processing_time=average_processing_time,
            total_audio_duration=total_audio_duration,
            total_word_count=total_word_count
        )

    def cleanup(self):
        """Clean up resources."""
        self.epub_processor.cleanup_temp_files()
        gc.collect()
|
epub_processor.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
EPUB processing module for Russian Audiobook Studio.
|
| 4 |
+
Handles EPUB file validation, chapter extraction, and processing coordination.
|
| 5 |
+
"""
|
| 6 |
+
import os
|
| 7 |
+
import tempfile
|
| 8 |
+
from typing import List, Optional, Dict, Any
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
import zipfile
|
| 12 |
+
from ebooklib import epub
|
| 13 |
+
from ebooklib.epub import EpubException
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass
class Chapter:
    """Represents a chapter in an EPUB book."""
    title: str
    content: str  # chapter text
    file_name: str  # presumably the source document inside the EPUB - verify against the extractor
    order: int  # used as the chapter's index/identifier by callers
    preview: str  # First 100-200 characters for preview
    status: str = "pending"  # pending, processing, completed, error
    word_count: int = 0
    estimated_duration: float = 0.0  # Estimated duration in minutes
    error_message: Optional[str] = None
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class EpubValidationResult:
    """Outcome of validating an EPUB file.

    Attributes:
        is_valid: True when the file was parsed and chapters were found.
        error_message: Reason for the failure, or None on success.
        chapters: Extracted chapters (empty when validation failed).
        book_title: Title of the book, when it could be determined.
        book_author: Author of the book, when it could be determined.
        total_chapters: Number of extracted chapters.
    """

    is_valid: bool
    error_message: Optional[str]
    chapters: List[Chapter]
    book_title: Optional[str]
    book_author: Optional[str]
    total_chapters: int
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class EpubValidationError(Exception):
    """Error raised when an EPUB file fails validation or cannot be parsed."""
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class EpubValidator:
    """Validate EPUB files and extract chapter information from them."""

    MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB limit
    MIN_PREVIEW_LENGTH = 100
    MAX_PREVIEW_LENGTH = 200
    MIN_CHAPTER_LENGTH = 50  # documents with less text than this are skipped
    WORDS_PER_MINUTE = 180.0  # speech rate assumed for duration estimates
    ITEM_DOCUMENT = 9  # same value as ebooklib.ITEM_DOCUMENT (HTML document)

    def __init__(self):
        self.supported_extensions = ['.epub']

    def validate_file(self, file_path: str) -> "EpubValidationResult":
        """
        Validate an EPUB file and extract chapter information.

        Args:
            file_path: Path to the EPUB file

        Returns:
            EpubValidationResult with validation status and chapter information.
            Failures (missing file, wrong extension, empty or oversized file,
            unreadable EPUB) are reported through ``is_valid=False`` and
            ``error_message``; this method does not raise for them.
        """
        if not file_path:
            return self._failure("No file path provided")

        if not os.path.exists(file_path):
            return self._failure(f"File does not exist: {file_path}")

        if not self._is_epub_file(file_path):
            return self._failure("File is not an EPUB file. Please upload a .epub file.")

        try:
            file_size = os.path.getsize(file_path)
        except OSError as e:
            # The file may disappear or be unreadable between the checks.
            return self._failure(f"Error reading EPUB file: {str(e)}")

        if file_size == 0:
            return self._failure("File is empty")

        if file_size > self.MAX_FILE_SIZE:
            return self._failure(
                f"File is too large. Maximum size is {self.MAX_FILE_SIZE // (1024*1024)}MB"
            )

        try:
            return self._parse_epub(file_path)
        except EpubException as e:
            return self._failure(f"Invalid EPUB file: {str(e)}")
        except Exception as e:
            return self._failure(f"Error reading EPUB file: {str(e)}")

    @staticmethod
    def _failure(message: str, book_title: Optional[str] = None,
                 book_author: Optional[str] = None) -> "EpubValidationResult":
        """Build a failed validation result carrying ``message``."""
        return EpubValidationResult(
            is_valid=False,
            error_message=message,
            chapters=[],
            book_title=book_title,
            book_author=book_author,
            total_chapters=0
        )

    def _is_epub_file(self, file_path: str) -> bool:
        """Check if file has EPUB extension (case-insensitive)."""
        return Path(file_path).suffix.lower() in self.supported_extensions

    def _parse_epub(self, file_path: str) -> "EpubValidationResult":
        """Parse EPUB file and extract chapter information.

        Raises:
            EpubException: Propagated unchanged from ebooklib so the caller can
                report the file as an invalid EPUB.
            EpubValidationError: For any other error while reading the book.
        """
        try:
            book = epub.read_epub(file_path)

            # Metadata entries are (value, attributes) pairs; use the first value.
            book_title = book.get_metadata('DC', 'title')
            book_author = book.get_metadata('DC', 'creator')
            title = book_title[0][0] if book_title else "Unknown Title"
            author = book_author[0][0] if book_author else "Unknown Author"

            chapters = self._extract_chapters(book)
        except EpubException:
            raise
        except Exception as e:
            raise EpubValidationError(f"Failed to parse EPUB: {str(e)}") from e

        if not chapters:
            return self._failure(
                "No readable chapters found in EPUB file",
                book_title=title,
                book_author=author,
            )

        return EpubValidationResult(
            is_valid=True,
            error_message=None,
            chapters=chapters,
            book_title=title,
            book_author=author,
            total_chapters=len(chapters)
        )

    def _spine_items(self, book: "epub.EpubBook") -> list:
        """Return the linear spine items of ``book`` in reading order."""
        items = []
        for entry in getattr(book, 'spine', None) or []:
            # Spine entries are normally (idref, linear) pairs, but a bare
            # idref is tolerated as well.
            if isinstance(entry, (tuple, list)):
                item_id = entry[0] if entry else None
                linear = entry[1] if len(entry) > 1 else 'yes'
            else:
                item_id, linear = entry, 'yes'
            # The linear flag is read from the OPF attribute as 'yes'/'no',
            # so a plain truthiness test would never skip anything.
            if not linear or str(linear).lower() == 'no':
                continue
            item = book.get_item_with_id(item_id)
            if item:
                items.append(item)
        return items

    def _extract_chapters(self, book: "epub.EpubBook") -> "List[Chapter]":
        """Extract chapters from EPUB book.

        Items are taken from the spine (reading order); when the spine yields
        nothing, every document item of the book is used instead. Documents
        whose extracted text is shorter than ``MIN_CHAPTER_LENGTH`` are skipped.
        """
        candidates = self._spine_items(book)
        if not candidates:
            candidates = [
                item for item in book.get_items()
                if item.get_type() == self.ITEM_DOCUMENT
            ]

        chapters = []
        for item in candidates:
            if item.get_type() != self.ITEM_DOCUMENT:
                continue

            content = self._extract_text_content(item)
            if not content or len(content.strip()) < self.MIN_CHAPTER_LENGTH:
                continue

            order = len(chapters)
            chapters.append(Chapter(
                title=self._get_chapter_title(item, order),
                content=content,
                file_name=item.get_name(),
                order=order,
                preview=self._create_preview(content),
                word_count=self._count_words(content),
                estimated_duration=self._estimate_duration(content)
            ))

        return chapters

    @staticmethod
    def _decode_content(raw_content) -> str:
        """Decode item content to text.

        Tries UTF-8 (dropping a BOM), then the encoding named in the XML
        declaration if there is one, and finally latin-1, which accepts any
        byte sequence. Non-bytes input is converted with ``str``.
        """
        if not isinstance(raw_content, bytes):
            return str(raw_content)

        import re

        candidates = ['utf-8-sig']
        declared = re.match(
            rb'\s*<\?xml[^>]*?encoding\s*=\s*["\']([\w.:-]+)["\']',
            raw_content[:200],
        )
        if declared:
            candidates.append(declared.group(1).decode('ascii', errors='ignore'))

        for encoding in candidates:
            try:
                return raw_content.decode(encoding)
            except (UnicodeDecodeError, LookupError):
                continue
        return raw_content.decode('latin-1')

    def _extract_text_content(self, item) -> str:
        """Extract plain text from an EPUB item.

        Returns the text with markup removed, entities decoded and whitespace
        collapsed, or an empty string when the content cannot be processed.
        """
        try:
            import html
            import re

            content = self._decode_content(item.get_content())

            # Comments and head/script/style blocks hold no readable text.
            content = re.sub(r'<!--.*?-->', '', content, flags=re.DOTALL)
            content = re.sub(
                r'<(head|script|style)\b[^>]*(?<!/)>.*?</\1\s*>',
                '',
                content,
                flags=re.IGNORECASE | re.DOTALL,
            )
            # Block-level tags separate words; inline tags must not split them.
            content = re.sub(
                r'</?(?:p|div|br|hr|h[1-6]|li|ul|ol|dl|dt|dd|tr|td|th|table'
                r'|blockquote|section|article|aside|header|footer|figure'
                r'|figcaption|pre)\b[^>]*>',
                ' ',
                content,
                flags=re.IGNORECASE,
            )
            content = re.sub(r'<[^>]+>', '', content)
            # Decode entities only after tag removal so escaped markup such as
            # "&lt;b&gt;" survives as literal text.
            content = html.unescape(content)
            return re.sub(r'\s+', ' ', content).strip()
        except Exception as e:
            print(f"Warning: Could not extract content from {item.get_name()}: {e}")
            return ""

    def _get_chapter_title(self, item, order: int) -> str:
        """Get chapter title from item or generate default.

        Tries, in order: the first non-empty h1-h3 heading in the content, the
        item's ``title`` attribute, a cleaned-up file name, and finally
        ``"Chapter <order + 1>"``.
        """
        # Imported up front: the file-name fallback below needs ``re`` even
        # when reading the item's content fails.
        import html
        import re

        try:
            content = self._decode_content(item.get_content())
            headings = re.finditer(
                r'<h([1-3])\b[^>]*>(.*?)</h\1\s*>',
                content,
                re.IGNORECASE | re.DOTALL,
            )
            for heading in headings:
                title = re.sub(r'<[^>]+>', '', heading.group(2))
                title = re.sub(r'\s+', ' ', html.unescape(title)).strip()
                if title:
                    return title
        except Exception:
            # Best effort: fall through to the other title sources.
            pass

        try:
            if hasattr(item, 'title') and item.title:
                return str(item.title)
        except Exception:
            # Best effort: fall through to the file name.
            pass

        file_name = item.get_name()
        if file_name:
            # Turn e.g. "my_chapter-01.xhtml" into "My Chapter".
            clean_name = Path(file_name).stem
            clean_name = clean_name.replace('_', ' ').replace('-', ' ')
            clean_name = re.sub(r'\d+', '', clean_name)
            clean_name = re.sub(r'\s+', ' ', clean_name).strip()
            if clean_name:
                return clean_name.title()

        return f"Chapter {order + 1}"

    def _create_preview(self, content: str) -> str:
        """Create preview text from chapter content.

        Content longer than ``MAX_PREVIEW_LENGTH`` is truncated; the preview
        ends at the last period if that leaves more than ``MIN_PREVIEW_LENGTH``
        characters, otherwise "..." is appended.
        """
        if not content:
            return ""

        preview = content.strip()
        if len(preview) <= self.MAX_PREVIEW_LENGTH:
            return preview

        preview = preview[:self.MAX_PREVIEW_LENGTH]
        last_period = preview.rfind('.')
        if last_period > self.MIN_PREVIEW_LENGTH:
            return preview[:last_period + 1]
        return preview + "..."

    def _count_words(self, content: str) -> int:
        """Count whitespace-separated words in content."""
        if not content:
            return 0
        return len(content.split())

    def _estimate_duration(self, content: str) -> float:
        """Estimate audio duration in minutes based on content length."""
        if not content:
            return 0.0

        duration_minutes = self._count_words(content) / self.WORDS_PER_MINUTE

        # Add a 10% buffer on top of the raw estimate.
        return round(duration_minutes * 1.1, 1)
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
class EpubProcessor:
    """Main EPUB processor for handling EPUB files in the web interface."""

    def __init__(self):
        self.validator = EpubValidator()
        self.temp_dir = tempfile.mkdtemp(prefix="epub_processing_")

    def process_epub_upload(self, file_path: str) -> "EpubValidationResult":
        """
        Process an uploaded EPUB file.

        Args:
            file_path: Path to uploaded EPUB file

        Returns:
            EpubValidationResult with validation status and chapter information
        """
        return self.validator.validate_file(file_path)

    def update_chapter_status(self, chapters: "List[Chapter]", chapter_index: int, status: str, error_message: Optional[str] = None):
        """Update the status of a specific chapter.

        Out-of-range indexes are ignored. ``error_message`` is stored only when
        it is non-empty; an existing message is left untouched otherwise.
        """
        if not 0 <= chapter_index < len(chapters):
            return
        chapter = chapters[chapter_index]
        chapter.status = status
        if error_message:
            chapter.error_message = error_message

    def get_chapter_status_summary(self, chapters: "List[Chapter]") -> Dict[str, int]:
        """Count chapters per status; statuses outside the known set are ignored."""
        summary = {"pending": 0, "processing": 0, "completed": 0, "error": 0}
        for chapter in chapters:
            if chapter.status in summary:
                summary[chapter.status] += 1
        return summary

    def get_total_estimated_duration(self, chapters: "List[Chapter]") -> float:
        """Get total estimated duration for all chapters."""
        return sum(chapter.estimated_duration for chapter in chapters)

    def get_total_word_count(self, chapters: "List[Chapter]") -> int:
        """Get total word count for all chapters."""
        return sum(chapter.word_count for chapter in chapters)

    def cleanup_temp_files(self):
        """Remove the temporary directory; failures are ignored (best effort)."""
        # temp_dir is missing when __init__ failed before creating it.
        temp_dir = getattr(self, "temp_dir", None)
        if not temp_dir:
            return
        try:
            import shutil
            shutil.rmtree(temp_dir, ignore_errors=True)
        except Exception:
            # Cleanup may run during interpreter shutdown, when even the
            # import can fail; there is nothing useful to do about it.
            pass

    def __del__(self):
        """Cleanup on destruction."""
        try:
            self.cleanup_temp_files()
        except Exception:
            # A finalizer must never raise.
            pass
|
requirements.txt
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies for Hugging Face Spaces deployment
|
| 2 |
+
gradio>=4.44.0
|
| 3 |
+
numpy>=1.24.0
|
| 4 |
+
soundfile>=0.12.1
|
| 5 |
+
pydub>=0.25.1
|
| 6 |
+
librosa>=0.10.1
|
| 7 |
+
tqdm>=4.66.0
|
| 8 |
+
pyloudnorm>=0.1.1
|
| 9 |
+
|
| 10 |
+
# Russian linguistics for automatic accent placement
|
| 11 |
+
ruaccent>=1.5.7
|
| 12 |
+
|
| 13 |
+
# Hugging Face / TTS stack
|
| 14 |
+
huggingface_hub>=0.23.0
|
| 15 |
+
f5-tts>=0.1.1
|
| 16 |
+
datasets>=2.20.0
|
| 17 |
+
|
| 18 |
+
# Audiobook generation and metadata
|
| 19 |
+
mutagen>=1.47.0
|
| 20 |
+
|
| 21 |
+
# PyTorch for Hugging Face Spaces (CPU version for compatibility)
|
| 22 |
+
torch>=2.0.0
|
| 23 |
+
torchaudio>=2.0.0
|
| 24 |
+
|
| 25 |
+
# Additional dependencies for EPUB processing
|
| 26 |
+
ebooklib>=0.18
|
| 27 |
+
beautifulsoup4>=4.12.0
|
| 28 |
+
lxml>=4.9.0
|
| 29 |
+
|
| 30 |
+
# Audio processing
|
| 31 |
+
scipy>=1.10.0
|
samples/001/sample.text
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
— До свидания, дорогая.
|
| 2 |
+
— До свидания, любимый.
|
| 3 |
+
Аликс Мартин стояла, прислонясь к маленькой, грубо сколоченной калитке, и смотрела вслед му
|
synth_batch.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse, os, sys, re
|
| 3 |
+
import numpy as np
|
| 4 |
+
import soundfile as sf
|
| 5 |
+
from tqdm import tqdm
|
| 6 |
+
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
|
| 7 |
+
from utils.audio import crossfade_concat, normalize_lufs, save_wav
|
| 8 |
+
from backends.espeech_backend import EspeechBackend
|
| 9 |
+
|
| 10 |
+
def read_input(path: str) -> str:
    """Read the text of a ``.txt`` or ``.epub`` file.

    Text files are read as UTF-8 (a leading BOM is dropped). For EPUB files the
    body text of every document item is extracted and the items are joined with
    blank lines. If the EPUB dependencies are missing, an installation hint is
    printed to stderr and the process exits with status 2.

    Raises:
        ValueError: If the file extension is neither ``.txt`` nor ``.epub``.
    """
    lowered = path.lower()

    if lowered.endswith(".txt"):
        # utf-8-sig also reads plain UTF-8 and keeps a BOM out of the text.
        with open(path, "r", encoding="utf-8-sig") as f:
            return f.read()

    if not lowered.endswith(".epub"):
        raise ValueError("Поддерживаются .txt и .epub")

    try:
        from ebooklib import epub
        from bs4 import BeautifulSoup
    except ImportError:
        print("Для EPUB установите: pip install ebooklib beautifulsoup4 lxml", file=sys.stderr)
        sys.exit(2)

    book = epub.read_epub(path)
    texts = [
        BeautifulSoup(item.get_body_content(), "lxml").get_text(" ", strip=True)
        for item in book.get_items()
        if item.get_type() == 9  # 9 = document item
    ]
    return "\n\n".join(texts)
|
| 30 |
+
|
| 31 |
+
def load_default_ref_text():
    """Return the stripped text of the bundled sample, or "" if it is missing."""
    sample_path = "samples/001/sample.text"
    try:
        handle = open(sample_path, "r", encoding="utf-8")
    except FileNotFoundError:
        return ""
    with handle:
        contents = handle.read()
    return contents.strip()
|
| 38 |
+
|
| 39 |
+
def main():
    """Synthesize a TXT/EPUB file into a single ``book.wav`` in the output directory.

    The input is normalized, split into paragraphs and accented, each
    paragraph is synthesized with the backend, and the pieces are joined with a
    crossfade and loudness-normalized before being written. Exits with status 1
    when the input contains no text to synthesize.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True, help="Путь к TXT/EPUB")
    ap.add_argument("--outdir", required=True, help="Каталог для результата")
    ap.add_argument("--ref-audio", required=False, default="samples/001/sample.mp3", help="Путь к референс-аудио (6–12с)")
    ap.add_argument("--ref-text", required=False, default=load_default_ref_text(), help="Референс-текст")
    ap.add_argument("--model-repo", default=os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2"))
    ap.add_argument("--speed", type=float, default=1.0)
    ap.add_argument("--nfe-steps", type=int, default=48)
    ap.add_argument("--crossfade-ms", type=int, default=150)
    ap.add_argument("--target-lufs", type=float, default=-20.0)
    args = ap.parse_args()

    os.makedirs(args.outdir, exist_ok=True)

    backend = EspeechBackend(model_id=args.model_repo)

    raw = read_input(args.input)
    text = normalize_text(raw)
    paragraphs = split_into_paragraphs(text)
    paragraphs = [maybe_ruaccent(p) for p in paragraphs]

    print(f"Абзацев: {len(paragraphs)}")

    if not paragraphs:
        # Without any synthesized piece there is no sample rate, so the
        # concatenation and loudness steps below cannot run.
        print(f"No text to synthesize in {args.input}", file=sys.stderr)
        sys.exit(1)

    pieces = []
    sr = None
    for para in tqdm(paragraphs, desc="Генерация"):
        audio, sr = backend.synthesize(
            text=para,
            ref_audio_path=args.ref_audio,
            ref_text=args.ref_text,
            speed=args.speed,
            nfe_steps=args.nfe_steps,
            seed=None,
        )
        pieces.append(audio)

    final = crossfade_concat(pieces, crossfade_ms=args.crossfade_ms, sample_rate=sr)
    final = normalize_lufs(final, sr, target_lufs=args.target_lufs)

    out_path = os.path.join(args.outdir, "book.wav")
    save_wav(out_path, final, sr)
    print(f"ГОТОВО: {out_path}")


if __name__ == "__main__":
    main()
|
utils/audio.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/audio.py
|
| 2 |
+
from typing import List
|
| 3 |
+
import numpy as np
|
| 4 |
+
import soundfile as sf
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
import pyloudnorm as pyln
|
| 7 |
+
|
| 8 |
+
def _to_audiosegment(wav: np.ndarray, sr: int) -> AudioSegment:
    """Convert a float waveform into a mono 16-bit ``AudioSegment``.

    Samples are clipped to [-1.0, 1.0] before being scaled to int16.
    """
    clipped = np.clip(wav, -1.0, 1.0)
    pcm16 = np.int16(clipped * 32767.0)
    raw_bytes = pcm16.tobytes()
    return AudioSegment(raw_bytes, frame_rate=sr, sample_width=2, channels=1)
|
| 13 |
+
|
| 14 |
+
def crossfade_concat(pieces: List[np.ndarray], crossfade_ms: int, sample_rate: int) -> np.ndarray:
    """Join audio pieces into one waveform, crossfading between neighbours.

    Args:
        pieces: Waveforms to join, in order.
        crossfade_ms: Requested crossfade length in milliseconds. It is
            shortened for any pair where one side is shorter than the request.
        sample_rate: Sample rate shared by all pieces.

    Returns:
        The joined waveform as float32 in [-1, 1]; a single zero sample when
        ``pieces`` is empty.
    """
    if not pieces:
        return np.zeros(1, dtype=np.float32)

    combined = _to_audiosegment(pieces[0], sample_rate)
    for piece in pieces[1:]:
        following = _to_audiosegment(piece, sample_rate)
        # pydub raises ValueError when the crossfade is longer than either
        # segment (len() of a segment is its duration in ms), so clamp it.
        fade = max(0, min(crossfade_ms, len(combined), len(following)))
        combined = combined.append(following, crossfade=fade)

    # Back to float32 in [-1, 1].
    samples = np.array(combined.get_array_of_samples(), dtype=np.float32)
    return samples / 32767.0
|
| 24 |
+
|
| 25 |
+
def normalize_lufs(wav: np.ndarray, sr: int, target_lufs: float = -20.0) -> np.ndarray:
    """Normalize ``wav`` to ``target_lufs`` integrated loudness.

    When the loudness cannot be measured (empty input, audio too short for the
    meter, or silence giving a non-finite value) the gain step is skipped and
    the input is only clipped.

    Returns:
        float32 waveform clipped to [-0.999, 0.999].
    """
    if wav.size == 0:
        return wav.astype(np.float32)

    meter = pyln.Meter(sr)
    try:
        loudness = meter.integrated_loudness(wav)
    except ValueError:
        # Raised by the meter for audio shorter than its analysis block.
        loudness = None

    if loudness is None or not np.isfinite(loudness):
        # Silence measures as -inf; applying that gain would produce NaNs.
        wav_norm = wav
    else:
        wav_norm = pyln.normalize.loudness(wav, loudness, target_lufs)

    # Clipping safety margin.
    return np.clip(wav_norm, -0.999, 0.999).astype(np.float32)
|
| 33 |
+
|
| 34 |
+
def save_wav(path: str, wav: np.ndarray, sr: int):
    """Write ``wav`` to ``path`` with sample rate ``sr`` using the PCM_16 subtype."""
    sf.write(file=path, data=wav, samplerate=sr, subtype="PCM_16")
|
utils/text.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# utils/text.py
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
def normalize_text(text: str) -> str:
    """Normalize line endings and collapse redundant whitespace.

    CRLF and lone CR become LF, runs of spaces/tabs become a single space,
    three or more consecutive newlines become a blank line, and the result is
    stripped.
    """
    unified = text.replace("\r\n", "\n")
    unified = unified.replace("\r", "\n")
    single_spaced = re.sub(r"[ \t]+", " ", unified)
    squeezed = re.sub(r"\n{3,}", "\n\n", single_spaced)
    return squeezed.strip()
|
| 10 |
+
|
| 11 |
+
def split_into_paragraphs(text: str):
    """Split ``text`` on blank lines and return the non-empty, stripped parts."""
    paragraphs = []
    for chunk in text.split("\n\n"):
        cleaned = chunk.strip()
        if cleaned:
            paragraphs.append(cleaned)
    return paragraphs
|
| 16 |
+
|
| 17 |
+
# Loaded accentizers keyed by (model_size, use_dict), so the models are loaded
# once per configuration instead of on every call.
_ACCENTIZERS = {}


def _get_accentizer(model_size: str, use_dict: bool):
    """Return a loaded RUAccent instance for the given settings, loading it once."""
    key = (model_size, use_dict)
    if key not in _ACCENTIZERS:
        from ruaccent import RUAccent
        acc = RUAccent()
        acc.load(omograph_model_size=model_size, use_dictionary=use_dict, tiny_mode=False)
        # Stored only after a successful load, so a failed load is retried.
        _ACCENTIZERS[key] = acc
    return _ACCENTIZERS[key]


def maybe_ruaccent(text: str) -> str:
    """Place stress marks automatically if ruaccent is available.

    Uses the turbo3.1 model with the dictionary enabled. If ruaccent is not
    installed or fails, a warning is printed and the original text is returned.
    """
    return maybe_ruaccent_advanced(text, model_size="turbo3.1", use_dict=True)


def maybe_ruaccent_advanced(text: str, model_size: str = "turbo3.1", use_dict: bool = True) -> str:
    """Place stress marks with ruaccent using configurable parameters.

    Args:
        text: Text to process.
        model_size: Value passed to ruaccent as ``omograph_model_size``.
        use_dict: Value passed to ruaccent as ``use_dictionary``.

    Returns:
        The processed text, or the original text (after printing a warning)
        when ruaccent is unavailable or fails.
    """
    try:
        return _get_accentizer(model_size, use_dict).process_all(text)
    except Exception as e:
        # Deliberately best-effort: accenting is optional for synthesis.
        print(f"Warning: ruaccent failed, using original text: {e}")
        return text
|