danilahs committed on
Commit 4f6648e · verified · 1 Parent(s): e50ddee

Upload folder using huggingface_hub
.gitignore ADDED
@@ -0,0 +1,41 @@
1
+ # Hugging Face Spaces
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ pip-log.txt
11
+ pip-delete-this-directory.txt
12
+ .tox/
13
+ .coverage
14
+ .coverage.*
15
+ .cache
16
+ nosetests.xml
17
+ coverage.xml
18
+ *.cover
19
+ *.log
20
+ .git
21
+ .mypy_cache
22
+ .pytest_cache
23
+ .hypothesis
24
+
25
+ # Audio files
26
+ temp_audio/
27
+ output/
28
+ *.wav
29
+ *.mp3
30
+ *.m4a
31
+
32
+ # Model cache
33
+ .cache/
34
+ models/
35
+ checkpoints/
36
+
37
+ # Temporary files
38
+ *.tmp
39
+ *.temp
40
+ .DS_Store
41
+ Thumbs.db
README.md CHANGED
@@ -1,14 +1,77 @@
1
  ---
2
- title: Audiobook Ru Tts
3
- emoji: 🚀
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.44.1
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
- short_description: 'Create high-quality Russian audiobooks from EPUB files with '
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: Russian Audiobook Studio
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Create Russian audiobooks from EPUB files with advanced TTS
12
  ---
13
 
14
+ # 🎧 Russian Audiobook Studio
15
+
16
+ Transform your Russian EPUB books into professional-quality audiobooks using advanced Text-to-Speech technology with automatic accent placement and voice customization.
17
+
18
+ ## ✨ Features
19
+
20
+ - **📚 EPUB Processing**: Upload EPUB files and automatically extract chapters
21
+ - **🎯 Chapter Selection**: Choose specific chapters to process with checkbox interface
22
+ - **🎵 Advanced TTS**: High-quality Russian TTS with automatic accent placement using ruaccent
23
+ - **🎛️ Voice Controls**: Customize voice clarity, variation, and volume levels
24
+ - **🎧 Audio Preview**: Built-in audio players for chapter previews
25
+ - **📥 Download Options**: Download individual chapters or complete audiobook
26
+ - **📱 Responsive Design**: Works on desktop and mobile devices
27
+ - **♿ Accessibility**: Full keyboard navigation and screen reader support
28
+
29
+ ## 🚀 How to Use
30
+
31
+ 1. **Upload EPUB**: Click "Choose File" and select your Russian EPUB book
32
+ 2. **Validate**: Click "Validate EPUB" to extract chapters and book information
33
+ 3. **Select Chapters**: Choose which chapters to process using the checkbox interface
34
+ 4. **Adjust Settings**: Customize voice parameters in Advanced Settings
35
+ 5. **Process**: Click "Process Selected Chapters" to generate audiobook
36
+ 6. **Preview & Download**: Listen to previews and download your audiobook
37
+
38
+ ## ⚙️ Advanced Settings
39
+
40
+ - **Speech Speed**: Adjust speech generation speed (0.6x = slower, 1.4x = faster)
41
+ - **Quality Steps (NFE)**: Higher values give better quality but take longer to process
42
+ - **Voice Clarity**: Controls how closely the voice follows the reference (1.0-4.0)
43
+ - **Voice Variation**: Controls natural voice variation (-2.0 to 1.0)
44
+ - **Volume Level (RMS)**: Target volume level for the generated audio
45
+
46
+ ## 🎯 Technical Features
47
+
48
+ - **Automatic Accent Placement**: Uses the ruaccent library for automatic Russian stress placement
49
+ - **Professional Audio Quality**: 24kHz, 16-bit, mono WAV output
50
+ - **Real-time Progress**: Live updates during processing with cancellation support
51
+ - **Error Handling**: Robust error handling with detailed status messages
52
+ - **File Management**: Automatic file serving and download functionality
53
+
54
+ ## 📋 Requirements
55
+
56
+ - Russian EPUB files with proper text encoding
57
+ - Reference audio sample (6-12 seconds) for voice cloning
58
+ - Reference text (transcript of the reference audio)
59
+
60
+ ## 🔧 Technical Stack
61
+
62
+ - **TTS Engine**: ESpeech-TTS-1_RL-V2 (F5-TTS)
63
+ - **Accent Processing**: ruaccent for automatic Russian stress placement
64
+ - **Audio Processing**: librosa, soundfile, pydub
65
+ - **Web Interface**: Gradio with responsive design
66
+ - **EPUB Processing**: ebooklib for book parsing
67
+
68
+ ## 📝 Notes
69
+
70
+ - Processing time depends on chapter length and quality settings
71
+ - Higher quality settings require more processing time
72
+ - All audio files are temporarily stored for preview and download
73
+ - The system automatically handles Russian text normalization and accent placement
74
+
75
+ ## 🎉 Ready to Create Audiobooks?
76
+
77
+ Upload your Russian EPUB file and start creating professional-quality audiobooks in minutes!
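The Advanced Settings listed above map directly onto the arguments of `EspeechBackend.synthesize` added in `backends/espeech_backend.py` later in this commit. As a quick orientation, here is a minimal sketch of that flow outside Gradio, mirroring `synthesize_ui` in `app.py`; the reference audio path, its transcript, and the sample text are placeholders rather than files in this commit.

```python
# Illustrative sketch only: the paths and texts below are placeholders.
from backends.espeech_backend import EspeechBackend
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
from utils.audio import crossfade_concat, normalize_lufs, save_wav

backend = EspeechBackend(model_id="ESpeech/ESpeech-TTS-1_RL-V2")

text = "Это пример абзаца для озвучивания."   # text to narrate
ref_audio = "my_reference.wav"                # 6-12 s reference clip (placeholder path)
ref_text = "Транскрипт референсного аудио."   # transcript of the reference clip

# Normalize, split into paragraphs, and add stress marks via ruaccent
paragraphs = [maybe_ruaccent(p) for p in split_into_paragraphs(normalize_text(text))]

pieces, sr = [], None
for para in paragraphs:
    audio, sr = backend.synthesize(
        text=para,
        ref_audio_path=ref_audio,
        ref_text=ref_text,
        speed=1.0,                 # "Speech Speed"
        nfe_steps=48,              # "Quality Steps (NFE)"
        cfg_strength=2.0,          # "Voice Clarity"
        sway_sampling_coef=-1.0,   # "Voice Variation"
        target_rms=0.1,            # "Volume Level (RMS)"
    )
    pieces.append(audio)

# Join paragraphs with a short crossfade, normalize loudness, and save
final = pieces[0] if len(pieces) == 1 else crossfade_concat(pieces, crossfade_ms=150, sample_rate=sr)
final = normalize_lufs(final, sr, target_lufs=-20.0)
save_wav("preview.wav", final, sr)
```

The values shown here (speed 1.0, 48 NFE steps, clarity 2.0, variation -1.0, RMS 0.1) match the slider defaults in the app's EPUB tab.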
app.py ADDED
@@ -0,0 +1,725 @@
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import gradio as gr
4
+ import numpy as np
5
+ import soundfile as sf
6
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent, maybe_ruaccent_advanced
7
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
8
+ from backends.espeech_backend import EspeechBackend
9
+ from epub_processor import EpubProcessor
10
+ from epub_processing_pipeline import EpubProcessingPipeline, ProcessingSettings
11
+
12
+ DEFAULT_MODEL = os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2")
13
+
14
+ backend = EspeechBackend(model_id=DEFAULT_MODEL)
15
+ epub_processor = EpubProcessor()
16
+ processing_pipeline = EpubProcessingPipeline(epub_processor, backend)
17
+
18
+ # Global processing state
19
+ processing_cancelled = False
20
+
21
+ # Load default reference text from local sample
22
+ def load_default_ref_text():
23
+ try:
24
+ with open("samples/001/sample.text", "r", encoding="utf-8") as f:
25
+ return f.read().strip()
26
+ except FileNotFoundError:
27
+ return ""
28
+
29
+ # EPUB upload validation function
30
+ def validate_epub_upload(file_path):
31
+ """Validate uploaded EPUB file and return status."""
32
+ if not file_path:
33
+ return (
34
+ "📁 **Upload Status:** No file uploaded",
35
+ "⏳ **Validation:** Waiting for file upload",
36
+ "⏳ **Processing:** Not started",
37
+ None, None, None, None, [], []
38
+ )
39
+
40
+ try:
41
+ # Update validation status
42
+ validation_status = "🔄 **Validation:** Processing EPUB file..."
43
+
44
+ result = epub_processor.process_epub_upload(file_path)
45
+
46
+ if result.is_valid:
47
+ # Create enhanced chapter list for display
48
+ chapter_list = []
49
+ total_words = 0
50
+ total_duration = 0.0
51
+
52
+ for i, chapter in enumerate(result.chapters):
53
+ total_words += chapter.word_count
54
+ total_duration += chapter.estimated_duration
55
+
56
+ # Create chapter info with metadata
57
+ chapter_info = f"**{i+1}. {chapter.title}**\n"
58
+ chapter_info += f"📊 Words: {chapter.word_count:,} | ⏱️ Est. Duration: {chapter.estimated_duration:.1f} min\n"
59
+ chapter_info += f"📄 Preview: {chapter.preview}\n"
60
+ chapter_info += f"📁 File: {chapter.file_name}"
61
+
62
+ chapter_list.append(chapter_info)
63
+
64
+ chapter_display = "\n\n".join(chapter_list)
65
+
66
+ upload_status = f"✅ **Upload Status:** EPUB file uploaded successfully"
67
+ validation_status = f"✅ **Validation:** EPUB file is valid and ready for processing"
68
+ processing_status = f"🚀 **Processing:** Ready to process {result.total_chapters} chapters"
69
+
70
+ # Enhanced status message with statistics
71
+ status_message = f"""**📚 Book Information:**
72
+ **Title:** {result.book_title}
73
+ **Author:** {result.book_author}
74
+ **Chapters:** {result.total_chapters}
75
+ **Total Words:** {total_words:,}
76
+ **Estimated Total Duration:** {total_duration:.1f} minutes
77
+
78
+ **📖 Chapter Details:**
79
+ {chapter_display}"""
80
+
81
+ # Create chapter selection interface
82
+ chapter_choices, default_selection = create_chapter_selection_interface(result.chapters)
83
+
84
+ return (
85
+ upload_status,
86
+ validation_status,
87
+ processing_status,
88
+ status_message,
89
+ result.chapters,
90
+ result.book_title,
91
+ result.book_author,
92
+ chapter_choices,
93
+ default_selection
94
+ )
95
+ else:
96
+ upload_status = f"❌ **Upload Status:** EPUB file validation failed"
97
+ validation_status = f"❌ **Validation:** {result.error_message}"
98
+ processing_status = f"⏳ **Processing:** Cannot proceed due to validation error"
99
+
100
+ return (
101
+ upload_status,
102
+ validation_status,
103
+ processing_status,
104
+ f"**Error Details:** {result.error_message}",
105
+ None, None, None, [], []
106
+ )
107
+
108
+ except Exception as e:
109
+ upload_status = f"❌ **Upload Status:** Error processing file"
110
+ validation_status = f"❌ **Validation:** {str(e)}"
111
+ processing_status = f"⏳ **Processing:** Cannot proceed due to error"
112
+
113
+ return (
114
+ upload_status,
115
+ validation_status,
116
+ processing_status,
117
+ f"**Error Details:** {str(e)}",
118
+ None, None, None, [], []
119
+ )
120
+
121
+
122
+ def stop_epub_processing():
123
+ """Stop the current EPUB processing."""
124
+ global processing_cancelled
125
+ processing_cancelled = True
126
+ return "🛑 **Processing Stopped:** User requested to stop processing"
127
+
128
+ def create_chapter_selection_interface(chapters):
129
+ """Create chapter selection interface data structure."""
130
+ if not chapters:
131
+ return [], []
132
+
133
+ # Create choices for checkbox group
134
+ choices = []
135
+ for i, chapter in enumerate(chapters):
136
+ choice_text = f"{i+1}. {chapter.title}"
137
+ choices.append(choice_text)
138
+
139
+ # All chapters selected by default (return the choice strings, not indices)
140
+ default_selection = choices # Select all choices by default
141
+
142
+ return choices, default_selection
143
+
144
+ def get_selected_chapters(chapters, selected_choices):
145
+ """Get the actual chapter objects for selected choices."""
146
+ if not chapters or not selected_choices:
147
+ return []
148
+
149
+ selected_chapters = []
150
+ for choice in selected_choices:
151
+ # Extract chapter index from choice string (format: "1. Chapter Title")
152
+ try:
153
+ chapter_index = int(choice.split('.')[0]) - 1 # Convert to 0-based index
154
+ if 0 <= chapter_index < len(chapters):
155
+ selected_chapters.append(chapters[chapter_index])
156
+ except (ValueError, IndexError):
157
+ # Skip invalid choices
158
+ continue
159
+
160
+ return selected_chapters
161
+
162
+ def update_chapter_selection_interface(chapters, choices, default_selection):
163
+ """Update the chapter selection interface after EPUB validation."""
164
+ if chapters is not None:
165
+ selection_summary = get_selection_summary(default_selection, len(chapters))
166
+ return (
167
+ gr.update(visible=True),
168
+ gr.update(visible=False),
169
+ gr.update(choices=choices, value=default_selection, visible=True),
170
+ gr.update(visible=True), # Show selection buttons
171
+ gr.update(value=f"📊 **Selection:** {selection_summary}", visible=True)
172
+ )
173
+ else:
174
+ return (
175
+ gr.update(visible=False),
176
+ gr.update(visible=False),
177
+ gr.update(choices=[], value=[], visible=False),
178
+ gr.update(visible=False), # Hide selection buttons
179
+ gr.update(value="", visible=False)
180
+ )
181
+
182
+ def save_chapter_selection_state(selected_choices, book_title, book_author):
183
+ """Save chapter selection state to session."""
184
+ # This will be handled by Gradio's built-in state management
185
+ # The selected_choices will be automatically stored in the component's state
186
+ return selected_choices
187
+
188
+ def restore_chapter_selection_state(chapters, book_title, book_author):
189
+ """Restore chapter selection state from session."""
190
+ # If we have chapters and this is the same book, restore previous selection
191
+ # Otherwise, return all chapters selected by default
192
+ if chapters is not None:
193
+ # For now, we'll use all chapters selected by default
194
+ # In a more advanced implementation, we could check session state
195
+ # and restore previous selections for the same book
196
+ choices, _ = create_chapter_selection_interface(chapters)
197
+ return choices # Return all choices selected by default
198
+ return []
199
+
200
+ def get_selection_summary(selected_choices, total_chapters):
201
+ """Get a summary of the current selection."""
202
+ if not selected_choices:
203
+ return "No chapters selected"
204
+
205
+ selected_count = len(selected_choices)
206
+ if selected_count == total_chapters:
207
+ return f"All {total_chapters} chapters selected"
208
+ else:
209
+ return f"{selected_count} of {total_chapters} chapters selected"
210
+
211
+ def select_all_chapters(chapter_choices):
212
+ """Select all available chapters."""
213
+ return chapter_choices if chapter_choices else []
214
+
215
+ def deselect_all_chapters():
216
+ """Deselect all chapters."""
217
+ return []
218
+
219
+ def create_chapter_preview_players(processing_results, book_title, exported_files=None):
220
+ """Create chapter preview information for display."""
221
+ if not processing_results or not hasattr(processing_results, 'chapters'):
222
+ return []
223
+
224
+ chapter_info = []
225
+ for i, chapter in enumerate(processing_results.chapters):
226
+ # Use exported file path if available, otherwise construct expected path
227
+ chapter_key = f"chapter_{i}"
228
+ if exported_files and chapter_key in exported_files:
229
+ audio_file_path = exported_files[chapter_key]
230
+ else:
231
+ # Fallback to expected path based on export naming pattern
232
+ safe_title = chapter.title.replace(' ', '_').replace(':', '').replace(',', '').replace('.', '')
233
+ audio_file_path = f"output/{book_title.replace(' ', '_')}/chapter_{i:03d}_{safe_title}.wav"
234
+
235
+ chapter_info.append({
236
+ 'title': chapter.title,
237
+ 'file_path': audio_file_path,
238
+ 'duration': chapter.estimated_duration,
239
+ 'word_count': chapter.word_count
240
+ })
241
+
242
+ return chapter_info
243
+
244
+ def create_preview_ui_html(players):
245
+ """Create the complete preview UI HTML."""
246
+ if not players:
247
+ return "<p>No chapters available for preview.</p>"
248
+
249
+ html = f"""
250
+ <div class="chapter-preview-container" style="margin: 20px 0; padding: 20px; border: 2px solid #e1e8ed; border-radius: 12px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);" role="region" aria-label="Chapter previews section">
251
+ <h3 style="margin: 0 0 15px 0; color: #2c3e50; font-size: 24px; text-align: center;" id="previews-heading">🎵 Chapter Previews</h3>
252
+ <p style="color: #666; margin-bottom: 20px; text-align: center; font-size: 14px;" aria-describedby="previews-heading">
253
+ Listen to individual chapters before downloading the complete audiobook.
254
+ Each player includes full controls for play, pause, seeking, and volume adjustment.
255
+ </p>
256
+ <div class="players-list" style="max-height: 600px; overflow-y: auto; padding-right: 10px;" role="list" aria-label="List of chapter audio players">
257
+ {''.join(players)}
258
+ </div>
259
+ <style>
260
+ @media (max-width: 768px) {{
261
+ .chapter-preview-container {{
262
+ padding: 15px !important;
263
+ margin: 10px 0 !important;
264
+ }}
265
+ .chapter-preview-container h3 {{
266
+ font-size: 20px !important;
267
+ }}
268
+ .chapter-preview-container p {{
269
+ font-size: 12px !important;
270
+ }}
271
+ .players-list {{
272
+ max-height: 400px !important;
273
+ }}
274
+ }}
275
+ @media (max-width: 480px) {{
276
+ .chapter-preview-container {{
277
+ padding: 10px !important;
278
+ }}
279
+ .chapter-preview-container h3 {{
280
+ font-size: 18px !important;
281
+ }}
282
+ .players-list {{
283
+ max-height: 300px !important;
284
+ }}
285
+ }}
286
+ </style>
287
+ </div>
288
+ """
289
+ return html
290
+
291
+ def get_audio_file_url(file_path):
292
+ """Convert file path to URL for serving audio files."""
293
+ if not file_path:
294
+ return ""
295
+
296
+ # For Gradio, we need to use the file path directly
297
+ # Gradio will handle the file serving automatically
298
+ if os.path.isabs(file_path):
299
+ # Return the absolute path - Gradio can serve it
300
+ return file_path
301
+ else:
302
+ # Return the relative path
303
+ return file_path
304
+
305
+ def copy_file_for_serving(file_path, target_dir="temp_audio"):
306
+ """Copy file to a directory that Gradio can serve."""
307
+ if not file_path or not os.path.exists(file_path):
308
+ return None
309
+
310
+ # Create target directory if it doesn't exist
311
+ os.makedirs(target_dir, exist_ok=True)
312
+
313
+ # Get filename and copy to target directory
314
+ filename = os.path.basename(file_path)
315
+ target_path = os.path.join(target_dir, filename)
316
+
317
+ try:
318
+ import shutil
319
+ shutil.copy2(file_path, target_path)
320
+ return target_path
321
+ except Exception as e:
322
+ print(f"Error copying file {file_path} to {target_path}: {e}")
323
+ return None
324
+
325
+ def process_epub_book(chapters, selected_choices, book_title, book_author, speed, nfe_steps, crossfade_ms, seed, target_rms, cfg_strength, sway_coef):
326
+ """Process selected chapters from an EPUB book into audiobook."""
327
+ global processing_cancelled
328
+ processing_cancelled = False # Reset cancellation flag
329
+
330
+ if not chapters:
331
+ return "❌ **Error:** No chapters available", None, None, ""
332
+
333
+ # Get selected chapters
334
+ selected_chapters = get_selected_chapters(chapters, selected_choices)
335
+
336
+ if not selected_chapters:
337
+ return "❌ **Error:** No chapters selected for processing", None, None, ""
338
+
339
+ try:
340
+ # Create processing settings with enhanced voice parameters
341
+ settings = ProcessingSettings(
342
+ speed=speed,
343
+ nfe_steps=nfe_steps,
344
+ crossfade_ms=crossfade_ms,
345
+ seed=seed if seed > 0 else None,
346
+ target_rms=target_rms,
347
+ cfg_strength=cfg_strength,
348
+ sway_sampling_coef=sway_coef
349
+ )
350
+
351
+ # Progress tracking
352
+ progress_messages = []
353
+
354
+ def progress_callback(chapter_index, status, message):
355
+ # Check for cancellation
356
+ if processing_cancelled:
357
+ return
358
+ # Show actual chapter title instead of index for better clarity
359
+ if chapter_index < len(selected_chapters):
360
+ chapter_title = selected_chapters[chapter_index].title
361
+ # Add visual status indicators
362
+ status_emoji = {
363
+ 'pending': '⏳',
364
+ 'processing': '🔄',
365
+ 'completed': '✅',
366
+ 'error': '❌',
367
+ 'cancelled': '🛑'
368
+ }.get(status.lower(), '📝')
369
+ progress_messages.append(f"{status_emoji} **{chapter_title}**: {status.title()} - {message}")
370
+ else:
371
+ progress_messages.append(f"📝 Chapter {chapter_index + 1}: {status} - {message}")
372
+
373
+ def cancellation_check():
374
+ return processing_cancelled
375
+
376
+ # Process the selected chapters
377
+ results = processing_pipeline.process_book(selected_chapters, settings, progress_callback, cancellation_check=cancellation_check)
378
+
379
+ # Check if processing was cancelled
380
+ if processing_cancelled:
381
+ return "🛑 **Processing Cancelled:** User stopped the processing", None, None, ""
382
+
383
+ # Get statistics
384
+ stats = processing_pipeline.get_processing_statistics(results)
385
+
386
+ # Create enhanced status message with visual feedback
387
+ completion_emoji = "🎉" if stats.failed_chapters == 0 else "⚠️"
388
+ status_message = f"""{completion_emoji} **Processing Complete!**
389
+
390
+ **📊 Statistics:**
391
+ - **Selected Chapters:** {len(selected_chapters)} of {len(chapters)} total chapters
392
+ - **✅ Completed:** {stats.completed_chapters}
393
+ - **❌ Failed:** {stats.failed_chapters}
394
+ - **⏱️ Total Processing Time:** {stats.total_processing_time:.1f} seconds
395
+ - **📈 Average Time per Chapter:** {stats.average_processing_time:.1f} seconds
396
+ - **📝 Total Words:** {stats.total_word_count:,}
397
+ - **🎵 Estimated Duration:** {stats.total_audio_duration:.1f} minutes
398
+
399
+ **📋 Processing Log:**
400
+ {chr(10).join(progress_messages[-10:])}
401
+
402
+ **🎯 Next Steps:**
403
+ - Listen to individual chapter previews below
404
+ - Download the complete audiobook when ready
405
+ - Check processing log for any issues
406
+ """
407
+
408
+ # Export audiobook
409
+ output_dir = f"output/{book_title.replace(' ', '_')}"
410
+ exported_files = processing_pipeline.export_audiobook(results, f"{output_dir}/audiobook.wav")
411
+
412
+ # Create download links - will be updated after file copying
413
+ download_info = f"""**📥 Download Files:**
414
+ - **Individual Chapters:** {len([k for k in exported_files.keys() if k.startswith('chapter')])} files available below
415
+ """
416
+
417
+ # Create chapter preview information
418
+ chapter_audio_file = None
419
+ chapter_info_text = ""
420
+ if results and len(results) > 0:
421
+ # Get the first chapter's audio file for preview and copy it for serving
422
+ chapter_key = "chapter_0"
423
+ if chapter_key in exported_files:
424
+ original_file = exported_files[chapter_key]
425
+ chapter_audio_file = copy_file_for_serving(original_file)
426
+
427
+ # Also copy the complete audiobook for serving
428
+ complete_audiobook_original = exported_files.get('complete')
429
+ complete_audiobook_served = None
430
+ if complete_audiobook_original:
431
+ complete_audiobook_served = copy_file_for_serving(complete_audiobook_original)
432
+
433
+ # Create chapter information text with download links
434
+ chapter_info_list = []
435
+ for i, chapter in enumerate(selected_chapters):
436
+ chapter_key = f"chapter_{i}"
437
+ if chapter_key in exported_files:
438
+ chapter_file = exported_files[chapter_key]
439
+ chapter_info_list.append(f"**📖 {chapter.title}** - {chapter.estimated_duration:.1f} min ({chapter.word_count:,} words)")
440
+
441
+ if chapter_info_list:
442
+ chapter_info_text = f"""**🎵 Chapter Previews:**
443
+ {chr(10).join(chapter_info_list)}
444
+
445
+ **📥 Individual Chapter Downloads:**
446
+ """
447
+ # Add download links for each chapter
448
+ for i, chapter in enumerate(selected_chapters):
449
+ chapter_key = f"chapter_{i}"
450
+ if chapter_key in exported_files:
451
+ chapter_file = exported_files[chapter_key]
452
+ safe_title = chapter.title.replace(' ', '_').replace(':', '').replace(',', '').replace('.', '')
453
+ chapter_info_text += f"- [{chapter.title}]({chapter_file})\n"
454
+
455
+ # Update download info with the copied complete audiobook
456
+ if complete_audiobook_served:
457
+ download_info = f"""**📥 Download Files:**
458
+ <div style="margin: 10px 0;">
459
+ <a href="{complete_audiobook_served}" download="{book_title.replace(' ', '_')}_complete_audiobook.wav"
460
+ style="display: inline-block; padding: 12px 24px; background: #28a745; color: white; text-decoration: none; border-radius: 6px; font-size: 16px; font-weight: bold;"
461
+ role="button" aria-label="Download complete audiobook">
462
+ 📥 Download Complete Audiobook
463
+ </a>
464
+ </div>
465
+ - **Individual Chapters:** {len([k for k in exported_files.keys() if k.startswith('chapter')])} files available below
466
+ """
467
+
468
+ return status_message, download_info, complete_audiobook_served, chapter_audio_file, chapter_info_text
469
+
470
+ except Exception as e:
471
+ error_message = f"❌ **Processing Error:** {str(e)}"
472
+ return error_message, None, None, None, ""
473
+
474
+ def synthesize_ui(text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed):
475
+ if not text or text.strip() == "":
476
+ return None, "Введите текст."
477
+ # Prepare paragraphs
478
+ text_norm = normalize_text(text)
479
+ paras = split_into_paragraphs(text_norm)
480
+ # Optional auto-accents for RU
481
+ paras = [maybe_ruaccent(p) for p in paras]
482
+
483
+ # Use local sample as default reference if no audio provided
484
+ ref_path = "samples/001/sample.mp3"
485
+ if ref_audio is not None:
486
+ ref_path = "ref_tmp.wav"
487
+ # Gradio gives (sr, np.ndarray) or file path depending on component
488
+ if isinstance(ref_audio, tuple) and isinstance(ref_audio[1], np.ndarray):
489
+ sf.write(ref_path, ref_audio[1], ref_audio[0])
490
+ elif isinstance(ref_audio, str):
491
+ ref_path = ref_audio
492
+
493
+ # Generate per paragraph
494
+ pieces = []
495
+ sr = None
496
+ rng_seed = int(seed) if seed is not None and int(seed) > 0 else None  # 0 means auto (no fixed seed)
497
+ for i, para in enumerate(paras):
498
+ audio, sr = backend.synthesize(
499
+ text=para,
500
+ ref_audio_path=ref_path,
501
+ ref_text=ref_text or "",
502
+ speed=float(speed),
503
+ nfe_steps=int(nfe_steps),
504
+ seed=rng_seed,
505
+ )
506
+ pieces.append(audio)
507
+
508
+ # Crossfade and normalize
509
+ if len(pieces) == 1:
510
+ final = pieces[0]
511
+ else:
512
+ final = crossfade_concat(pieces, crossfade_ms=int(crossfade_ms), sample_rate=sr)
513
+
514
+ # Target loudness (for personal listening; tweak as you like)
515
+ final = normalize_lufs(final, sr, target_lufs=-20.0)
516
+
517
+ out_path = "out_preview.wav"
518
+ save_wav(out_path, final, sr)
519
+ return (sr, final), f"Готово: {len(pieces)} фрагм., длительность ~{len(final)/sr:.1f}с. Сохранено: {out_path}"
520
+
521
+ with gr.Blocks(title="RU Audiobook Studio") as demo:
522
+ gr.Markdown("# RU Audiobook Studio — ESpeech TTS\nГенерируйте главы аудиокниг с готовыми голосами (zero/one‑shot).")
523
+
524
+ # EPUB Upload Section
525
+ with gr.Tab("📚 EPUB Book Processing"):
526
+ gr.Markdown("## Upload EPUB Book\nUpload an EPUB file to create a complete audiobook with automatic chapter detection.")
527
+
528
+ # Main content area with responsive layout
529
+ with gr.Row(equal_height=True):
530
+ # Left column - Upload and Settings (responsive)
531
+ with gr.Column(scale=1, min_width=350):
532
+ # File Upload Section
533
+ with gr.Group():
534
+ gr.Markdown("### 📁 File Upload")
535
+ epub_upload = gr.File(
536
+ label="Upload EPUB File",
537
+ file_types=[".epub"],
538
+ file_count="single",
539
+ height=100
540
+ )
541
+ epub_validate_btn = gr.Button("🔍 Validate EPUB", variant="secondary", size="lg", elem_id="validate-epub-btn")
542
+
543
+ # Processing Settings Section
544
+ with gr.Group():
545
+ gr.Markdown("### ⚙️ Processing Settings")
546
+ with gr.Accordion("Advanced Settings", open=False):
547
+ epub_speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speech Speed", info="Adjust the speed of speech generation (0.6 = slower, 1.4 = faster)")
548
+ epub_nfe_steps = gr.Slider(12, 96, value=48, step=1, label="Quality Steps (NFE)", info="Higher values produce better quality but take longer to process")
549
+ epub_crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms)", info="Smooth transition between audio segments")
550
+ epub_seed = gr.Number(value=0, label="Seed (0 = auto)", info="Random seed for reproducible results (0 = random)")
551
+
552
+ # Voice and tone parameters
553
+ gr.Markdown("### 🎵 Voice & Tone Settings")
554
+ epub_target_rms = gr.Slider(0.05, 0.3, value=0.1, step=0.01, label="Volume Level (RMS)", info="Target volume level for generated audio (0.05 = quiet, 0.3 = loud)")
555
+ epub_cfg_strength = gr.Slider(1.0, 4.0, value=2.0, step=0.1, label="Voice Clarity", info="Controls how closely the voice follows the reference (1.0 = more variation, 4.0 = more consistent)")
556
+ epub_sway_coef = gr.Slider(-2.0, 1.0, value=-1.0, step=0.1, label="Voice Variation", info="Controls natural voice variation (-2.0 = more monotone, 1.0 = more expressive)")
557
+
558
+ # Right column - Chapter Selection and Processing (responsive)
559
+ with gr.Column(scale=2, min_width=500):
560
+ # Chapter Selection Section
561
+ with gr.Group():
562
+ gr.Markdown("### 📖 Chapter Selection")
563
+ epub_chapter_selection = gr.CheckboxGroup(
564
+ label="Select Chapters to Process",
565
+ choices=[],
566
+ value=[],
567
+ visible=False,
568
+ info="All chapters are selected by default. Uncheck chapters you don't want to process.",
569
+ elem_id="chapter-selection"
570
+ )
571
+
572
+ # Selection control buttons
573
+ with gr.Row(visible=False) as epub_selection_buttons:
574
+ epub_select_all_btn = gr.Button("✅ Select All", variant="secondary", size="sm")
575
+ epub_deselect_all_btn = gr.Button("❌ Deselect All", variant="secondary", size="sm")
576
+
577
+ epub_selection_summary = gr.Markdown("", visible=False)
578
+
579
+ # Processing Section
580
+ with gr.Group():
581
+ gr.Markdown("### 🚀 Processing")
582
+ with gr.Row():
583
+ epub_process_btn = gr.Button("🚀 Process Selected Chapters", variant="primary", visible=False, elem_id="process-btn")
584
+ epub_stop_btn = gr.Button("🛑 Stop Processing", variant="stop", visible=False, elem_id="stop-btn")
585
+
586
+ # Right column - Status and Results (responsive)
587
+ with gr.Column(scale=3, min_width=500):
588
+ # Status Section
589
+ with gr.Group():
590
+ gr.Markdown("### 📊 Status & Results")
591
+ epub_status = gr.Markdown("📁 **Upload Status:** Ready to upload EPUB file")
592
+ epub_progress = gr.Progress()
593
+ epub_validation_status = gr.Markdown("⏳ **Validation:** Waiting for file upload")
594
+ epub_processing_status = gr.Markdown("⏳ **Processing:** Not started")
595
+ epub_details = gr.Markdown("")
596
+
597
+ # Results Section
598
+ with gr.Group():
599
+ gr.Markdown("### 🎵 Results")
600
+ epub_download_audio = gr.Audio(label="Download Complete Audiobook", visible=False)
601
+ epub_chapter_audio = gr.Audio(label="Chapter Audio Preview", visible=False)
602
+ epub_chapter_info = gr.Markdown("", visible=False)
603
+
604
+ # State management (hidden)
605
+ epub_chapters = gr.State() # Store chapter data
606
+ epub_book_title = gr.State() # Store book title
607
+ epub_book_author = gr.State() # Store book author
608
+ epub_chapter_choices = gr.State() # Store chapter choices for selection
609
+ epub_selected_choices = gr.State() # Store selected chapter choices
610
+
611
+ # Manual Text Processing Section
612
+ with gr.Tab("✏️ Manual Text Processing"):
613
+ gr.Markdown("## Manual Text Input\nEnter text manually for quick processing and testing.")
614
+
615
+ with gr.Row():
616
+ with gr.Column(scale=3):
617
+ text = gr.Textbox(lines=12, label="Текст (глава/абзацы)")
618
+ ref_text = gr.Textbox(lines=3, label="Reference text (текст из референса)", value=load_default_ref_text())
619
+ ref_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Reference audio (6–12 с, опционально - используется samples/001/sample.mp3 по умолчанию)")
620
+ with gr.Row():
621
+ speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speed")
622
+ nfe_steps = gr.Slider(12, 96, value=48, step=1, label="NFE steps")
623
+ crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms) между абзацами")
624
+ seed = gr.Number(value=0, label="Seed (0 = авто)")
625
+ btn = gr.Button("Synthesize", variant="primary")
626
+ with gr.Column(scale=2):
627
+ audio_out = gr.Audio(label="Предпрослушка", autoplay=False)
628
+ status = gr.Markdown()
629
+
630
+ # Event handlers for manual text processing
631
+ btn.click(
632
+ synthesize_ui,
633
+ inputs=[text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed],
634
+ outputs=[audio_out, status]
635
+ )
636
+
637
+ # Event handlers for EPUB processing
638
+ epub_validate_btn.click(
639
+ validate_epub_upload,
640
+ inputs=[epub_upload],
641
+ outputs=[epub_status, epub_validation_status, epub_processing_status, epub_details, epub_chapters, epub_book_title, epub_book_author, epub_chapter_choices, epub_selected_choices]
642
+ ).then(
643
+ update_chapter_selection_interface,
644
+ inputs=[epub_chapters, epub_chapter_choices, epub_selected_choices],
645
+ outputs=[epub_process_btn, epub_stop_btn, epub_chapter_selection, epub_selection_buttons, epub_selection_summary]
646
+ )
647
+
648
+ # EPUB processing event handler
649
+ epub_process_btn.click(
650
+ lambda: (gr.update(visible=False), gr.update(visible=True)),
651
+ outputs=[epub_process_btn, epub_stop_btn]
652
+ ).then(
653
+ process_epub_book,
654
+ inputs=[epub_chapters, epub_chapter_selection, epub_book_title, epub_book_author, epub_speed, epub_nfe_steps, epub_crossfade_ms, epub_seed, epub_target_rms, epub_cfg_strength, epub_sway_coef],
655
+ outputs=[epub_processing_status, epub_details, epub_download_audio, epub_chapter_audio, epub_chapter_info]
656
+ ).then(
657
+ lambda: (gr.update(visible=True), gr.update(visible=False)),
658
+ outputs=[epub_process_btn, epub_stop_btn]
659
+ ).then(
660
+ lambda audio_file, info_text, download_audio: (
661
+ gr.update(visible=bool(audio_file)),
662
+ gr.update(visible=bool(info_text.strip())),
663
+ gr.update(visible=bool(download_audio), value=download_audio)
664
+ ),
665
+ inputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio],
666
+ outputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio]
667
+ )
668
+
669
+ # Chapter selection change handler
670
+ epub_chapter_selection.change(
671
+ save_chapter_selection_state,
672
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
673
+ outputs=[epub_selected_choices]
674
+ ).then(
675
+ lambda selected_choices, chapters: gr.update(
676
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
677
+ ),
678
+ inputs=[epub_selected_choices, epub_chapters],
679
+ outputs=[epub_selection_summary]
680
+ )
681
+
682
+ # Select All button handler
683
+ epub_select_all_btn.click(
684
+ select_all_chapters,
685
+ inputs=[epub_chapter_choices],
686
+ outputs=[epub_chapter_selection]
687
+ ).then(
688
+ save_chapter_selection_state,
689
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
690
+ outputs=[epub_selected_choices]
691
+ ).then(
692
+ lambda selected_choices, chapters: gr.update(
693
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
694
+ ),
695
+ inputs=[epub_selected_choices, epub_chapters],
696
+ outputs=[epub_selection_summary]
697
+ )
698
+
699
+ # Deselect All button handler
700
+ epub_deselect_all_btn.click(
701
+ deselect_all_chapters,
702
+ outputs=[epub_chapter_selection]
703
+ ).then(
704
+ save_chapter_selection_state,
705
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
706
+ outputs=[epub_selected_choices]
707
+ ).then(
708
+ lambda selected_choices, chapters: gr.update(
709
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
710
+ ),
711
+ inputs=[epub_selected_choices, epub_chapters],
712
+ outputs=[epub_selection_summary]
713
+ )
714
+
715
+ # EPUB stop processing event handler
716
+ epub_stop_btn.click(
717
+ stop_epub_processing,
718
+ outputs=[epub_processing_status]
719
+ ).then(
720
+ lambda: (gr.update(visible=True), gr.update(visible=False)),
721
+ outputs=[epub_process_btn, epub_stop_btn]
722
+ )
723
+
724
+ if __name__ == "__main__":
725
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
audiobook_generator.py ADDED
@@ -0,0 +1,499 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Audiobook package generation module for Russian Audiobook Studio.
4
+ Creates high-quality audiobook packages with metadata, playlists, and proper file structure.
5
+ """
6
+ import os
7
+ import json
8
+ import zipfile
9
+ import re
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any, Optional, Tuple
12
+ from dataclasses import dataclass
13
+ import numpy as np
14
+ import soundfile as sf
15
+ from datetime import datetime
16
+
17
+ # Import mutagen for metadata handling
18
+ try:
19
+ from mutagen import File as MutagenFile
20
+ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TCON, TDRC, TRCK
21
+ from mutagen.mp3 import MP3
22
+ MUTAGEN_AVAILABLE = True
23
+ except ImportError:
24
+ MUTAGEN_AVAILABLE = False
25
+
26
+ from epub_processing_pipeline import ChapterProcessingResult
27
+
28
+
29
+ @dataclass
30
+ class AudiobookMetadata:
31
+ """Metadata for an audiobook package."""
32
+ title: str
33
+ author: str
34
+ total_chapters: int
35
+ total_duration: float
36
+ total_word_count: int
37
+ chapters: List[Dict[str, Any]]
38
+ creation_date: str
39
+ version: str = "1.0"
40
+
41
+
42
+ class AudiobookGenerator:
43
+ """Generator for creating audiobook packages with metadata and proper structure."""
44
+
45
+ def __init__(self):
46
+ """Initialize the audiobook generator."""
47
+ self.target_sample_rate = 24000 # 24kHz
48
+ self.target_bit_depth = 16 # 16-bit
49
+ self.target_channels = 1 # Mono
50
+ self.version = "1.0"
51
+
52
+ def sanitize_filename(self, filename: str, max_length: int = 100) -> str:
53
+ """
54
+ Sanitize filename for cross-platform compatibility.
55
+
56
+ Args:
57
+ filename: Original filename
58
+ max_length: Maximum length for the filename
59
+
60
+ Returns:
61
+ Sanitized filename
62
+ """
63
+ # Remove or replace problematic characters
64
+ sanitized = re.sub(r'[<>:"/\\|?*]', '_', filename)
65
+ sanitized = re.sub(r'[^\w\s\-_\.]', '_', sanitized)
66
+ sanitized = re.sub(r'\s+', '_', sanitized) # Replace spaces with underscores
67
+ sanitized = re.sub(r'_+', '_', sanitized) # Replace multiple underscores with single
68
+
69
+ # Remove leading/trailing underscores and dots
70
+ sanitized = sanitized.strip('_.')
71
+
72
+ # Truncate if too long
73
+ if len(sanitized) > max_length:
74
+ sanitized = sanitized[:max_length].rstrip('_.')
75
+
76
+ return sanitized
77
+
78
+ def save_high_quality_audio(
79
+ self,
80
+ audio_data: np.ndarray,
81
+ sample_rate: int,
82
+ output_path: str
83
+ ) -> None:
84
+ """
85
+ Save audio data as high-quality WAV file (24kHz, 16-bit, mono).
86
+
87
+ Args:
88
+ audio_data: Audio data as numpy array
89
+ sample_rate: Original sample rate
90
+ output_path: Path to save the audio file
91
+
92
+ Raises:
93
+ ValueError: If audio data or sample rate is invalid
94
+ """
95
+ if audio_data is None:
96
+ raise ValueError("Invalid audio data")
97
+
98
+ if sample_rate <= 0:
99
+ raise ValueError("Invalid sample rate")
100
+
101
+ # Ensure mono audio
102
+ if len(audio_data.shape) > 1:
103
+ audio_data = np.mean(audio_data, axis=1)
104
+
105
+ # Resample to target sample rate if needed
106
+ if sample_rate != self.target_sample_rate:
107
+ audio_data = self._resample_audio(audio_data, sample_rate, self.target_sample_rate)
108
+
109
+ # Ensure 16-bit depth
110
+ if audio_data.dtype != np.int16:
111
+ # Convert to 16-bit
112
+ audio_data = (audio_data * 32767).astype(np.int16)
113
+
114
+ # Create output directory if it doesn't exist
115
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
116
+
117
+ # Save as WAV file
118
+ sf.write(output_path, audio_data, self.target_sample_rate, subtype='PCM_16')
119
+
120
+ def _resample_audio(self, audio_data: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
121
+ """
122
+ Resample audio data to target sample rate.
123
+
124
+ Args:
125
+ audio_data: Original audio data
126
+ orig_sr: Original sample rate
127
+ target_sr: Target sample rate
128
+
129
+ Returns:
130
+ Resampled audio data
131
+ """
132
+ if orig_sr == target_sr:
133
+ return audio_data
134
+
135
+ # Simple linear interpolation resampling
136
+ # For production use, consider using librosa or scipy.signal.resample
137
+ ratio = target_sr / orig_sr
138
+ new_length = int(len(audio_data) * ratio)
139
+
140
+ # Create new time indices
141
+ old_indices = np.linspace(0, len(audio_data) - 1, len(audio_data))
142
+ new_indices = np.linspace(0, len(audio_data) - 1, new_length)
143
+
144
+ # Interpolate
145
+ resampled = np.interp(new_indices, old_indices, audio_data)
146
+
147
+ return resampled.astype(audio_data.dtype)
148
+
149
+ def create_metadata(
150
+ self,
151
+ title: str,
152
+ author: str,
153
+ chapters: List[Any],
154
+ total_duration: float
155
+ ) -> Dict[str, Any]:
156
+ """
157
+ Create comprehensive metadata for the audiobook.
158
+
159
+ Args:
160
+ title: Audiobook title
161
+ author: Audiobook author
162
+ chapters: List of chapter objects
163
+ total_duration: Total duration in minutes
164
+
165
+ Returns:
166
+ Dictionary containing metadata
167
+ """
168
+ total_word_count = sum(getattr(chapter, 'word_count', 0) for chapter in chapters)
169
+
170
+ metadata = {
171
+ 'title': title,
172
+ 'artist': author,
173
+ 'album': title,
174
+ 'genre': 'Audiobook',
175
+ 'date': datetime.now().strftime('%Y-%m-%d'),
176
+ 'total_chapters': len(chapters),
177
+ 'total_duration': total_duration,
178
+ 'total_word_count': total_word_count,
179
+ 'chapters': []
180
+ }
181
+
182
+ # Add chapter metadata
183
+ for i, chapter in enumerate(chapters):
184
+ chapter_meta = {
185
+ 'title': getattr(chapter, 'title', f'Chapter {i+1}'),
186
+ 'order': i + 1,
187
+ 'word_count': getattr(chapter, 'word_count', 0),
188
+ 'estimated_duration': getattr(chapter, 'estimated_duration', 0.0),
189
+ 'file_name': f"chapter_{i+1:03d}_{self.sanitize_filename(getattr(chapter, 'title', f'Chapter_{i+1}'))}.wav"
190
+ }
191
+ metadata['chapters'].append(chapter_meta)
192
+
193
+ return metadata
194
+
195
+ def generate_m3u_playlist(
196
+ self,
197
+ processing_results: List[ChapterProcessingResult],
198
+ output_path: str
199
+ ) -> None:
200
+ """
201
+ Generate M3U playlist file for chapter navigation.
202
+
203
+ Args:
204
+ processing_results: List of chapter processing results
205
+ output_path: Path to save the playlist file
206
+ """
207
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
208
+
209
+ with open(output_path, 'w', encoding='utf-8') as f:
210
+ f.write("#EXTM3U\n")
211
+ f.write(f"#EXTINF:-1,Complete Audiobook\n")
212
+ f.write("audio/complete_audiobook.wav\n\n")
213
+
214
+ for result in processing_results:
215
+ if result.status == "completed":
216
+ duration_seconds = int(result.estimated_duration * 60)
217
+ sanitized_title = self.sanitize_filename(result.chapter_title)
218
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
219
+
220
+ f.write(f"#EXTINF:{duration_seconds},{result.chapter_title}\n")
221
+ f.write(f"audio/{filename}\n")
222
+
223
+ def create_metadata_json(
224
+ self,
225
+ processing_results: List[ChapterProcessingResult],
226
+ title: str,
227
+ author: str,
228
+ output_path: str
229
+ ) -> None:
230
+ """
231
+ Create JSON metadata file.
232
+
233
+ Args:
234
+ processing_results: List of chapter processing results
235
+ title: Audiobook title
236
+ author: Audiobook author
237
+ output_path: Path to save the metadata file
238
+ """
239
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
240
+
241
+ # Calculate totals
242
+ total_duration = sum(result.estimated_duration for result in processing_results)
243
+ total_word_count = sum(result.word_count for result in processing_results)
244
+
245
+ # Create metadata
246
+ metadata = {
247
+ 'title': title,
248
+ 'author': author,
249
+ 'total_chapters': len(processing_results),
250
+ 'total_duration': total_duration,
251
+ 'total_word_count': total_word_count,
252
+ 'creation_date': datetime.now().isoformat(),
253
+ 'version': self.version,
254
+ 'chapters': []
255
+ }
256
+
257
+ # Add chapter information
258
+ for result in processing_results:
259
+ if result.status == "completed":
260
+ sanitized_title = self.sanitize_filename(result.chapter_title)
261
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
262
+
263
+ chapter_info = {
264
+ 'title': result.chapter_title,
265
+ 'order': result.chapter_index + 1,
266
+ 'word_count': result.word_count,
267
+ 'estimated_duration': result.estimated_duration,
268
+ 'file_name': filename,
269
+ 'processing_time': result.processing_time,
270
+ 'sample_rate': result.sample_rate
271
+ }
272
+ metadata['chapters'].append(chapter_info)
273
+
274
+ # Save metadata
275
+ with open(output_path, 'w', encoding='utf-8') as f:
276
+ json.dump(metadata, f, indent=2, ensure_ascii=False)
277
+
278
+ def generate_individual_chapters(
279
+ self,
280
+ processing_results: List[ChapterProcessingResult],
281
+ output_dir: str
282
+ ) -> List[str]:
283
+ """
284
+ Generate individual chapter audio files.
285
+
286
+ Args:
287
+ processing_results: List of chapter processing results
288
+ output_dir: Directory to save chapter files
289
+
290
+ Returns:
291
+ List of paths to generated chapter files
292
+ """
293
+ if not processing_results:
294
+ raise ValueError("No chapters to process")
295
+
296
+ os.makedirs(output_dir, exist_ok=True)
297
+ chapter_files = []
298
+
299
+ for result in processing_results:
300
+ if result.status == "completed" and result.audio_data is not None:
301
+ sanitized_title = self.sanitize_filename(result.chapter_title)
302
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
303
+ file_path = os.path.join(output_dir, filename)
304
+
305
+ self.save_high_quality_audio(
306
+ result.audio_data,
307
+ result.sample_rate,
308
+ file_path
309
+ )
310
+ chapter_files.append(file_path)
311
+
312
+ return chapter_files
313
+
314
+ def generate_complete_audiobook(
315
+ self,
316
+ processing_results: List[ChapterProcessingResult],
317
+ output_path: str
318
+ ) -> str:
319
+ """
320
+ Generate complete audiobook by concatenating all chapters.
321
+
322
+ Args:
323
+ processing_results: List of chapter processing results
324
+ output_path: Path to save the complete audiobook
325
+
326
+ Returns:
327
+ Path to the generated complete audiobook file
328
+ """
329
+ if not processing_results:
330
+ raise ValueError("No chapters to process")
331
+
332
+ # Collect valid audio data
333
+ valid_audio = []
334
+ sample_rate = None
335
+
336
+ for result in processing_results:
337
+ if result.status == "completed" and result.audio_data is not None:
338
+ valid_audio.append(result.audio_data)
339
+ if sample_rate is None:
340
+ sample_rate = result.sample_rate
341
+
342
+ if not valid_audio:
343
+ raise ValueError("No valid audio data found")
344
+
345
+ # Concatenate audio
346
+ from utils.audio import crossfade_concat
347
+ # Use smaller crossfade for short audio segments
348
+ crossfade_ms = min(150, int(min(len(audio) for audio in valid_audio) / sample_rate * 1000 * 0.5))
349
+ complete_audio = crossfade_concat(valid_audio, crossfade_ms=crossfade_ms, sample_rate=sample_rate)
350
+
351
+ # Save complete audiobook
352
+ self.save_high_quality_audio(complete_audio, sample_rate, output_path)
353
+
354
+ return output_path
355
+
356
+ def generate_audiobook_package(
357
+ self,
358
+ processing_results: List[ChapterProcessingResult],
359
+ title: str,
360
+ author: str,
361
+ output_dir: str
362
+ ) -> Dict[str, str]:
363
+ """
364
+ Generate complete audiobook package with all files and metadata.
365
+
366
+ Args:
367
+ processing_results: List of chapter processing results
368
+ title: Audiobook title
369
+ author: Audiobook author
370
+ output_dir: Directory to create the package
371
+
372
+ Returns:
373
+ Dictionary with paths to generated files
374
+ """
375
+ if processing_results is None:
376
+ raise ValueError("Processing results cannot be None")
377
+ if not processing_results:
378
+ raise ValueError("No chapters to process")
379
+
380
+ # Create directory structure
381
+ sanitized_title = self.sanitize_filename(title)
382
+ package_dir = os.path.join(output_dir, sanitized_title)
383
+ audio_dir = os.path.join(package_dir, "audio")
384
+ metadata_dir = os.path.join(package_dir, "metadata")
385
+
386
+ os.makedirs(audio_dir, exist_ok=True)
387
+ os.makedirs(metadata_dir, exist_ok=True)
388
+
389
+ generated_files = {}
390
+
391
+ # Generate individual chapter files
392
+ chapter_files = self.generate_individual_chapters(processing_results, audio_dir)
393
+ generated_files['chapters'] = chapter_files
394
+
395
+ # Generate complete audiobook
396
+ complete_path = os.path.join(audio_dir, "complete_audiobook.wav")
397
+ self.generate_complete_audiobook(processing_results, complete_path)
398
+ generated_files['complete'] = complete_path
399
+
400
+ # Generate metadata files
401
+ metadata_json_path = os.path.join(metadata_dir, "metadata.json")
402
+ self.create_metadata_json(processing_results, title, author, metadata_json_path)
403
+ generated_files['metadata_json'] = metadata_json_path
404
+
405
+ # Generate M3U playlist
406
+ playlist_path = os.path.join(metadata_dir, "playlist.m3u")
407
+ self.generate_m3u_playlist(processing_results, playlist_path)
408
+ generated_files['playlist'] = playlist_path
409
+
410
+ return generated_files
411
+
412
+ def create_complete_package_zip(
413
+ self,
414
+ processing_results: List[ChapterProcessingResult],
415
+ title: str,
416
+ author: str,
417
+ output_dir: str
418
+ ) -> str:
419
+ """
420
+ Create a complete ZIP package of the audiobook.
421
+
422
+ Args:
423
+ processing_results: List of chapter processing results
424
+ title: Audiobook title
425
+ author: Audiobook author
426
+ output_dir: Directory to save the ZIP file
427
+
428
+ Returns:
429
+ Path to the created ZIP file
430
+ """
431
+ # Generate the audiobook package
432
+ package_files = self.generate_audiobook_package(processing_results, title, author, output_dir)
433
+
434
+ # Create ZIP file
435
+ sanitized_title = self.sanitize_filename(title)
436
+ zip_filename = f"{sanitized_title}_audiobook.zip"
437
+ zip_path = os.path.join(output_dir, zip_filename)
438
+
439
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
440
+ # Add all files from the package directory
441
+ package_dir = os.path.join(output_dir, sanitized_title)
442
+
443
+ for root, dirs, files in os.walk(package_dir):
444
+ for file in files:
445
+ file_path = os.path.join(root, file)
446
+ arcname = os.path.relpath(file_path, output_dir)
447
+ zip_file.write(file_path, arcname)
448
+
449
+ return zip_path
450
+
451
+ def estimate_package_size(self, processing_results: List[ChapterProcessingResult]) -> int:
452
+ """
453
+ Estimate the size of the audiobook package in bytes.
454
+
455
+ Args:
456
+ processing_results: List of chapter processing results
457
+
458
+ Returns:
459
+ Estimated package size in bytes
460
+ """
461
+ if not processing_results:
462
+ return 0
463
+
464
+ total_audio_size = 0
465
+
466
+ for result in processing_results:
467
+ if result.status == "completed" and result.audio_data is not None:
468
+ # Estimate WAV file size: sample_rate * duration * 2 bytes (16-bit) * channels
469
+ duration_seconds = result.estimated_duration * 60
470
+ audio_size = int(self.target_sample_rate * duration_seconds * 2 * self.target_channels)
471
+ total_audio_size += audio_size
472
+
473
+ # Add overhead for metadata, playlist, and ZIP compression
474
+ overhead = total_audio_size * 0.1 # 10% overhead
475
+
476
+ return int(total_audio_size + overhead)
477
+
478
+ def add_audio_metadata(self, audio_path: str, metadata: Dict[str, Any]) -> None:
479
+ """
480
+ Add metadata to audio files using mutagen (if available).
481
+
482
+ Args:
483
+ audio_path: Path to the audio file
484
+ metadata: Metadata dictionary
485
+ """
486
+ if not MUTAGEN_AVAILABLE:
487
+ return # Skip if mutagen is not available
488
+
489
+ try:
490
+ # For WAV files, we can't add ID3 tags directly
491
+ # This is a placeholder for future MP3 support
492
+ pass
493
+ except Exception as e:
494
+ # Silently fail if metadata addition fails
495
+ pass
496
+
497
+ def cleanup(self):
498
+ """Clean up any temporary resources."""
499
+ pass
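For orientation, the sketch below shows how the pieces of this module fit together on disk: `sanitize_filename` normalizes titles, and `generate_audiobook_package` / `create_complete_package_zip` produce the layout sketched in the comments. The book and chapter titles here are placeholders; the directory names, file-name pattern, and audio format follow the code above.

```python
from audiobook_generator import AudiobookGenerator

gen = AudiobookGenerator()

# Titles are sanitized before being used in file names (punctuation and spaces -> "_"):
safe = gen.sanitize_filename('Глава 1: "Начало"...')
print(safe)  # roughly 'Глава_1_Начало'

# Illustrative layout produced for a book titled "My_Book" (placeholder title):
#
#   output_dir/My_Book/
#   ├── audio/
#   │   ├── chapter_001_<title>.wav     # per-chapter files: 24 kHz, 16-bit, mono WAV
#   │   ├── chapter_002_<title>.wav
#   │   └── complete_audiobook.wav      # completed chapters crossfaded together
#   └── metadata/
#       ├── metadata.json               # title, author, per-chapter stats
#       └── playlist.m3u                # #EXTM3U playlist referencing audio/...
#
# create_complete_package_zip(...) then archives My_Book/ into My_Book_audiobook.zip.
```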
backends/espeech_backend.py ADDED
@@ -0,0 +1,135 @@
1
+ # backends/espeech_backend.py
2
+ # Full integration of ESpeech/ESpeech-TTS-1_RL-V2 (F5-TTS) for inference.
3
+ # Based on the model card code: weight loading, reference audio preprocessing,
4
+ # the infer_process call, and returning (wave, sample_rate).
5
+ from __future__ import annotations
6
+ from typing import Tuple, Optional
7
+ import os
8
+ import gc
9
+ import numpy as np
10
+ import torch
11
+ import torchaudio
12
+
13
+ # Force CPU usage on macOS to avoid MPS issues
14
+ if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
15
+ os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
16
+ # Disable MPS to force CPU usage
17
+ torch.backends.mps.is_available = lambda: False
18
+ torch.backends.mps.is_built = lambda: False
19
+ from huggingface_hub import hf_hub_download, snapshot_download
20
+
21
+ # F5-TTS imports (as in the model card)
22
+ from f5_tts.infer.utils_infer import (
23
+ infer_process,
24
+ load_model,
25
+ load_vocoder,
26
+ preprocess_ref_audio_text,
27
+ )
28
+ from f5_tts.model import DiT
29
+
30
+ # Model config from the model card
31
+ MODEL_CFG = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
32
+
33
+ class EspeechBackend:
34
+ def __init__(self, model_id: str = "ESpeech/ESpeech-TTS-1_RL-V2"):
35
+ self.model_id = model_id
36
+ self.model_file = "espeech_tts_rlv2.pt"
37
+ self.vocab_file = "vocab.txt"
38
+ # Prefer CUDA when available; otherwise CPU (MPS was disabled above to avoid macOS issues)
39
+ if torch.cuda.is_available():
40
+ self.device = torch.device("cuda")
41
+ else:
42
+ self.device = torch.device("cpu")
43
+ self.model = None
44
+ self.vocoder = None
45
+ self._ensure_loaded()
46
+
47
+ def _download(self, repo: str, filename: str) -> str:
48
+ try:
49
+ return hf_hub_download(repo_id=repo, filename=filename)
50
+ except Exception:
51
+ # fallback: download the whole repo snapshot
52
+ local_dir = f"cache_{repo.replace('/', '_')}"
53
+ snap_dir = snapshot_download(repo_id=repo, local_dir=local_dir)
54
+ path = os.path.join(snap_dir, filename)
55
+ if not os.path.exists(path):
56
+ raise FileNotFoundError(f"{filename} not found in snapshot {snap_dir}")
57
+ return path
58
+
59
+ def _ensure_loaded(self):
60
+ # download model weights and vocabulary
61
+ model_path = self._download(self.model_id, self.model_file)
62
+ vocab_path = self._download(self.model_id, self.vocab_file)
63
+ # initialize the model and vocoder
64
+ self.model = load_model(DiT, MODEL_CFG, model_path, vocab_file=vocab_path)
65
+ self.vocoder = load_vocoder()
66
+ # move to the target device
67
+ try:
68
+ self.model.to(self.device)
69
+ self.vocoder.to(self.device)
70
+ except Exception as e:
71
+ # Fallback to CPU if device transfer fails
72
+ print(f"Warning: Failed to move model to {self.device}, falling back to CPU: {e}")
73
+ self.device = torch.device("cpu")
74
+ self.model.to(self.device)
75
+ self.vocoder.to(self.device)
76
+
77
+ def synthesize(
78
+ self,
79
+ text: str,
80
+ ref_audio_path: Optional[str],
81
+ ref_text: str,
82
+ speed: float = 1.0,
83
+ nfe_steps: int = 48,
84
+ seed: Optional[int] = None,
85
+ cross_fade_sec: float = 0.15,
86
+ target_rms: float = 0.1,
87
+ cfg_strength: float = 2.0,
88
+ sway_sampling_coef: float = -1.0,
89
+ ) -> Tuple[np.ndarray, int]:
90
+ """
91
+ Возвращает (audio_float32_mono, sample_rate).
92
+ Требования: float32 [-1..1], моно.
93
+ """
94
+ if not text or not text.strip():
95
+ raise ValueError("Пустой текст для синтеза.")
96
+ if not ref_audio_path or not os.path.exists(ref_audio_path):
97
+ raise FileNotFoundError("Укажите путь к reference audio (6–12 с).")
98
+ if not ref_text or not ref_text.strip():
99
+ raise ValueError("Укажите reference text (транскрипт того же reference audio).")
100
+
101
+ if seed is not None:
102
+ torch.manual_seed(int(seed))
103
+
104
+ # Подготовка референса (функция сама сделает ресэмплинг/моно)
105
+ ref_audio_proc, ref_text_proc = preprocess_ref_audio_text(ref_audio_path, ref_text)
106
+
107
+ # Основной вызов инференса с дополнительными параметрами для улучшения качества голоса
108
+ final_wave, final_sample_rate, _ = infer_process(
109
+ ref_audio_proc,
110
+ ref_text_proc,
111
+ text,
112
+ self.model,
113
+ self.vocoder,
114
+ cross_fade_duration=float(cross_fade_sec),
115
+ nfe_step=int(nfe_steps),
116
+ speed=float(speed),
117
+ target_rms=float(target_rms),
118
+ cfg_strength=float(cfg_strength),
119
+ sway_sampling_coef=float(sway_sampling_coef),
120
+ )
121
+
122
+ # На всякий случай приводим тип/диапазон
123
+ wav = np.asarray(final_wave, dtype=np.float32)
124
+ wav = np.clip(wav, -1.0, 1.0)
125
+ sr = int(final_sample_rate)
126
+
127
+ # Освобождение памяти на CUDA (длинные книги)
128
+ if self.device.type == "cuda":
129
+ try:
130
+ torch.cuda.empty_cache()
131
+ gc.collect()
132
+ except Exception:
133
+ pass
134
+
135
+ return wav, sr
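A minimal usage sketch for the backend, assuming the bundled reference sample files from this repository (`samples/001/sample.mp3` and `samples/001/sample.text`) are present and that the F5-TTS and Hub dependencies from requirements.txt are installed; `demo.wav` is just a placeholder output name.

```python
from pathlib import Path

import soundfile as sf

from backends.espeech_backend import EspeechBackend

# Downloads the checkpoint and vocab from the Hub on first run.
backend = EspeechBackend()

# Reference transcript shipped with the repo (must match the reference audio).
ref_text = Path("samples/001/sample.text").read_text(encoding="utf-8").strip()

wav, sr = backend.synthesize(
    text="Привет! Это пробный синтез речи.",
    ref_audio_path="samples/001/sample.mp3",
    ref_text=ref_text,
    speed=1.0,
    nfe_steps=32,  # lower step count for a quick smoke test
)
sf.write("demo.wav", wav, sr, subtype="PCM_16")
```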
epub_processing_pipeline.py ADDED
@@ -0,0 +1,425 @@
+ #!/usr/bin/env python3
+ """
+ EPUB processing pipeline for Russian Audiobook Studio.
+ Integrates EPUB chapter detection with ESpeechBackend for TTS processing.
+ """
+ import os
+ import time
+ import gc
+ from typing import List, Optional, Callable, Dict, Any, Tuple
+ from dataclasses import dataclass
+ from pathlib import Path
+ import numpy as np
+ import soundfile as sf
+
+ from epub_processor import EpubProcessor, Chapter
+ from backends.espeech_backend import EspeechBackend
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
+
+
+ @dataclass
+ class ProcessingSettings:
+     """Settings for EPUB processing pipeline."""
+     speed: float = 1.0
+     nfe_steps: int = 48
+     crossfade_ms: int = 150
+     target_lufs: float = -20.0
+     seed: Optional[int] = None
+     ref_audio_path: str = "samples/001/sample.mp3"
+     ref_text: str = ""
+     # Voice and tone parameters
+     target_rms: float = 0.1
+     cfg_strength: float = 2.0
+     sway_sampling_coef: float = -1.0
+
+     def __post_init__(self):
+         """Load default reference text if not provided."""
+         if not self.ref_text:
+             try:
+                 with open("samples/001/sample.text", "r", encoding="utf-8") as f:
+                     self.ref_text = f.read().strip()
+             except FileNotFoundError:
+                 self.ref_text = ""
+
+
+ @dataclass
+ class ChapterProcessingResult:
+     """Result of processing a single chapter."""
+     chapter_index: int
+     chapter_title: str
+     status: str  # pending, processing, completed, error, cancelled
+     audio_data: Optional[np.ndarray] = None
+     sample_rate: Optional[int] = None
+     processing_time: float = 0.0
+     error_message: Optional[str] = None
+     word_count: int = 0
+     estimated_duration: float = 0.0
+
+
+ @dataclass
+ class ProcessingStatistics:
+     """Statistics for book processing."""
+     total_chapters: int = 0
+     completed_chapters: int = 0
+     failed_chapters: int = 0
+     total_processing_time: float = 0.0
+     average_processing_time: float = 0.0
+     total_audio_duration: float = 0.0
+     total_word_count: int = 0
+
+
+ class EpubProcessingPipeline:
+     """Main pipeline for processing EPUB books into audiobooks."""
+
+     def __init__(self, epub_processor: EpubProcessor, tts_backend: EspeechBackend):
+         """
+         Initialize the processing pipeline.
+
+         Args:
+             epub_processor: EPUB processor for chapter extraction
+             tts_backend: TTS backend for audio synthesis
+         """
+         self.epub_processor = epub_processor
+         self.tts_backend = tts_backend
+         self.processing_settings = ProcessingSettings()
+
+         # Ensure backend is loaded
+         self.tts_backend._ensure_loaded()
+
+     def process_chapter(
+         self,
+         chapter: Chapter,
+         settings: ProcessingSettings,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> ChapterProcessingResult:
+         """
+         Process a single chapter into audio.
+
+         Args:
+             chapter: Chapter to process
+             settings: Processing settings
+             progress_callback: Optional callback for progress updates
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             ChapterProcessingResult with processing status and audio data
+         """
+         start_time = time.time()
+
+         if progress_callback:
+             progress_callback(chapter.order, "processing", f"Processing {chapter.title}")
+
+         # Check for cancellation before starting
+         if cancellation_check and cancellation_check():
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="cancelled",
+                 processing_time=0.0,
+                 error_message="Processing cancelled by user",
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+         try:
+             # Normalize and prepare text
+             normalized_text = normalize_text(chapter.content)
+             paragraphs = split_into_paragraphs(normalized_text)
+             paragraphs = [maybe_ruaccent(p) for p in paragraphs]
+
+             if not paragraphs:
+                 raise ValueError("No text content to process")
+
+             # Process each paragraph
+             audio_pieces = []
+             sample_rate = None
+
+             for i, paragraph in enumerate(paragraphs):
+                 if not paragraph.strip():
+                     continue
+
+                 # Check for cancellation before each paragraph
+                 if cancellation_check and cancellation_check():
+                     return ChapterProcessingResult(
+                         chapter_index=chapter.order,
+                         chapter_title=chapter.title,
+                         status="cancelled",
+                         processing_time=time.time() - start_time,
+                         error_message="Processing cancelled by user",
+                         word_count=chapter.word_count,
+                         estimated_duration=chapter.estimated_duration
+                     )
+
+                 # Synthesize audio for paragraph with enhanced voice parameters
+                 audio, sr = self.tts_backend.synthesize(
+                     text=paragraph,
+                     ref_audio_path=settings.ref_audio_path,
+                     ref_text=settings.ref_text,
+                     speed=settings.speed,
+                     nfe_steps=settings.nfe_steps,
+                     seed=settings.seed,
+                     cross_fade_sec=settings.crossfade_ms / 1000.0,
+                     target_rms=settings.target_rms,
+                     cfg_strength=settings.cfg_strength,
+                     sway_sampling_coef=settings.sway_sampling_coef
+                 )
+
+                 audio_pieces.append(audio)
+                 sample_rate = sr
+
+                 # Memory cleanup after each paragraph
+                 if i % 5 == 0:  # Every 5 paragraphs
+                     gc.collect()
+
+             if not audio_pieces:
+                 raise ValueError("No audio generated")
+
+             # Concatenate audio pieces with crossfade
+             final_audio = crossfade_concat(
+                 audio_pieces,
+                 crossfade_ms=settings.crossfade_ms,
+                 sample_rate=sample_rate
+             )
+
+             # Normalize audio levels
+             final_audio = normalize_lufs(
+                 final_audio,
+                 sample_rate,
+                 target_lufs=settings.target_lufs
+             )
+
+             processing_time = time.time() - start_time
+
+             if progress_callback:
+                 progress_callback(chapter.order, "completed", f"Completed {chapter.title}")
+
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="completed",
+                 audio_data=final_audio,
+                 sample_rate=sample_rate,
+                 processing_time=processing_time,
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+         except Exception as e:
+             processing_time = time.time() - start_time
+             error_msg = f"Error processing {chapter.title}: {str(e)}"
+
+             if progress_callback:
+                 progress_callback(chapter.order, "error", error_msg)
+
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="error",
+                 processing_time=processing_time,
+                 error_message=error_msg,
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+     def process_chapter_with_retry(
+         self,
+         chapter: Chapter,
+         settings: ProcessingSettings,
+         max_retries: int = 2,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> ChapterProcessingResult:
+         """
+         Process a chapter with retry mechanism for failed attempts.
+
+         Args:
+             chapter: Chapter to process
+             settings: Processing settings
+             max_retries: Maximum number of retry attempts
+             progress_callback: Optional callback for progress updates
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             ChapterProcessingResult with processing status
+         """
+         last_result = None
+
+         for attempt in range(max_retries + 1):
+             if attempt > 0:
+                 if progress_callback:
+                     progress_callback(chapter.order, "processing", f"Retry {attempt} for {chapter.title}")
+                 time.sleep(1)  # Brief pause before retry
+
+             result = self.process_chapter(chapter, settings, progress_callback, cancellation_check)
+             last_result = result
+
+             if result.status == "completed":
+                 return result
+
+             if result.status == "cancelled":
+                 # Do not retry chapters the user has cancelled
+                 return result
+
+         # All retries failed
+         if progress_callback:
+             progress_callback(chapter.order, "error", f"Failed after {max_retries} retries")
+
+         return last_result
+
+     def process_book(
+         self,
+         chapters: List[Chapter],
+         settings: ProcessingSettings,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         max_retries: int = 2,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> List[ChapterProcessingResult]:
+         """
+         Process an entire book with multiple chapters.
+
+         Args:
+             chapters: List of chapters to process
+             settings: Processing settings
+             progress_callback: Optional callback for progress updates
+             max_retries: Maximum retries per chapter
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             List of ChapterProcessingResult objects
+         """
+         results = []
+
+         for chapter in chapters:
+             # Check for cancellation before each chapter
+             if cancellation_check and cancellation_check():
+                 break
+
+             # Update chapter status
+             self.epub_processor.update_chapter_status(chapters, chapter.order, "processing")
+
+             # Process chapter with retry
+             result = self.process_chapter_with_retry(
+                 chapter,
+                 settings,
+                 max_retries,
+                 progress_callback,
+                 cancellation_check
+             )
+
+             # Update chapter status based on result
+             if result.status == "completed":
+                 self.epub_processor.update_chapter_status(chapters, chapter.order, "completed")
+             else:
+                 self.epub_processor.update_chapter_status(
+                     chapters,
+                     chapter.order,
+                     "error",
+                     result.error_message
+                 )
+
+             results.append(result)
+
+             # Memory cleanup after each chapter
+             gc.collect()
+
+         return results
+
+     def concatenate_chapter_audio(self, results: List[ChapterProcessingResult]) -> Optional[np.ndarray]:
+         """
+         Concatenate audio from multiple chapter results.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+
+         Returns:
+             Concatenated audio array or None if no valid audio
+         """
+         valid_audio = []
+         sample_rate = None
+
+         for result in results:
+             if result.status == "completed" and result.audio_data is not None:
+                 valid_audio.append(result.audio_data)
+                 if sample_rate is None:
+                     sample_rate = result.sample_rate
+
+         if not valid_audio:
+             return None
+
+         # Concatenate with crossfade
+         return crossfade_concat(valid_audio, crossfade_ms=150, sample_rate=sample_rate)
+
+     def export_audiobook(
+         self,
+         results: List[ChapterProcessingResult],
+         output_path: str,
+         export_individual_chapters: bool = True
+     ) -> Dict[str, str]:
+         """
+         Export processed audiobook to files.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+             output_path: Base path for output files
+             export_individual_chapters: Whether to export individual chapter files
+
+         Returns:
+             Dictionary with paths to exported files
+         """
+         output_dir = Path(output_path).parent
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         exported_files = {}
+         sample_rate = None
+
+         # Export individual chapters
+         if export_individual_chapters:
+             for result in results:
+                 if result.status == "completed" and result.audio_data is not None:
+                     chapter_filename = f"chapter_{result.chapter_index:03d}_{result.chapter_title.replace(' ', '_')}.wav"
+                     chapter_path = output_dir / chapter_filename
+
+                     save_wav(str(chapter_path), result.audio_data, result.sample_rate)
+                     exported_files[f"chapter_{result.chapter_index}"] = str(chapter_path)
+
+                     if sample_rate is None:
+                         sample_rate = result.sample_rate
+
+         # Export complete audiobook
+         concatenated_audio = self.concatenate_chapter_audio(results)
+         if concatenated_audio is not None:
+             if sample_rate is None:
+                 # Individual export was skipped: take the rate from the first completed chapter
+                 sample_rate = next(
+                     (r.sample_rate for r in results if r.status == "completed" and r.sample_rate),
+                     None
+                 )
+             # Use the exact output path specified by the user
+             complete_path = Path(output_path)
+             save_wav(str(complete_path), concatenated_audio, sample_rate)
+             exported_files["complete"] = str(complete_path)
+
+         return exported_files
+
+     def get_processing_statistics(self, results: List[ChapterProcessingResult]) -> ProcessingStatistics:
+         """
+         Get processing statistics from results.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+
+         Returns:
+             ProcessingStatistics object
+         """
+         total_chapters = len(results)
+         completed_chapters = sum(1 for r in results if r.status == "completed")
+         failed_chapters = sum(1 for r in results if r.status == "error")
+         total_processing_time = sum(r.processing_time for r in results)
+         total_word_count = sum(r.word_count for r in results)
+         total_audio_duration = sum(r.estimated_duration for r in results)
+
+         average_processing_time = total_processing_time / total_chapters if total_chapters > 0 else 0.0
+
+         return ProcessingStatistics(
+             total_chapters=total_chapters,
+             completed_chapters=completed_chapters,
+             failed_chapters=failed_chapters,
+             total_processing_time=total_processing_time,
+             average_processing_time=average_processing_time,
+             total_audio_duration=total_audio_duration,
+             total_word_count=total_word_count
+         )
+
+     def cleanup(self):
+         """Clean up resources."""
+         self.epub_processor.cleanup_temp_files()
+         gc.collect()
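A sketch of how the pieces above could be wired together outside the Gradio UI, assuming the default reference sample shipped with the repo; `book.epub` and `output/book.wav` are placeholder paths, and only the first two chapters are processed to keep the run short.

```python
from backends.espeech_backend import EspeechBackend
from epub_processor import EpubProcessor
from epub_processing_pipeline import EpubProcessingPipeline, ProcessingSettings

processor = EpubProcessor()
validation = processor.process_epub_upload("book.epub")  # placeholder path
if not validation.is_valid:
    raise SystemExit(validation.error_message)

pipeline = EpubProcessingPipeline(processor, EspeechBackend())
settings = ProcessingSettings(speed=1.0, nfe_steps=32)  # uses the bundled reference voice

# Process a small subset and export both per-chapter files and the full book.
results = pipeline.process_book(validation.chapters[:2], settings)
exported = pipeline.export_audiobook(results, "output/book.wav")

print(pipeline.get_processing_statistics(results))
print(exported)
```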
epub_processor.py ADDED
@@ -0,0 +1,401 @@
+ #!/usr/bin/env python3
+ """
+ EPUB processing module for Russian Audiobook Studio.
+ Handles EPUB file validation, chapter extraction, and processing coordination.
+ """
+ import os
+ import re
+ import tempfile
+ from typing import List, Optional, Dict, Any
+ from dataclasses import dataclass
+ from pathlib import Path
+ import zipfile
+ from ebooklib import epub
+ from ebooklib.epub import EpubException
+
+
+ @dataclass
+ class Chapter:
+     """Represents a chapter in an EPUB book."""
+     title: str
+     content: str
+     file_name: str
+     order: int
+     preview: str  # First 100-200 characters for preview
+     status: str = "pending"  # pending, processing, completed, error
+     word_count: int = 0
+     estimated_duration: float = 0.0  # Estimated duration in minutes
+     error_message: Optional[str] = None
+
+
+ @dataclass
+ class EpubValidationResult:
+     """Result of EPUB file validation."""
+     is_valid: bool
+     error_message: Optional[str]
+     chapters: List[Chapter]
+     book_title: Optional[str]
+     book_author: Optional[str]
+     total_chapters: int
+
+
+ class EpubValidationError(Exception):
+     """Custom exception for EPUB validation errors."""
+     pass
+
+
+ class EpubValidator:
+     """Validates EPUB files and extracts chapter information."""
+
+     MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB limit
+     MIN_PREVIEW_LENGTH = 100
+     MAX_PREVIEW_LENGTH = 200
+
+     def __init__(self):
+         self.supported_extensions = ['.epub']
+
+     def validate_file(self, file_path: str) -> EpubValidationResult:
+         """
+         Validate an EPUB file and extract chapter information.
+
+         Args:
+             file_path: Path to the EPUB file
+
+         Returns:
+             EpubValidationResult with validation status and chapter information
+
+         Raises:
+             EpubValidationError: If validation fails
+         """
+         if not file_path:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="No file path provided",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check if file exists
+         if not os.path.exists(file_path):
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"File does not exist: {file_path}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check file extension
+         if not self._is_epub_file(file_path):
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="File is not an EPUB file. Please upload a .epub file.",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check file size
+         file_size = os.path.getsize(file_path)
+         if file_size == 0:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="File is empty",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         if file_size > self.MAX_FILE_SIZE:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"File is too large. Maximum size is {self.MAX_FILE_SIZE // (1024*1024)}MB",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Try to parse the EPUB
+         try:
+             return self._parse_epub(file_path)
+         except EpubException as e:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"Invalid EPUB file: {str(e)}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+         except Exception as e:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"Error reading EPUB file: {str(e)}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+     def _is_epub_file(self, file_path: str) -> bool:
+         """Check if file has EPUB extension."""
+         return Path(file_path).suffix.lower() in self.supported_extensions
+
+     def _parse_epub(self, file_path: str) -> EpubValidationResult:
+         """Parse EPUB file and extract chapter information."""
+         try:
+             book = epub.read_epub(file_path)
+
+             # Extract book metadata
+             book_title = book.get_metadata('DC', 'title')
+             book_author = book.get_metadata('DC', 'creator')
+
+             title = book_title[0][0] if book_title else "Unknown Title"
+             author = book_author[0][0] if book_author else "Unknown Author"
+
+             # Extract chapters
+             chapters = self._extract_chapters(book)
+
+             if not chapters:
+                 return EpubValidationResult(
+                     is_valid=False,
+                     error_message="No readable chapters found in EPUB file",
+                     chapters=[],
+                     book_title=title,
+                     book_author=author,
+                     total_chapters=0
+                 )
+
+             return EpubValidationResult(
+                 is_valid=True,
+                 error_message=None,
+                 chapters=chapters,
+                 book_title=title,
+                 book_author=author,
+                 total_chapters=len(chapters)
+             )
+
+         except Exception as e:
+             raise EpubValidationError(f"Failed to parse EPUB: {str(e)}")
+
+     def _extract_chapters(self, book: epub.EpubBook) -> List[Chapter]:
+         """Extract chapters from EPUB book."""
+         chapters = []
+         chapter_order = 0
+
+         # Try to get items from spine first (reading order)
+         spine_items = []
+         if hasattr(book, 'spine') and book.spine:
+             for item_id, linear in book.spine:
+                 # Skip items explicitly marked as non-linear
+                 if str(linear).lower() == 'no':
+                     continue
+                 item = book.get_item_with_id(item_id)
+                 if item:
+                     spine_items.append(item)
+
+         # If no spine items, get all document items
+         if not spine_items:
+             spine_items = [item for item in book.get_items() if item.get_type() == 9]  # 9 = HTML document type
+
+         # Process each item
+         for item in spine_items:
+             # Check if item is HTML content
+             if item.get_type() != 9:  # 9 = HTML document type
+                 continue
+
+             # Extract text content
+             content = self._extract_text_content(item)
+             if not content or len(content.strip()) < 50:  # Skip very short chapters
+                 continue
+
+             # Create chapter
+             chapter = Chapter(
+                 title=self._get_chapter_title(item, chapter_order),
+                 content=content,
+                 file_name=item.get_name(),
+                 order=chapter_order,
+                 preview=self._create_preview(content),
+                 word_count=self._count_words(content),
+                 estimated_duration=self._estimate_duration(content)
+             )
+
+             chapters.append(chapter)
+             chapter_order += 1
+
+         return chapters
+
+     def _extract_text_content(self, item) -> str:
+         """Extract text content from EPUB item."""
+         try:
+             # Get content and handle different encodings
+             raw_content = item.get_content()
+             if isinstance(raw_content, bytes):
+                 # Try different encodings
+                 for encoding in ['utf-8', 'latin-1', 'cp1252']:
+                     try:
+                         content = raw_content.decode(encoding)
+                         break
+                     except UnicodeDecodeError:
+                         continue
+                 else:
+                     # Fallback to utf-8 with errors='ignore'
+                     content = raw_content.decode('utf-8', errors='ignore')
+             else:
+                 content = str(raw_content)
+
+             # Basic HTML tag removal (simple approach)
+             content = re.sub(r'<[^>]+>', '', content)
+             # Clean up whitespace
+             content = re.sub(r'\s+', ' ', content).strip()
+             return content
+         except Exception as e:
+             print(f"Warning: Could not extract content from {item.get_name()}: {e}")
+             return ""
+
+     def _get_chapter_title(self, item, order: int) -> str:
+         """Get chapter title from item or generate default."""
+         # Try to extract title from content
+         try:
+             raw_content = item.get_content()
+             if isinstance(raw_content, bytes):
+                 content = raw_content.decode('utf-8', errors='ignore')
+             else:
+                 content = str(raw_content)
+
+             # Look for h1, h2, h3 tags
+             title_match = re.search(r'<h[1-3][^>]*>([^<]+)</h[1-3]>', content, re.IGNORECASE)
+             if title_match:
+                 title = title_match.group(1).strip()
+                 # Clean up the title
+                 title = re.sub(r'<[^>]+>', '', title)  # Remove any remaining HTML tags
+                 title = re.sub(r'\s+', ' ', title).strip()  # Clean whitespace
+                 if title:
+                     return title
+         except Exception:
+             pass
+
+         # Try to get title from item metadata
+         try:
+             if hasattr(item, 'title') and item.title:
+                 return str(item.title)
+         except Exception:
+             pass
+
+         # Fallback to file name or default
+         file_name = item.get_name()
+         if file_name:
+             # Clean up file name to make it more readable
+             clean_name = Path(file_name).stem
+             clean_name = clean_name.replace('_', ' ').replace('-', ' ')
+             clean_name = re.sub(r'\d+', '', clean_name)  # Remove numbers
+             clean_name = clean_name.strip()
+             if clean_name:
+                 return clean_name.title()
+
+         return f"Chapter {order + 1}"
+
+     def _create_preview(self, content: str) -> str:
+         """Create preview text from chapter content."""
+         if not content:
+             return ""
+
+         # Clean content for preview
+         preview = content.strip()
+
+         # Truncate to reasonable length
+         if len(preview) > self.MAX_PREVIEW_LENGTH:
+             preview = preview[:self.MAX_PREVIEW_LENGTH]
+             # Try to end at a sentence boundary
+             last_period = preview.rfind('.')
+             if last_period > self.MIN_PREVIEW_LENGTH:
+                 preview = preview[:last_period + 1]
+             else:
+                 preview = preview + "..."
+
+         return preview
+
+     def _count_words(self, content: str) -> int:
+         """Count words in content."""
+         if not content:
+             return 0
+
+         # Simple word counting - split by whitespace and filter empty strings
+         words = [word for word in content.split() if word.strip()]
+         return len(words)
+
+     def _estimate_duration(self, content: str) -> float:
+         """Estimate audio duration in minutes based on content length."""
+         if not content:
+             return 0.0
+
+         # Estimate based on average reading speed
+         # Russian text: ~150-200 words per minute for speech synthesis
+         # We'll use 180 words per minute as a reasonable estimate
+         word_count = self._count_words(content)
+         duration_minutes = word_count / 180.0
+
+         # Add some buffer for processing time
+         return round(duration_minutes * 1.1, 1)
+
+
+ class EpubProcessor:
+     """Main EPUB processor for handling EPUB files in the web interface."""
+
+     def __init__(self):
+         self.validator = EpubValidator()
+         self.temp_dir = tempfile.mkdtemp(prefix="epub_processing_")
+
+     def process_epub_upload(self, file_path: str) -> EpubValidationResult:
+         """
+         Process an uploaded EPUB file.
+
+         Args:
+             file_path: Path to uploaded EPUB file
+
+         Returns:
+             EpubValidationResult with validation status and chapter information
+         """
+         return self.validator.validate_file(file_path)
+
+     def update_chapter_status(self, chapters: List[Chapter], chapter_index: int, status: str, error_message: Optional[str] = None):
+         """Update the status of a specific chapter."""
+         if 0 <= chapter_index < len(chapters):
+             chapters[chapter_index].status = status
+             if error_message:
+                 chapters[chapter_index].error_message = error_message
+
+     def get_chapter_status_summary(self, chapters: List[Chapter]) -> Dict[str, int]:
+         """Get a summary of chapter statuses."""
+         summary = {"pending": 0, "processing": 0, "completed": 0, "error": 0}
+         for chapter in chapters:
+             if chapter.status in summary:
+                 summary[chapter.status] += 1
+         return summary
+
+     def get_total_estimated_duration(self, chapters: List[Chapter]) -> float:
+         """Get total estimated duration for all chapters."""
+         return sum(chapter.estimated_duration for chapter in chapters)
+
+     def get_total_word_count(self, chapters: List[Chapter]) -> int:
+         """Get total word count for all chapters."""
+         return sum(chapter.word_count for chapter in chapters)
+
+     def cleanup_temp_files(self):
+         """Clean up temporary files."""
+         import shutil
+         try:
+             shutil.rmtree(self.temp_dir, ignore_errors=True)
+         except Exception:
+             pass
+
+     def __del__(self):
+         """Cleanup on destruction."""
+         self.cleanup_temp_files()
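A small validation-only sketch, useful for checking chapter detection before spending time on synthesis; `book.epub` is a placeholder path.

```python
from epub_processor import EpubProcessor

processor = EpubProcessor()
result = processor.process_epub_upload("book.epub")  # placeholder path

if result.is_valid:
    print(f"{result.book_title} by {result.book_author}: {result.total_chapters} chapters")
    for ch in result.chapters:
        print(f"{ch.order + 1:3d}. {ch.title} ({ch.word_count} words, ~{ch.estimated_duration} min)")
    print("Total estimate:", processor.get_total_estimated_duration(result.chapters), "minutes")
else:
    print("Validation failed:", result.error_message)
```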
requirements.txt ADDED
@@ -0,0 +1,31 @@
+ # Core dependencies for Hugging Face Spaces deployment
+ gradio>=4.44.0
+ numpy>=1.24.0
+ soundfile>=0.12.1
+ pydub>=0.25.1
+ librosa>=0.10.1
+ tqdm>=4.66.0
+ pyloudnorm>=0.1.1
+
+ # Russian linguistics for automatic accent placement
+ ruaccent>=1.5.7
+
+ # Hugging Face / TTS stack
+ huggingface_hub>=0.23.0
+ f5-tts>=0.1.1
+ datasets>=2.20.0
+
+ # Audiobook generation and metadata
+ mutagen>=1.47.0
+
+ # PyTorch for Hugging Face Spaces (CPU version for compatibility)
+ torch>=2.0.0
+ torchaudio>=2.0.0
+
+ # Additional dependencies for EPUB processing
+ ebooklib>=0.18
+ beautifulsoup4>=4.12.0
+ lxml>=4.9.0
+
+ # Audio processing
+ scipy>=1.10.0
samples/001/sample.text ADDED
@@ -0,0 +1,3 @@
+ — До свидания, дорогая.
+ — До свидания, любимый.
+ Аликс Мартин стояла, прислонясь к маленькой, грубо сколоченной калитке, и смотрела вслед му
synth_batch.py ADDED
@@ -0,0 +1,86 @@
+ #!/usr/bin/env python3
+ import argparse, os, sys, re
+ import numpy as np
+ import soundfile as sf
+ from tqdm import tqdm
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
+ from backends.espeech_backend import EspeechBackend
+
+ def read_input(path: str) -> str:
+     if path.lower().endswith(".txt"):
+         with open(path, "r", encoding="utf-8") as f:
+             return f.read()
+     elif path.lower().endswith(".epub"):
+         try:
+             from ebooklib import epub
+             from bs4 import BeautifulSoup
+         except Exception:
+             print("Для EPUB установите: pip install ebooklib beautifulsoup4 lxml", file=sys.stderr)
+             sys.exit(2)
+         book = epub.read_epub(path)
+         texts = []
+         for item in book.get_items():
+             if item.get_type() == 9:  # DOCUMENT
+                 soup = BeautifulSoup(item.get_body_content(), "lxml")
+                 texts.append(soup.get_text(" ", strip=True))
+         return "\n\n".join(texts)
+     else:
+         raise ValueError("Поддерживаются .txt и .epub")
+
+ def load_default_ref_text():
+     """Load default reference text from local sample"""
+     try:
+         with open("samples/001/sample.text", "r", encoding="utf-8") as f:
+             return f.read().strip()
+     except FileNotFoundError:
+         return ""
+
+ def main():
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--input", required=True, help="Путь к TXT/EPUB")
+     ap.add_argument("--outdir", required=True, help="Каталог для результата")
+     ap.add_argument("--ref-audio", required=False, default="samples/001/sample.mp3", help="Путь к референс-аудио (6–12с)")
+     ap.add_argument("--ref-text", required=False, default=load_default_ref_text(), help="Референс-текст")
+     ap.add_argument("--model-repo", default=os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2"))
+     ap.add_argument("--speed", type=float, default=1.0)
+     ap.add_argument("--nfe-steps", type=int, default=48)
+     ap.add_argument("--crossfade-ms", type=int, default=150)
+     ap.add_argument("--target-lufs", type=float, default=-20.0)
+     args = ap.parse_args()
+
+     os.makedirs(args.outdir, exist_ok=True)
+
+     backend = EspeechBackend(model_id=args.model_repo)
+
+     raw = read_input(args.input)
+     text = normalize_text(raw)
+     paragraphs = split_into_paragraphs(text)
+     paragraphs = [maybe_ruaccent(p) for p in paragraphs]
+
+     print(f"Абзацев: {len(paragraphs)}")
+
+     pieces = []
+     sr = None
+     for i, para in enumerate(tqdm(paragraphs, desc="Генерация")):
+         audio, sr = backend.synthesize(
+             text=para,
+             ref_audio_path=args.ref_audio,
+             ref_text=args.ref_text,
+             speed=args.speed,
+             nfe_steps=args.nfe_steps,
+             seed=None,
+         )
+         pieces.append(audio)
+         # Optionally keep per-paragraph drafts
+         # sf.write(os.path.join(args.outdir, f"para_{i:05d}.wav"), audio, sr)
+
+     final = crossfade_concat(pieces, crossfade_ms=args.crossfade_ms, sample_rate=sr)
+     final = normalize_lufs(final, sr, target_lufs=args.target_lufs)
+
+     out_path = os.path.join(args.outdir, "book.wav")
+     save_wav(out_path, final, sr)
+     print(f"ГОТОВО: {out_path}")
+
+ if __name__ == "__main__":
+     main()
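For reference, one possible way to drive the batch script programmatically (an equivalent `python synth_batch.py --input ... --outdir ...` shell call works the same way). The input and output paths here are placeholders, and only flags defined by the script above are used.

```python
import subprocess

# Placeholder paths; --ref-text falls back to the bundled sample transcript.
subprocess.run(
    [
        "python", "synth_batch.py",
        "--input", "book.epub",
        "--outdir", "output",
        "--ref-audio", "samples/001/sample.mp3",
        "--nfe-steps", "32",
    ],
    check=True,
)
```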
utils/audio.py ADDED
@@ -0,0 +1,35 @@
+ # utils/audio.py
+ from typing import List
+ import numpy as np
+ import soundfile as sf
+ from pydub import AudioSegment
+ import pyloudnorm as pyln
+
+ def _to_audiosegment(wav: np.ndarray, sr: int) -> AudioSegment:
+     wav16 = np.int16(np.clip(wav, -1.0, 1.0) * 32767.0)
+     return AudioSegment(
+         wav16.tobytes(), frame_rate=sr, sample_width=2, channels=1
+     )
+
+ def crossfade_concat(pieces: List[np.ndarray], crossfade_ms: int, sample_rate: int) -> np.ndarray:
+     if not pieces:
+         return np.zeros(1, dtype=np.float32)
+     seg = _to_audiosegment(pieces[0], sample_rate)
+     for p in pieces[1:]:
+         seg_next = _to_audiosegment(p, sample_rate)
+         seg = seg.append(seg_next, crossfade=crossfade_ms)
+     # Convert back to float32 in [-1..1]
+     arr = np.array(seg.get_array_of_samples()).astype(np.int16).astype(np.float32) / 32767.0
+     return arr
+
+ def normalize_lufs(wav: np.ndarray, sr: int, target_lufs: float = -20.0) -> np.ndarray:
+     meter = pyln.Meter(sr)
+     loudness = meter.integrated_loudness(wav)
+     wav_norm = pyln.normalize.loudness(wav, loudness, target_lufs)
+     # Clipping safety
+     wav_norm = np.clip(wav_norm, -0.999, 0.999).astype(np.float32)
+     return wav_norm
+
+ def save_wav(path: str, wav: np.ndarray, sr: int):
+     sf.write(path, wav, sr, subtype="PCM_16")
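A self-contained sketch exercising the three helpers with synthetic tones instead of TTS output, so it runs without the model; the 24 kHz sample rate and `tones.wav` filename are arbitrary choices for the example.

```python
import numpy as np

from utils.audio import crossfade_concat, normalize_lufs, save_wav

sr = 24000  # arbitrary sample rate for this toy example
t = np.linspace(0, 1.0, sr, endpoint=False)
tone_a = (0.3 * np.sin(2 * np.pi * 220 * t)).astype(np.float32)
tone_b = (0.3 * np.sin(2 * np.pi * 330 * t)).astype(np.float32)

# Join the two pieces with a 150 ms crossfade, level to -20 LUFS, write 16-bit WAV.
joined = crossfade_concat([tone_a, tone_b], crossfade_ms=150, sample_rate=sr)
joined = normalize_lufs(joined, sr, target_lufs=-20.0)
save_wav("tones.wav", joined, sr)
```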
utils/text.py ADDED
@@ -0,0 +1,40 @@
+ # utils/text.py
+ import re
+
+ def normalize_text(text: str) -> str:
+     # Simple normalization: collapse extra spaces and unify line breaks.
+     text = text.replace("\r\n", "\n").replace("\r", "\n")
+     text = re.sub(r"[ \t]+", " ", text)
+     text = re.sub(r"\n{3,}", "\n\n", text).strip()
+     return text
+
+ def split_into_paragraphs(text: str):
+     # Split on blank lines; could also split by length or sentence boundaries.
+     parts = [p.strip() for p in text.split("\n\n")]
+     parts = [p for p in parts if p]
+     return parts
+
+ def maybe_ruaccent(text: str) -> str:
+     """If ruaccent is installed, place stress marks automatically.
+     Otherwise return the original text unchanged.
+     """
+     try:
+         from ruaccent import RUAccent
+         # Note: the accentuator is re-created on every call; callers that process
+         # many paragraphs may want to cache it.
+         acc = RUAccent()
+         # Use turbo3.1 model for better accuracy and speed
+         acc.load(omograph_model_size="turbo3.1", use_dictionary=True, tiny_mode=False)
+         return acc.process_all(text)
+     except Exception as e:
+         print(f"Warning: ruaccent failed, using original text: {e}")
+         return text
+
+ def maybe_ruaccent_advanced(text: str, model_size: str = "turbo3.1", use_dict: bool = True) -> str:
+     """Enhanced ruaccent with configurable parameters for better voice quality."""
+     try:
+         from ruaccent import RUAccent
+         acc = RUAccent()
+         acc.load(omograph_model_size=model_size, use_dictionary=use_dict, tiny_mode=False)
+         return acc.process_all(text)
+     except Exception as e:
+         print(f"Warning: ruaccent failed, using original text: {e}")
+         return text
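A quick sketch of the text helpers on a tiny Russian snippet; if ruaccent is not installed, `maybe_ruaccent` simply returns the paragraphs unchanged, so the example runs either way.

```python
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent

raw = "Первый   абзац.\r\n\r\n\r\nВторой абзац."

text = normalize_text(raw)                     # collapses spaces and extra blank lines
paragraphs = split_into_paragraphs(text)       # -> ['Первый абзац.', 'Второй абзац.']
accented = [maybe_ruaccent(p) for p in paragraphs]  # adds stress marks when ruaccent is available

print(paragraphs)
print(accented)
```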