danilahs committed on
Commit 4f6648e · verified · 1 Parent(s): e50ddee

Upload folder using huggingface_hub
.gitignore ADDED
@@ -0,0 +1,41 @@
1
+ # Hugging Face Spaces
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ .Python
7
+ env/
8
+ venv/
9
+ .venv/
10
+ pip-log.txt
11
+ pip-delete-this-directory.txt
12
+ .tox/
13
+ .coverage
14
+ .coverage.*
15
+ .cache
16
+ nosetests.xml
17
+ coverage.xml
18
+ *.cover
19
+ *.log
20
+ .git
21
+ .mypy_cache
22
+ .pytest_cache
23
+ .hypothesis
24
+
25
+ # Audio files
26
+ temp_audio/
27
+ output/
28
+ *.wav
29
+ *.mp3
30
+ *.m4a
31
+
32
+ # Model cache
33
+ .cache/
34
+ models/
35
+ checkpoints/
36
+
37
+ # Temporary files
38
+ *.tmp
39
+ *.temp
40
+ .DS_Store
41
+ Thumbs.db
README.md CHANGED
@@ -1,14 +1,77 @@
1
  ---
2
- title: Audiobook Ru Tts
3
- emoji: 🚀
4
- colorFrom: gray
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.44.1
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
- short_description: 'Create high-quality Russian audiobooks from EPUB files with '
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
  ---
2
+ title: Russian Audiobook Studio
3
+ emoji: 🎧
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Create Russian audiobooks from EPUB files with advanced TTS
12
  ---
13
 
14
+ # 🎧 Russian Audiobook Studio
15
+
16
+ Transform your Russian EPUB books into professional-quality audiobooks using advanced Text-to-Speech technology with automatic accent placement and voice customization.
17
+
18
+ ## ✨ Features
19
+
20
+ - **📚 EPUB Processing**: Upload EPUB files and automatically extract chapters
21
+ - **🎯 Chapter Selection**: Choose specific chapters to process with checkbox interface
22
+ - **🎵 Advanced TTS**: High-quality Russian TTS with automatic accent placement using ruaccent
23
+ - **🎛️ Voice Controls**: Customize voice clarity, variation, and volume levels
24
+ - **🎧 Audio Preview**: Built-in audio players for chapter previews
25
+ - **📥 Download Options**: Download individual chapters or complete audiobook
26
+ - **📱 Responsive Design**: Works on desktop and mobile devices
27
+ - **♿ Accessibility**: Full keyboard navigation and screen reader support
28
+
29
+ ## 🚀 How to Use
30
+
31
+ 1. **Upload EPUB**: Click "Choose File" and select your Russian EPUB book
32
+ 2. **Validate**: Click "Validate EPUB" to extract chapters and book information
33
+ 3. **Select Chapters**: Choose which chapters to process using the checkbox interface
34
+ 4. **Adjust Settings**: Customize voice parameters in Advanced Settings
35
+ 5. **Process**: Click "Process Selected Chapters" to generate audiobook
36
+ 6. **Preview & Download**: Listen to previews and download your audiobook
37
+
38
+ ## ⚙️ Advanced Settings
39
+
40
+ - **Speech Speed**: Adjust speech generation speed (0.6x = slower, 1.4x = faster)
41
+ - **Quality Steps (NFE)**: Higher values give better quality but take longer to process
42
+ - **Voice Clarity**: Controls how closely the voice follows the reference (1.0-4.0)
43
+ - **Voice Variation**: Controls natural voice variation (-2.0 to 1.0)
44
+ - **Volume Level (RMS)**: Target volume level for the generated audio
45
+
46
+ ## 🎯 Technical Features
47
+
48
+ - **Automatic Accent Placement**: Uses the ruaccent library for automatic Russian stress placement
49
+ - **Professional Audio Quality**: 24kHz, 16-bit, mono WAV output
50
+ - **Real-time Progress**: Live updates during processing with cancellation support
51
+ - **Error Handling**: Robust error handling with detailed status messages
52
+ - **File Management**: Automatic file serving and download functionality
53
+
54
+ ## 📋 Requirements
55
+
56
+ - Russian EPUB files with proper text encoding
57
+ - Reference audio sample (6-12 seconds) for voice cloning
58
+ - Reference text (transcript of the reference audio)
59
+
60
+ ## 🔧 Technical Stack
61
+
62
+ - **TTS Engine**: ESpeech-TTS-1_RL-V2 (F5-TTS)
63
+ - **Accent Processing**: ruaccent for automatic Russian stress placement
64
+ - **Audio Processing**: librosa, soundfile, pydub
65
+ - **Web Interface**: Gradio with responsive design
66
+ - **EPUB Processing**: ebooklib for book parsing
67
+
68
+ ## 📝 Notes
69
+
70
+ - Processing time depends on chapter length and quality settings
71
+ - Higher quality settings require more processing time
72
+ - All audio files are temporarily stored for preview and download
73
+ - The system automatically handles Russian text normalization and accent placement
74
+
75
+ ## 🎉 Ready to Create Audiobooks?
76
+
77
+ Upload your Russian EPUB file and start creating professional-quality audiobooks in minutes!
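The Advanced Settings listed above map directly onto the arguments of `EspeechBackend.synthesize` added in `backends/espeech_backend.py` later in this commit. As a quick orientation, here is a minimal sketch of that flow outside Gradio, mirroring `synthesize_ui` in `app.py`; the reference audio path, its transcript, and the sample text are placeholders rather than files in this commit.

```python
# Illustrative sketch only: the paths and texts below are placeholders.
from backends.espeech_backend import EspeechBackend
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
from utils.audio import crossfade_concat, normalize_lufs, save_wav

backend = EspeechBackend(model_id="ESpeech/ESpeech-TTS-1_RL-V2")

text = "Это пример абзаца для озвучивания."   # text to narrate
ref_audio = "my_reference.wav"                # 6-12 s reference clip (placeholder path)
ref_text = "Транскрипт референсного аудио."   # transcript of the reference clip

# Normalize, split into paragraphs, and add stress marks via ruaccent
paragraphs = [maybe_ruaccent(p) for p in split_into_paragraphs(normalize_text(text))]

pieces, sr = [], None
for para in paragraphs:
    audio, sr = backend.synthesize(
        text=para,
        ref_audio_path=ref_audio,
        ref_text=ref_text,
        speed=1.0,                 # "Speech Speed"
        nfe_steps=48,              # "Quality Steps (NFE)"
        cfg_strength=2.0,          # "Voice Clarity"
        sway_sampling_coef=-1.0,   # "Voice Variation"
        target_rms=0.1,            # "Volume Level (RMS)"
    )
    pieces.append(audio)

# Join paragraphs with a short crossfade, normalize loudness, and save
final = pieces[0] if len(pieces) == 1 else crossfade_concat(pieces, crossfade_ms=150, sample_rate=sr)
final = normalize_lufs(final, sr, target_lufs=-20.0)
save_wav("preview.wav", final, sr)
```

The values shown here (speed 1.0, 48 NFE steps, clarity 2.0, variation -1.0, RMS 0.1) match the slider defaults in the app's EPUB tab.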
app.py ADDED
@@ -0,0 +1,725 @@
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import gradio as gr
4
+ import numpy as np
5
+ import soundfile as sf
6
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent, maybe_ruaccent_advanced
7
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
8
+ from backends.espeech_backend import EspeechBackend
9
+ from epub_processor import EpubProcessor
10
+ from epub_processing_pipeline import EpubProcessingPipeline, ProcessingSettings
11
+
12
+ DEFAULT_MODEL = os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2")
13
+
14
+ backend = EspeechBackend(model_id=DEFAULT_MODEL)
15
+ epub_processor = EpubProcessor()
16
+ processing_pipeline = EpubProcessingPipeline(epub_processor, backend)
17
+
18
+ # Global processing state
19
+ processing_cancelled = False
20
+
21
+ # Load default reference text from local sample
22
+ def load_default_ref_text():
23
+ try:
24
+ with open("samples/001/sample.text", "r", encoding="utf-8") as f:
25
+ return f.read().strip()
26
+ except FileNotFoundError:
27
+ return ""
28
+
29
+ # EPUB upload validation function
30
+ def validate_epub_upload(file_path):
31
+ """Validate uploaded EPUB file and return status."""
32
+ if not file_path:
33
+ return (
34
+ "📁 **Upload Status:** No file uploaded",
35
+ "⏳ **Validation:** Waiting for file upload",
36
+ "⏳ **Processing:** Not started",
37
+ None, None, None, None, [], []
38
+ )
39
+
40
+ try:
41
+ # Update validation status
42
+ validation_status = "🔄 **Validation:** Processing EPUB file..."
43
+
44
+ result = epub_processor.process_epub_upload(file_path)
45
+
46
+ if result.is_valid:
47
+ # Create enhanced chapter list for display
48
+ chapter_list = []
49
+ total_words = 0
50
+ total_duration = 0.0
51
+
52
+ for i, chapter in enumerate(result.chapters):
53
+ total_words += chapter.word_count
54
+ total_duration += chapter.estimated_duration
55
+
56
+ # Create chapter info with metadata
57
+ chapter_info = f"**{i+1}. {chapter.title}**\n"
58
+ chapter_info += f"📊 Words: {chapter.word_count:,} | ⏱️ Est. Duration: {chapter.estimated_duration:.1f} min\n"
59
+ chapter_info += f"📄 Preview: {chapter.preview}\n"
60
+ chapter_info += f"📁 File: {chapter.file_name}"
61
+
62
+ chapter_list.append(chapter_info)
63
+
64
+ chapter_display = "\n\n".join(chapter_list)
65
+
66
+ upload_status = f"✅ **Upload Status:** EPUB file uploaded successfully"
67
+ validation_status = f"✅ **Validation:** EPUB file is valid and ready for processing"
68
+ processing_status = f"🚀 **Processing:** Ready to process {result.total_chapters} chapters"
69
+
70
+ # Enhanced status message with statistics
71
+ status_message = f"""**📚 Book Information:**
72
+ **Title:** {result.book_title}
73
+ **Author:** {result.book_author}
74
+ **Chapters:** {result.total_chapters}
75
+ **Total Words:** {total_words:,}
76
+ **Estimated Total Duration:** {total_duration:.1f} minutes
77
+
78
+ **📖 Chapter Details:**
79
+ {chapter_display}"""
80
+
81
+ # Create chapter selection interface
82
+ chapter_choices, default_selection = create_chapter_selection_interface(result.chapters)
83
+
84
+ return (
85
+ upload_status,
86
+ validation_status,
87
+ processing_status,
88
+ status_message,
89
+ result.chapters,
90
+ result.book_title,
91
+ result.book_author,
92
+ chapter_choices,
93
+ default_selection
94
+ )
95
+ else:
96
+ upload_status = f"❌ **Upload Status:** EPUB file validation failed"
97
+ validation_status = f"❌ **Validation:** {result.error_message}"
98
+ processing_status = f"⏳ **Processing:** Cannot proceed due to validation error"
99
+
100
+ return (
101
+ upload_status,
102
+ validation_status,
103
+ processing_status,
104
+ f"**Error Details:** {result.error_message}",
105
+ None, None, None, [], []
106
+ )
107
+
108
+ except Exception as e:
109
+ upload_status = f"❌ **Upload Status:** Error processing file"
110
+ validation_status = f"❌ **Validation:** {str(e)}"
111
+ processing_status = f"⏳ **Processing:** Cannot proceed due to error"
112
+
113
+ return (
114
+ upload_status,
115
+ validation_status,
116
+ processing_status,
117
+ f"**Error Details:** {str(e)}",
118
+ None, None, None, [], []
119
+ )
120
+
121
+
122
+ def stop_epub_processing():
123
+ """Stop the current EPUB processing."""
124
+ global processing_cancelled
125
+ processing_cancelled = True
126
+ return "🛑 **Processing Stopped:** User requested to stop processing"
127
+
128
+ def create_chapter_selection_interface(chapters):
129
+ """Create chapter selection interface data structure."""
130
+ if not chapters:
131
+ return [], []
132
+
133
+ # Create choices for checkbox group
134
+ choices = []
135
+ for i, chapter in enumerate(chapters):
136
+ choice_text = f"{i+1}. {chapter.title}"
137
+ choices.append(choice_text)
138
+
139
+ # All chapters selected by default (return the choice strings, not indices)
140
+ default_selection = choices # Select all choices by default
141
+
142
+ return choices, default_selection
143
+
144
+ def get_selected_chapters(chapters, selected_choices):
145
+ """Get the actual chapter objects for selected choices."""
146
+ if not chapters or not selected_choices:
147
+ return []
148
+
149
+ selected_chapters = []
150
+ for choice in selected_choices:
151
+ # Extract chapter index from choice string (format: "1. Chapter Title")
152
+ try:
153
+ chapter_index = int(choice.split('.')[0]) - 1 # Convert to 0-based index
154
+ if 0 <= chapter_index < len(chapters):
155
+ selected_chapters.append(chapters[chapter_index])
156
+ except (ValueError, IndexError):
157
+ # Skip invalid choices
158
+ continue
159
+
160
+ return selected_chapters
161
+
162
+ def update_chapter_selection_interface(chapters, choices, default_selection):
163
+ """Update the chapter selection interface after EPUB validation."""
164
+ if chapters is not None:
165
+ selection_summary = get_selection_summary(default_selection, len(chapters))
166
+ return (
167
+ gr.update(visible=True),
168
+ gr.update(visible=False),
169
+ gr.update(choices=choices, value=default_selection, visible=True),
170
+ gr.update(visible=True), # Show selection buttons
171
+ gr.update(value=f"📊 **Selection:** {selection_summary}", visible=True)
172
+ )
173
+ else:
174
+ return (
175
+ gr.update(visible=False),
176
+ gr.update(visible=False),
177
+ gr.update(choices=[], value=[], visible=False),
178
+ gr.update(visible=False), # Hide selection buttons
179
+ gr.update(value="", visible=False)
180
+ )
181
+
182
+ def save_chapter_selection_state(selected_choices, book_title, book_author):
183
+ """Save chapter selection state to session."""
184
+ # This will be handled by Gradio's built-in state management
185
+ # The selected_choices will be automatically stored in the component's state
186
+ return selected_choices
187
+
188
+ def restore_chapter_selection_state(chapters, book_title, book_author):
189
+ """Restore chapter selection state from session."""
190
+ # If we have chapters and this is the same book, restore previous selection
191
+ # Otherwise, return all chapters selected by default
192
+ if chapters is not None:
193
+ # For now, we'll use all chapters selected by default
194
+ # In a more advanced implementation, we could check session state
195
+ # and restore previous selections for the same book
196
+ choices, _ = create_chapter_selection_interface(chapters)
197
+ return choices # Return all choices selected by default
198
+ return []
199
+
200
+ def get_selection_summary(selected_choices, total_chapters):
201
+ """Get a summary of the current selection."""
202
+ if not selected_choices:
203
+ return "No chapters selected"
204
+
205
+ selected_count = len(selected_choices)
206
+ if selected_count == total_chapters:
207
+ return f"All {total_chapters} chapters selected"
208
+ else:
209
+ return f"{selected_count} of {total_chapters} chapters selected"
210
+
211
+ def select_all_chapters(chapter_choices):
212
+ """Select all available chapters."""
213
+ return chapter_choices if chapter_choices else []
214
+
215
+ def deselect_all_chapters():
216
+ """Deselect all chapters."""
217
+ return []
218
+
219
+ def create_chapter_preview_players(processing_results, book_title, exported_files=None):
220
+ """Create chapter preview information for display."""
221
+ if not processing_results or not hasattr(processing_results, 'chapters'):
222
+ return []
223
+
224
+ chapter_info = []
225
+ for i, chapter in enumerate(processing_results.chapters):
226
+ # Use exported file path if available, otherwise construct expected path
227
+ chapter_key = f"chapter_{i}"
228
+ if exported_files and chapter_key in exported_files:
229
+ audio_file_path = exported_files[chapter_key]
230
+ else:
231
+ # Fallback to expected path based on export naming pattern
232
+ safe_title = chapter.title.replace(' ', '_').replace(':', '').replace(',', '').replace('.', '')
233
+ audio_file_path = f"output/{book_title.replace(' ', '_')}/chapter_{i:03d}_{safe_title}.wav"
234
+
235
+ chapter_info.append({
236
+ 'title': chapter.title,
237
+ 'file_path': audio_file_path,
238
+ 'duration': chapter.estimated_duration,
239
+ 'word_count': chapter.word_count
240
+ })
241
+
242
+ return chapter_info
243
+
244
+ def create_preview_ui_html(players):
245
+ """Create the complete preview UI HTML."""
246
+ if not players:
247
+ return "<p>No chapters available for preview.</p>"
248
+
249
+ html = f"""
250
+ <div class="chapter-preview-container" style="margin: 20px 0; padding: 20px; border: 2px solid #e1e8ed; border-radius: 12px; background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);" role="region" aria-label="Chapter previews section">
251
+ <h3 style="margin: 0 0 15px 0; color: #2c3e50; font-size: 24px; text-align: center;" id="previews-heading">🎵 Chapter Previews</h3>
252
+ <p style="color: #666; margin-bottom: 20px; text-align: center; font-size: 14px;" aria-describedby="previews-heading">
253
+ Listen to individual chapters before downloading the complete audiobook.
254
+ Each player includes full controls for play, pause, seeking, and volume adjustment.
255
+ </p>
256
+ <div class="players-list" style="max-height: 600px; overflow-y: auto; padding-right: 10px;" role="list" aria-label="List of chapter audio players">
257
+ {''.join(players)}
258
+ </div>
259
+ <style>
260
+ @media (max-width: 768px) {{
261
+ .chapter-preview-container {{
262
+ padding: 15px !important;
263
+ margin: 10px 0 !important;
264
+ }}
265
+ .chapter-preview-container h3 {{
266
+ font-size: 20px !important;
267
+ }}
268
+ .chapter-preview-container p {{
269
+ font-size: 12px !important;
270
+ }}
271
+ .players-list {{
272
+ max-height: 400px !important;
273
+ }}
274
+ }}
275
+ @media (max-width: 480px) {{
276
+ .chapter-preview-container {{
277
+ padding: 10px !important;
278
+ }}
279
+ .chapter-preview-container h3 {{
280
+ font-size: 18px !important;
281
+ }}
282
+ .players-list {{
283
+ max-height: 300px !important;
284
+ }}
285
+ }}
286
+ </style>
287
+ </div>
288
+ """
289
+ return html
290
+
291
+ def get_audio_file_url(file_path):
292
+ """Convert file path to URL for serving audio files."""
293
+ if not file_path:
294
+ return ""
295
+
296
+ # For Gradio, we need to use the file path directly
297
+ # Gradio will handle the file serving automatically
298
+ if os.path.isabs(file_path):
299
+ # Return the absolute path - Gradio can serve it
300
+ return file_path
301
+ else:
302
+ # Return the relative path
303
+ return file_path
304
+
305
+ def copy_file_for_serving(file_path, target_dir="temp_audio"):
306
+ """Copy file to a directory that Gradio can serve."""
307
+ if not file_path or not os.path.exists(file_path):
308
+ return None
309
+
310
+ # Create target directory if it doesn't exist
311
+ os.makedirs(target_dir, exist_ok=True)
312
+
313
+ # Get filename and copy to target directory
314
+ filename = os.path.basename(file_path)
315
+ target_path = os.path.join(target_dir, filename)
316
+
317
+ try:
318
+ import shutil
319
+ shutil.copy2(file_path, target_path)
320
+ return target_path
321
+ except Exception as e:
322
+ print(f"Error copying file {file_path} to {target_path}: {e}")
323
+ return None
324
+
325
+ def process_epub_book(chapters, selected_choices, book_title, book_author, speed, nfe_steps, crossfade_ms, seed, target_rms, cfg_strength, sway_coef):
326
+ """Process selected chapters from an EPUB book into audiobook."""
327
+ global processing_cancelled
328
+ processing_cancelled = False # Reset cancellation flag
329
+
330
+ if not chapters:
331
+ return "❌ **Error:** No chapters available", None, None, ""
332
+
333
+ # Get selected chapters
334
+ selected_chapters = get_selected_chapters(chapters, selected_choices)
335
+
336
+ if not selected_chapters:
337
+ return "❌ **Error:** No chapters selected for processing", None, None, ""
338
+
339
+ try:
340
+ # Create processing settings with enhanced voice parameters
341
+ settings = ProcessingSettings(
342
+ speed=speed,
343
+ nfe_steps=nfe_steps,
344
+ crossfade_ms=crossfade_ms,
345
+ seed=seed if seed > 0 else None,
346
+ target_rms=target_rms,
347
+ cfg_strength=cfg_strength,
348
+ sway_sampling_coef=sway_coef
349
+ )
350
+
351
+ # Progress tracking
352
+ progress_messages = []
353
+
354
+ def progress_callback(chapter_index, status, message):
355
+ # Check for cancellation
356
+ if processing_cancelled:
357
+ return
358
+ # Show actual chapter title instead of index for better clarity
359
+ if chapter_index < len(selected_chapters):
360
+ chapter_title = selected_chapters[chapter_index].title
361
+ # Add visual status indicators
362
+ status_emoji = {
363
+ 'pending': '⏳',
364
+ 'processing': '🔄',
365
+ 'completed': '✅',
366
+ 'error': '❌',
367
+ 'cancelled': '🛑'
368
+ }.get(status.lower(), '📝')
369
+ progress_messages.append(f"{status_emoji} **{chapter_title}**: {status.title()} - {message}")
370
+ else:
371
+ progress_messages.append(f"📝 Chapter {chapter_index + 1}: {status} - {message}")
372
+
373
+ def cancellation_check():
374
+ return processing_cancelled
375
+
376
+ # Process the selected chapters
377
+ results = processing_pipeline.process_book(selected_chapters, settings, progress_callback, cancellation_check=cancellation_check)
378
+
379
+ # Check if processing was cancelled
380
+ if processing_cancelled:
381
+ return "🛑 **Processing Cancelled:** User stopped the processing", None, None, ""
382
+
383
+ # Get statistics
384
+ stats = processing_pipeline.get_processing_statistics(results)
385
+
386
+ # Create enhanced status message with visual feedback
387
+ completion_emoji = "🎉" if stats.failed_chapters == 0 else "⚠️"
388
+ status_message = f"""{completion_emoji} **Processing Complete!**
389
+
390
+ **📊 Statistics:**
391
+ - **Selected Chapters:** {len(selected_chapters)} of {len(chapters)} total chapters
392
+ - **✅ Completed:** {stats.completed_chapters}
393
+ - **❌ Failed:** {stats.failed_chapters}
394
+ - **⏱️ Total Processing Time:** {stats.total_processing_time:.1f} seconds
395
+ - **📈 Average Time per Chapter:** {stats.average_processing_time:.1f} seconds
396
+ - **📝 Total Words:** {stats.total_word_count:,}
397
+ - **🎵 Estimated Duration:** {stats.total_audio_duration:.1f} minutes
398
+
399
+ **📋 Processing Log:**
400
+ {chr(10).join(progress_messages[-10:])}
401
+
402
+ **🎯 Next Steps:**
403
+ - Listen to individual chapter previews below
404
+ - Download the complete audiobook when ready
405
+ - Check processing log for any issues
406
+ """
407
+
408
+ # Export audiobook
409
+ output_dir = f"output/{book_title.replace(' ', '_')}"
410
+ exported_files = processing_pipeline.export_audiobook(results, f"{output_dir}/audiobook.wav")
411
+
412
+ # Create download links - will be updated after file copying
413
+ download_info = f"""**📥 Download Files:**
414
+ - **Individual Chapters:** {len([k for k in exported_files.keys() if k.startswith('chapter')])} files available below
415
+ """
416
+
417
+ # Create chapter preview information
418
+ chapter_audio_file = None
419
+ chapter_info_text = ""
420
+ if results and len(results) > 0:
421
+ # Get the first chapter's audio file for preview and copy it for serving
422
+ chapter_key = "chapter_0"
423
+ if chapter_key in exported_files:
424
+ original_file = exported_files[chapter_key]
425
+ chapter_audio_file = copy_file_for_serving(original_file)
426
+
427
+ # Also copy the complete audiobook for serving
428
+ complete_audiobook_original = exported_files.get('complete')
429
+ complete_audiobook_served = None
430
+ if complete_audiobook_original:
431
+ complete_audiobook_served = copy_file_for_serving(complete_audiobook_original)
432
+
433
+ # Create chapter information text with download links
434
+ chapter_info_list = []
435
+ for i, chapter in enumerate(selected_chapters):
436
+ chapter_key = f"chapter_{i}"
437
+ if chapter_key in exported_files:
438
+ chapter_file = exported_files[chapter_key]
439
+ chapter_info_list.append(f"**📖 {chapter.title}** - {chapter.estimated_duration:.1f} min ({chapter.word_count:,} words)")
440
+
441
+ if chapter_info_list:
442
+ chapter_info_text = f"""**🎵 Chapter Previews:**
443
+ {chr(10).join(chapter_info_list)}
444
+
445
+ **📥 Individual Chapter Downloads:**
446
+ """
447
+ # Add download links for each chapter
448
+ for i, chapter in enumerate(selected_chapters):
449
+ chapter_key = f"chapter_{i}"
450
+ if chapter_key in exported_files:
451
+ chapter_file = exported_files[chapter_key]
452
+ safe_title = chapter.title.replace(' ', '_').replace(':', '').replace(',', '').replace('.', '')
453
+ chapter_info_text += f"- [{chapter.title}]({chapter_file})\n"
454
+
455
+ # Update download info with the copied complete audiobook
456
+ if complete_audiobook_served:
457
+ download_info = f"""**📥 Download Files:**
458
+ <div style="margin: 10px 0;">
459
+ <a href="{complete_audiobook_served}" download="{book_title.replace(' ', '_')}_complete_audiobook.wav"
460
+ style="display: inline-block; padding: 12px 24px; background: #28a745; color: white; text-decoration: none; border-radius: 6px; font-size: 16px; font-weight: bold;"
461
+ role="button" aria-label="Download complete audiobook">
462
+ 📥 Download Complete Audiobook
463
+ </a>
464
+ </div>
465
+ - **Individual Chapters:** {len([k for k in exported_files.keys() if k.startswith('chapter')])} files available below
466
+ """
467
+
468
+ return status_message, download_info, complete_audiobook_served, chapter_audio_file, chapter_info_text
469
+
470
+ except Exception as e:
471
+ error_message = f"❌ **Processing Error:** {str(e)}"
472
+ return error_message, None, None, None, ""
473
+
474
+ def synthesize_ui(text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed):
475
+ if not text or text.strip() == "":
476
+ return None, "Введите текст."
477
+ # Prepare paragraphs
478
+ text_norm = normalize_text(text)
479
+ paras = split_into_paragraphs(text_norm)
480
+ # Optional auto-accents for RU
481
+ paras = [maybe_ruaccent(p) for p in paras]
482
+
483
+ # Use local sample as default reference if no audio provided
484
+ ref_path = "samples/001/sample.mp3"
485
+ if ref_audio is not None:
486
+ ref_path = "ref_tmp.wav"
487
+ # Gradio gives (sr, np.ndarray) or file path depending on component
488
+ if isinstance(ref_audio, tuple) and isinstance(ref_audio[1], np.ndarray):
489
+ sf.write(ref_path, ref_audio[1], ref_audio[0])
490
+ elif isinstance(ref_audio, str):
491
+ ref_path = ref_audio
492
+
493
+ # Generate per paragraph
494
+ pieces = []
495
+ sr = None
496
+ rng_seed = int(seed) if seed is not None and int(seed) > 0 else None  # 0 means auto (no fixed seed)
497
+ for i, para in enumerate(paras):
498
+ audio, sr = backend.synthesize(
499
+ text=para,
500
+ ref_audio_path=ref_path,
501
+ ref_text=ref_text or "",
502
+ speed=float(speed),
503
+ nfe_steps=int(nfe_steps),
504
+ seed=rng_seed,
505
+ )
506
+ pieces.append(audio)
507
+
508
+ # Crossfade and normalize
509
+ if len(pieces) == 1:
510
+ final = pieces[0]
511
+ else:
512
+ final = crossfade_concat(pieces, crossfade_ms=int(crossfade_ms), sample_rate=sr)
513
+
514
+ # Target loudness (for personal listening; tweak as you like)
515
+ final = normalize_lufs(final, sr, target_lufs=-20.0)
516
+
517
+ out_path = "out_preview.wav"
518
+ save_wav(out_path, final, sr)
519
+ return (sr, final), f"Готово: {len(pieces)} фрагм., длительность ~{len(final)/sr:.1f}с. Сохранено: {out_path}"
520
+
521
+ with gr.Blocks(title="RU Audiobook Studio") as demo:
522
+ gr.Markdown("# RU Audiobook Studio — ESpeech TTS\nГенерируйте главы аудиокниг с готовыми голосами (zero/one‑shot).")
523
+
524
+ # EPUB Upload Section
525
+ with gr.Tab("📚 EPUB Book Processing"):
526
+ gr.Markdown("## Upload EPUB Book\nUpload an EPUB file to create a complete audiobook with automatic chapter detection.")
527
+
528
+ # Main content area with responsive layout
529
+ with gr.Row(equal_height=True):
530
+ # Left column - Upload and Settings (responsive)
531
+ with gr.Column(scale=1, min_width=350):
532
+ # File Upload Section
533
+ with gr.Group():
534
+ gr.Markdown("### 📁 File Upload")
535
+ epub_upload = gr.File(
536
+ label="Upload EPUB File",
537
+ file_types=[".epub"],
538
+ file_count="single",
539
+ height=100
540
+ )
541
+ epub_validate_btn = gr.Button("🔍 Validate EPUB", variant="secondary", size="lg", elem_id="validate-epub-btn")
542
+
543
+ # Processing Settings Section
544
+ with gr.Group():
545
+ gr.Markdown("### ⚙️ Processing Settings")
546
+ with gr.Accordion("Advanced Settings", open=False):
547
+ epub_speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speech Speed", info="Adjust the speed of speech generation (0.6 = slower, 1.4 = faster)")
548
+ epub_nfe_steps = gr.Slider(12, 96, value=48, step=1, label="Quality Steps (NFE)", info="Higher values produce better quality but take longer to process")
549
+ epub_crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms)", info="Smooth transition between audio segments")
550
+ epub_seed = gr.Number(value=0, label="Seed (0 = auto)", info="Random seed for reproducible results (0 = random)")
551
+
552
+ # Voice and tone parameters
553
+ gr.Markdown("### 🎵 Voice & Tone Settings")
554
+ epub_target_rms = gr.Slider(0.05, 0.3, value=0.1, step=0.01, label="Volume Level (RMS)", info="Target volume level for generated audio (0.05 = quiet, 0.3 = loud)")
555
+ epub_cfg_strength = gr.Slider(1.0, 4.0, value=2.0, step=0.1, label="Voice Clarity", info="Controls how closely the voice follows the reference (1.0 = more variation, 4.0 = more consistent)")
556
+ epub_sway_coef = gr.Slider(-2.0, 1.0, value=-1.0, step=0.1, label="Voice Variation", info="Controls natural voice variation (-2.0 = more monotone, 1.0 = more expressive)")
557
+
558
+ # Right column - Chapter Selection and Processing (responsive)
559
+ with gr.Column(scale=2, min_width=500):
560
+ # Chapter Selection Section
561
+ with gr.Group():
562
+ gr.Markdown("### 📖 Chapter Selection")
563
+ epub_chapter_selection = gr.CheckboxGroup(
564
+ label="Select Chapters to Process",
565
+ choices=[],
566
+ value=[],
567
+ visible=False,
568
+ info="All chapters are selected by default. Uncheck chapters you don't want to process.",
569
+ elem_id="chapter-selection"
570
+ )
571
+
572
+ # Selection control buttons
573
+ with gr.Row(visible=False) as epub_selection_buttons:
574
+ epub_select_all_btn = gr.Button("✅ Select All", variant="secondary", size="sm")
575
+ epub_deselect_all_btn = gr.Button("❌ Deselect All", variant="secondary", size="sm")
576
+
577
+ epub_selection_summary = gr.Markdown("", visible=False)
578
+
579
+ # Processing Section
580
+ with gr.Group():
581
+ gr.Markdown("### 🚀 Processing")
582
+ with gr.Row():
583
+ epub_process_btn = gr.Button("🚀 Process Selected Chapters", variant="primary", visible=False, elem_id="process-btn")
584
+ epub_stop_btn = gr.Button("🛑 Stop Processing", variant="stop", visible=False, elem_id="stop-btn")
585
+
586
+ # Right column - Status and Results (responsive)
587
+ with gr.Column(scale=3, min_width=500):
588
+ # Status Section
589
+ with gr.Group():
590
+ gr.Markdown("### 📊 Status & Results")
591
+ epub_status = gr.Markdown("📁 **Upload Status:** Ready to upload EPUB file")
592
+ epub_progress = gr.Progress()
593
+ epub_validation_status = gr.Markdown("⏳ **Validation:** Waiting for file upload")
594
+ epub_processing_status = gr.Markdown("⏳ **Processing:** Not started")
595
+ epub_details = gr.Markdown("")
596
+
597
+ # Results Section
598
+ with gr.Group():
599
+ gr.Markdown("### 🎵 Results")
600
+ epub_download_audio = gr.Audio(label="Download Complete Audiobook", visible=False)
601
+ epub_chapter_audio = gr.Audio(label="Chapter Audio Preview", visible=False)
602
+ epub_chapter_info = gr.Markdown("", visible=False)
603
+
604
+ # State management (hidden)
605
+ epub_chapters = gr.State() # Store chapter data
606
+ epub_book_title = gr.State() # Store book title
607
+ epub_book_author = gr.State() # Store book author
608
+ epub_chapter_choices = gr.State() # Store chapter choices for selection
609
+ epub_selected_choices = gr.State() # Store selected chapter choices
610
+
611
+ # Manual Text Processing Section
612
+ with gr.Tab("✏️ Manual Text Processing"):
613
+ gr.Markdown("## Manual Text Input\nEnter text manually for quick processing and testing.")
614
+
615
+ with gr.Row():
616
+ with gr.Column(scale=3):
617
+ text = gr.Textbox(lines=12, label="Текст (глава/абзацы)")
618
+ ref_text = gr.Textbox(lines=3, label="Reference text (текст из референса)", value=load_default_ref_text())
619
+ ref_audio = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Reference audio (6–12 с, опционально - используется samples/001/sample.mp3 по умолчанию)")
620
+ with gr.Row():
621
+ speed = gr.Slider(0.6, 1.4, value=1.0, step=0.05, label="Speed")
622
+ nfe_steps = gr.Slider(12, 96, value=48, step=1, label="NFE steps")
623
+ crossfade_ms = gr.Slider(0, 400, value=150, step=10, label="Crossfade (ms) между абзацами")
624
+ seed = gr.Number(value=0, label="Seed (0 = авто)")
625
+ btn = gr.Button("Synthesize", variant="primary")
626
+ with gr.Column(scale=2):
627
+ audio_out = gr.Audio(label="Предпрослушка", autoplay=False)
628
+ status = gr.Markdown()
629
+
630
+ # Event handlers for manual text processing
631
+ btn.click(
632
+ synthesize_ui,
633
+ inputs=[text, ref_audio, ref_text, speed, nfe_steps, crossfade_ms, seed],
634
+ outputs=[audio_out, status]
635
+ )
636
+
637
+ # Event handlers for EPUB processing
638
+ epub_validate_btn.click(
639
+ validate_epub_upload,
640
+ inputs=[epub_upload],
641
+ outputs=[epub_status, epub_validation_status, epub_processing_status, epub_details, epub_chapters, epub_book_title, epub_book_author, epub_chapter_choices, epub_selected_choices]
642
+ ).then(
643
+ update_chapter_selection_interface,
644
+ inputs=[epub_chapters, epub_chapter_choices, epub_selected_choices],
645
+ outputs=[epub_process_btn, epub_stop_btn, epub_chapter_selection, epub_selection_buttons, epub_selection_summary]
646
+ )
647
+
648
+ # EPUB processing event handler
649
+ epub_process_btn.click(
650
+ lambda: (gr.update(visible=False), gr.update(visible=True)),
651
+ outputs=[epub_process_btn, epub_stop_btn]
652
+ ).then(
653
+ process_epub_book,
654
+ inputs=[epub_chapters, epub_chapter_selection, epub_book_title, epub_book_author, epub_speed, epub_nfe_steps, epub_crossfade_ms, epub_seed, epub_target_rms, epub_cfg_strength, epub_sway_coef],
655
+ outputs=[epub_processing_status, epub_details, epub_download_audio, epub_chapter_audio, epub_chapter_info]
656
+ ).then(
657
+ lambda: (gr.update(visible=True), gr.update(visible=False)),
658
+ outputs=[epub_process_btn, epub_stop_btn]
659
+ ).then(
660
+ lambda audio_file, info_text, download_audio: (
661
+ gr.update(visible=bool(audio_file)),
662
+ gr.update(visible=bool(info_text.strip())),
663
+ gr.update(visible=bool(download_audio), value=download_audio)
664
+ ),
665
+ inputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio],
666
+ outputs=[epub_chapter_audio, epub_chapter_info, epub_download_audio]
667
+ )
668
+
669
+ # Chapter selection change handler
670
+ epub_chapter_selection.change(
671
+ save_chapter_selection_state,
672
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
673
+ outputs=[epub_selected_choices]
674
+ ).then(
675
+ lambda selected_choices, chapters: gr.update(
676
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
677
+ ),
678
+ inputs=[epub_selected_choices, epub_chapters],
679
+ outputs=[epub_selection_summary]
680
+ )
681
+
682
+ # Select All button handler
683
+ epub_select_all_btn.click(
684
+ select_all_chapters,
685
+ inputs=[epub_chapter_choices],
686
+ outputs=[epub_chapter_selection]
687
+ ).then(
688
+ save_chapter_selection_state,
689
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
690
+ outputs=[epub_selected_choices]
691
+ ).then(
692
+ lambda selected_choices, chapters: gr.update(
693
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
694
+ ),
695
+ inputs=[epub_selected_choices, epub_chapters],
696
+ outputs=[epub_selection_summary]
697
+ )
698
+
699
+ # Deselect All button handler
700
+ epub_deselect_all_btn.click(
701
+ deselect_all_chapters,
702
+ outputs=[epub_chapter_selection]
703
+ ).then(
704
+ save_chapter_selection_state,
705
+ inputs=[epub_chapter_selection, epub_book_title, epub_book_author],
706
+ outputs=[epub_selected_choices]
707
+ ).then(
708
+ lambda selected_choices, chapters: gr.update(
709
+ value=f"📊 **Selection:** {get_selection_summary(selected_choices, len(chapters) if chapters else 0)}"
710
+ ),
711
+ inputs=[epub_selected_choices, epub_chapters],
712
+ outputs=[epub_selection_summary]
713
+ )
714
+
715
+ # EPUB stop processing event handler
716
+ epub_stop_btn.click(
717
+ stop_epub_processing,
718
+ outputs=[epub_processing_status]
719
+ ).then(
720
+ lambda: (gr.update(visible=True), gr.update(visible=False)),
721
+ outputs=[epub_process_btn, epub_stop_btn]
722
+ )
723
+
724
+ if __name__ == "__main__":
725
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
audiobook_generator.py ADDED
@@ -0,0 +1,499 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Audiobook package generation module for Russian Audiobook Studio.
4
+ Creates high-quality audiobook packages with metadata, playlists, and proper file structure.
5
+ """
6
+ import os
7
+ import json
8
+ import zipfile
9
+ import re
10
+ from pathlib import Path
11
+ from typing import List, Dict, Any, Optional, Tuple
12
+ from dataclasses import dataclass
13
+ import numpy as np
14
+ import soundfile as sf
15
+ from datetime import datetime
16
+
17
+ # Import mutagen for metadata handling
18
+ try:
19
+ from mutagen import File as MutagenFile
20
+ from mutagen.id3 import ID3, TIT2, TPE1, TALB, TCON, TDRC, TRCK
21
+ from mutagen.mp3 import MP3
22
+ MUTAGEN_AVAILABLE = True
23
+ except ImportError:
24
+ MUTAGEN_AVAILABLE = False
25
+
26
+ from epub_processing_pipeline import ChapterProcessingResult
27
+
28
+
29
+ @dataclass
30
+ class AudiobookMetadata:
31
+ """Metadata for an audiobook package."""
32
+ title: str
33
+ author: str
34
+ total_chapters: int
35
+ total_duration: float
36
+ total_word_count: int
37
+ chapters: List[Dict[str, Any]]
38
+ creation_date: str
39
+ version: str = "1.0"
40
+
41
+
42
+ class AudiobookGenerator:
43
+ """Generator for creating audiobook packages with metadata and proper structure."""
44
+
45
+ def __init__(self):
46
+ """Initialize the audiobook generator."""
47
+ self.target_sample_rate = 24000 # 24kHz
48
+ self.target_bit_depth = 16 # 16-bit
49
+ self.target_channels = 1 # Mono
50
+ self.version = "1.0"
51
+
52
+ def sanitize_filename(self, filename: str, max_length: int = 100) -> str:
53
+ """
54
+ Sanitize filename for cross-platform compatibility.
55
+
56
+ Args:
57
+ filename: Original filename
58
+ max_length: Maximum length for the filename
59
+
60
+ Returns:
61
+ Sanitized filename
62
+ """
63
+ # Remove or replace problematic characters
64
+ sanitized = re.sub(r'[<>:"/\\|?*]', '_', filename)
65
+ sanitized = re.sub(r'[^\w\s\-_\.]', '_', sanitized)
66
+ sanitized = re.sub(r'\s+', '_', sanitized) # Replace spaces with underscores
67
+ sanitized = re.sub(r'_+', '_', sanitized) # Replace multiple underscores with single
68
+
69
+ # Remove leading/trailing underscores and dots
70
+ sanitized = sanitized.strip('_.')
71
+
72
+ # Truncate if too long
73
+ if len(sanitized) > max_length:
74
+ sanitized = sanitized[:max_length].rstrip('_.')
75
+
76
+ return sanitized
77
+
78
+ def save_high_quality_audio(
79
+ self,
80
+ audio_data: np.ndarray,
81
+ sample_rate: int,
82
+ output_path: str
83
+ ) -> None:
84
+ """
85
+ Save audio data as high-quality WAV file (24kHz, 16-bit, mono).
86
+
87
+ Args:
88
+ audio_data: Audio data as numpy array
89
+ sample_rate: Original sample rate
90
+ output_path: Path to save the audio file
91
+
92
+ Raises:
93
+ ValueError: If audio data or sample rate is invalid
94
+ """
95
+ if audio_data is None:
96
+ raise ValueError("Invalid audio data")
97
+
98
+ if sample_rate <= 0:
99
+ raise ValueError("Invalid sample rate")
100
+
101
+ # Ensure mono audio
102
+ if len(audio_data.shape) > 1:
103
+ audio_data = np.mean(audio_data, axis=1)
104
+
105
+ # Resample to target sample rate if needed
106
+ if sample_rate != self.target_sample_rate:
107
+ audio_data = self._resample_audio(audio_data, sample_rate, self.target_sample_rate)
108
+
109
+ # Ensure 16-bit depth
110
+ if audio_data.dtype != np.int16:
111
+ # Convert to 16-bit
112
+ audio_data = (audio_data * 32767).astype(np.int16)
113
+
114
+ # Create output directory if it doesn't exist
115
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
116
+
117
+ # Save as WAV file
118
+ sf.write(output_path, audio_data, self.target_sample_rate, subtype='PCM_16')
119
+
120
+ def _resample_audio(self, audio_data: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
121
+ """
122
+ Resample audio data to target sample rate.
123
+
124
+ Args:
125
+ audio_data: Original audio data
126
+ orig_sr: Original sample rate
127
+ target_sr: Target sample rate
128
+
129
+ Returns:
130
+ Resampled audio data
131
+ """
132
+ if orig_sr == target_sr:
133
+ return audio_data
134
+
135
+ # Simple linear interpolation resampling
136
+ # For production use, consider using librosa or scipy.signal.resample
137
+ ratio = target_sr / orig_sr
138
+ new_length = int(len(audio_data) * ratio)
139
+
140
+ # Create new time indices
141
+ old_indices = np.linspace(0, len(audio_data) - 1, len(audio_data))
142
+ new_indices = np.linspace(0, len(audio_data) - 1, new_length)
143
+
144
+ # Interpolate
145
+ resampled = np.interp(new_indices, old_indices, audio_data)
146
+
147
+ return resampled.astype(audio_data.dtype)
148
+
149
+ def create_metadata(
150
+ self,
151
+ title: str,
152
+ author: str,
153
+ chapters: List[Any],
154
+ total_duration: float
155
+ ) -> Dict[str, Any]:
156
+ """
157
+ Create comprehensive metadata for the audiobook.
158
+
159
+ Args:
160
+ title: Audiobook title
161
+ author: Audiobook author
162
+ chapters: List of chapter objects
163
+ total_duration: Total duration in minutes
164
+
165
+ Returns:
166
+ Dictionary containing metadata
167
+ """
168
+ total_word_count = sum(getattr(chapter, 'word_count', 0) for chapter in chapters)
169
+
170
+ metadata = {
171
+ 'title': title,
172
+ 'artist': author,
173
+ 'album': title,
174
+ 'genre': 'Audiobook',
175
+ 'date': datetime.now().strftime('%Y-%m-%d'),
176
+ 'total_chapters': len(chapters),
177
+ 'total_duration': total_duration,
178
+ 'total_word_count': total_word_count,
179
+ 'chapters': []
180
+ }
181
+
182
+ # Add chapter metadata
183
+ for i, chapter in enumerate(chapters):
184
+ chapter_meta = {
185
+ 'title': getattr(chapter, 'title', f'Chapter {i+1}'),
186
+ 'order': i + 1,
187
+ 'word_count': getattr(chapter, 'word_count', 0),
188
+ 'estimated_duration': getattr(chapter, 'estimated_duration', 0.0),
189
+ 'file_name': f"chapter_{i+1:03d}_{self.sanitize_filename(getattr(chapter, 'title', f'Chapter_{i+1}'))}.wav"
190
+ }
191
+ metadata['chapters'].append(chapter_meta)
192
+
193
+ return metadata
194
+
195
+ def generate_m3u_playlist(
196
+ self,
197
+ processing_results: List[ChapterProcessingResult],
198
+ output_path: str
199
+ ) -> None:
200
+ """
201
+ Generate M3U playlist file for chapter navigation.
202
+
203
+ Args:
204
+ processing_results: List of chapter processing results
205
+ output_path: Path to save the playlist file
206
+ """
207
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
208
+
209
+ with open(output_path, 'w', encoding='utf-8') as f:
210
+ f.write("#EXTM3U\n")
211
+ f.write(f"#EXTINF:-1,Complete Audiobook\n")
212
+ f.write("audio/complete_audiobook.wav\n\n")
213
+
214
+ for result in processing_results:
215
+ if result.status == "completed":
216
+ duration_seconds = int(result.estimated_duration * 60)
217
+ sanitized_title = self.sanitize_filename(result.chapter_title)
218
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
219
+
220
+ f.write(f"#EXTINF:{duration_seconds},{result.chapter_title}\n")
221
+ f.write(f"audio/{filename}\n")
222
+
223
+ def create_metadata_json(
224
+ self,
225
+ processing_results: List[ChapterProcessingResult],
226
+ title: str,
227
+ author: str,
228
+ output_path: str
229
+ ) -> None:
230
+ """
231
+ Create JSON metadata file.
232
+
233
+ Args:
234
+ processing_results: List of chapter processing results
235
+ title: Audiobook title
236
+ author: Audiobook author
237
+ output_path: Path to save the metadata file
238
+ """
239
+ os.makedirs(os.path.dirname(output_path), exist_ok=True)
240
+
241
+ # Calculate totals
242
+ total_duration = sum(result.estimated_duration for result in processing_results)
243
+ total_word_count = sum(result.word_count for result in processing_results)
244
+
245
+ # Create metadata
246
+ metadata = {
247
+ 'title': title,
248
+ 'author': author,
249
+ 'total_chapters': len(processing_results),
250
+ 'total_duration': total_duration,
251
+ 'total_word_count': total_word_count,
252
+ 'creation_date': datetime.now().isoformat(),
253
+ 'version': self.version,
254
+ 'chapters': []
255
+ }
256
+
257
+ # Add chapter information
258
+ for result in processing_results:
259
+ if result.status == "completed":
260
+ sanitized_title = self.sanitize_filename(result.chapter_title)
261
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
262
+
263
+ chapter_info = {
264
+ 'title': result.chapter_title,
265
+ 'order': result.chapter_index + 1,
266
+ 'word_count': result.word_count,
267
+ 'estimated_duration': result.estimated_duration,
268
+ 'file_name': filename,
269
+ 'processing_time': result.processing_time,
270
+ 'sample_rate': result.sample_rate
271
+ }
272
+ metadata['chapters'].append(chapter_info)
273
+
274
+ # Save metadata
275
+ with open(output_path, 'w', encoding='utf-8') as f:
276
+ json.dump(metadata, f, indent=2, ensure_ascii=False)
277
+
278
+ def generate_individual_chapters(
279
+ self,
280
+ processing_results: List[ChapterProcessingResult],
281
+ output_dir: str
282
+ ) -> List[str]:
283
+ """
284
+ Generate individual chapter audio files.
285
+
286
+ Args:
287
+ processing_results: List of chapter processing results
288
+ output_dir: Directory to save chapter files
289
+
290
+ Returns:
291
+ List of paths to generated chapter files
292
+ """
293
+ if not processing_results:
294
+ raise ValueError("No chapters to process")
295
+
296
+ os.makedirs(output_dir, exist_ok=True)
297
+ chapter_files = []
298
+
299
+ for result in processing_results:
300
+ if result.status == "completed" and result.audio_data is not None:
301
+ sanitized_title = self.sanitize_filename(result.chapter_title)
302
+ filename = f"chapter_{result.chapter_index+1:03d}_{sanitized_title}.wav"
303
+ file_path = os.path.join(output_dir, filename)
304
+
305
+ self.save_high_quality_audio(
306
+ result.audio_data,
307
+ result.sample_rate,
308
+ file_path
309
+ )
310
+ chapter_files.append(file_path)
311
+
312
+ return chapter_files
313
+
314
+ def generate_complete_audiobook(
315
+ self,
316
+ processing_results: List[ChapterProcessingResult],
317
+ output_path: str
318
+ ) -> str:
319
+ """
320
+ Generate complete audiobook by concatenating all chapters.
321
+
322
+ Args:
323
+ processing_results: List of chapter processing results
324
+ output_path: Path to save the complete audiobook
325
+
326
+ Returns:
327
+ Path to the generated complete audiobook file
328
+ """
329
+ if not processing_results:
330
+ raise ValueError("No chapters to process")
331
+
332
+ # Collect valid audio data
333
+ valid_audio = []
334
+ sample_rate = None
335
+
336
+ for result in processing_results:
337
+ if result.status == "completed" and result.audio_data is not None:
338
+ valid_audio.append(result.audio_data)
339
+ if sample_rate is None:
340
+ sample_rate = result.sample_rate
341
+
342
+ if not valid_audio:
343
+ raise ValueError("No valid audio data found")
344
+
345
+ # Concatenate audio
346
+ from utils.audio import crossfade_concat
347
+ # Use smaller crossfade for short audio segments
348
+ crossfade_ms = min(150, int(min(len(audio) for audio in valid_audio) / sample_rate * 1000 * 0.5))
349
+ complete_audio = crossfade_concat(valid_audio, crossfade_ms=crossfade_ms, sample_rate=sample_rate)
350
+
351
+ # Save complete audiobook
352
+ self.save_high_quality_audio(complete_audio, sample_rate, output_path)
353
+
354
+ return output_path
355
+
356
+ def generate_audiobook_package(
357
+ self,
358
+ processing_results: List[ChapterProcessingResult],
359
+ title: str,
360
+ author: str,
361
+ output_dir: str
362
+ ) -> Dict[str, str]:
363
+ """
364
+ Generate complete audiobook package with all files and metadata.
365
+
366
+ Args:
367
+ processing_results: List of chapter processing results
368
+ title: Audiobook title
369
+ author: Audiobook author
370
+ output_dir: Directory to create the package
371
+
372
+ Returns:
373
+ Dictionary with paths to generated files
374
+ """
375
+ if processing_results is None:
376
+ raise ValueError("Processing results cannot be None")
377
+ if not processing_results:
378
+ raise ValueError("No chapters to process")
379
+
380
+ # Create directory structure
381
+ sanitized_title = self.sanitize_filename(title)
382
+ package_dir = os.path.join(output_dir, sanitized_title)
383
+ audio_dir = os.path.join(package_dir, "audio")
384
+ metadata_dir = os.path.join(package_dir, "metadata")
385
+
386
+ os.makedirs(audio_dir, exist_ok=True)
387
+ os.makedirs(metadata_dir, exist_ok=True)
388
+
389
+ generated_files = {}
390
+
391
+ # Generate individual chapter files
392
+ chapter_files = self.generate_individual_chapters(processing_results, audio_dir)
393
+ generated_files['chapters'] = chapter_files
394
+
395
+ # Generate complete audiobook
396
+ complete_path = os.path.join(audio_dir, "complete_audiobook.wav")
397
+ self.generate_complete_audiobook(processing_results, complete_path)
398
+ generated_files['complete'] = complete_path
399
+
400
+ # Generate metadata files
401
+ metadata_json_path = os.path.join(metadata_dir, "metadata.json")
402
+ self.create_metadata_json(processing_results, title, author, metadata_json_path)
403
+ generated_files['metadata_json'] = metadata_json_path
404
+
405
+ # Generate M3U playlist
406
+ playlist_path = os.path.join(metadata_dir, "playlist.m3u")
407
+ self.generate_m3u_playlist(processing_results, playlist_path)
408
+ generated_files['playlist'] = playlist_path
409
+
410
+ return generated_files
411
+
412
+ def create_complete_package_zip(
413
+ self,
414
+ processing_results: List[ChapterProcessingResult],
415
+ title: str,
416
+ author: str,
417
+ output_dir: str
418
+ ) -> str:
419
+ """
420
+ Create a complete ZIP package of the audiobook.
421
+
422
+ Args:
423
+ processing_results: List of chapter processing results
424
+ title: Audiobook title
425
+ author: Audiobook author
426
+ output_dir: Directory to save the ZIP file
427
+
428
+ Returns:
429
+ Path to the created ZIP file
430
+ """
431
+ # Generate the audiobook package
432
+ package_files = self.generate_audiobook_package(processing_results, title, author, output_dir)
433
+
434
+ # Create ZIP file
435
+ sanitized_title = self.sanitize_filename(title)
436
+ zip_filename = f"{sanitized_title}_audiobook.zip"
437
+ zip_path = os.path.join(output_dir, zip_filename)
438
+
439
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
440
+ # Add all files from the package directory
441
+ package_dir = os.path.join(output_dir, sanitized_title)
442
+
443
+ for root, dirs, files in os.walk(package_dir):
444
+ for file in files:
445
+ file_path = os.path.join(root, file)
446
+ arcname = os.path.relpath(file_path, output_dir)
447
+ zip_file.write(file_path, arcname)
448
+
449
+ return zip_path
450
+
451
+ def estimate_package_size(self, processing_results: List[ChapterProcessingResult]) -> int:
452
+ """
453
+ Estimate the size of the audiobook package in bytes.
454
+
455
+ Args:
456
+ processing_results: List of chapter processing results
457
+
458
+ Returns:
459
+ Estimated package size in bytes
460
+ """
461
+ if not processing_results:
462
+ return 0
463
+
464
+ total_audio_size = 0
465
+
466
+ for result in processing_results:
467
+ if result.status == "completed" and result.audio_data is not None:
468
+ # Estimate WAV file size: sample_rate * duration * 2 bytes (16-bit) * channels
469
+ duration_seconds = result.estimated_duration * 60
470
+ audio_size = int(self.target_sample_rate * duration_seconds * 2 * self.target_channels)
471
+ total_audio_size += audio_size
472
+
473
+ # Add overhead for metadata, playlist, and ZIP compression
474
+ overhead = total_audio_size * 0.1 # 10% overhead
475
+
476
+ return int(total_audio_size + overhead)
477
+
478
+ def add_audio_metadata(self, audio_path: str, metadata: Dict[str, Any]) -> None:
479
+ """
480
+ Add metadata to audio files using mutagen (if available).
481
+
482
+ Args:
483
+ audio_path: Path to the audio file
484
+ metadata: Metadata dictionary
485
+ """
486
+ if not MUTAGEN_AVAILABLE:
487
+ return # Skip if mutagen is not available
488
+
489
+ try:
490
+ # For WAV files, we can't add ID3 tags directly
491
+ # This is a placeholder for future MP3 support
492
+ pass
493
+ except Exception as e:
494
+ # Silently fail if metadata addition fails
495
+ pass
496
+
497
+ def cleanup(self):
498
+ """Clean up any temporary resources."""
499
+ pass
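For orientation, the sketch below shows how the pieces of this module fit together on disk: `sanitize_filename` normalizes titles, and `generate_audiobook_package` / `create_complete_package_zip` produce the layout sketched in the comments. The book and chapter titles here are placeholders; the directory names, file-name pattern, and audio format follow the code above.

```python
from audiobook_generator import AudiobookGenerator

gen = AudiobookGenerator()

# Titles are sanitized before being used in file names (punctuation and spaces -> "_"):
safe = gen.sanitize_filename('Глава 1: "Начало"...')
print(safe)  # roughly 'Глава_1_Начало'

# Illustrative layout produced for a book titled "My_Book" (placeholder title):
#
#   output_dir/My_Book/
#   ├── audio/
#   │   ├── chapter_001_<title>.wav     # per-chapter files: 24 kHz, 16-bit, mono WAV
#   │   ├── chapter_002_<title>.wav
#   │   └── complete_audiobook.wav      # completed chapters crossfaded together
#   └── metadata/
#       ├── metadata.json               # title, author, per-chapter stats
#       └── playlist.m3u                # #EXTM3U playlist referencing audio/...
#
# create_complete_package_zip(...) then archives My_Book/ into My_Book_audiobook.zip.
```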
backends/espeech_backend.py ADDED
@@ -0,0 +1,135 @@
1
+ # backends/espeech_backend.py
2
+ # Full integration of ESpeech/ESpeech-TTS-1_RL-V2 (F5-TTS) for inference.
3
+ # Based on the model card code: weight loading, reference audio preprocessing,
4
+ # the infer_process call, and returning (wave, sample_rate).
5
+ from __future__ import annotations
6
+ from typing import Tuple, Optional
7
+ import os
8
+ import gc
9
+ import numpy as np
10
+ import torch
11
+ import torchaudio
12
+
13
+ # Force CPU usage on macOS to avoid MPS issues
14
+ if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
15
+ os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
16
+ # Disable MPS to force CPU usage
17
+ torch.backends.mps.is_available = lambda: False
18
+ torch.backends.mps.is_built = lambda: False
19
+ from huggingface_hub import hf_hub_download, snapshot_download
20
+
21
+ # F5-TTS imports (as in the model card)
22
+ from f5_tts.infer.utils_infer import (
23
+ infer_process,
24
+ load_model,
25
+ load_vocoder,
26
+ preprocess_ref_audio_text,
27
+ )
28
+ from f5_tts.model import DiT
29
+
30
+ # Model config from the model card
31
+ MODEL_CFG = dict(dim=1024, depth=22, heads=16, ff_mult=2, text_dim=512, conv_layers=4)
32
+
33
+ class EspeechBackend:
34
+ def __init__(self, model_id: str = "ESpeech/ESpeech-TTS-1_RL-V2"):
35
+ self.model_id = model_id
36
+ self.model_file = "espeech_tts_rlv2.pt"
37
+ self.vocab_file = "vocab.txt"
38
+ # Prefer CUDA when available; otherwise CPU (MPS was disabled above to avoid macOS issues)
39
+ if torch.cuda.is_available():
40
+ self.device = torch.device("cuda")
41
+ else:
42
+ self.device = torch.device("cpu")
43
+ self.model = None
44
+ self.vocoder = None
45
+ self._ensure_loaded()
46
+
47
+ def _download(self, repo: str, filename: str) -> str:
48
+ try:
49
+ return hf_hub_download(repo_id=repo, filename=filename)
50
+ except Exception:
51
+ # fallback: download the whole repo snapshot
52
+ local_dir = f"cache_{repo.replace('/', '_')}"
53
+ snap_dir = snapshot_download(repo_id=repo, local_dir=local_dir)
54
+ path = os.path.join(snap_dir, filename)
55
+ if not os.path.exists(path):
56
+ raise FileNotFoundError(f"{filename} not found in snapshot {snap_dir}")
57
+ return path
58
+
59
+ def _ensure_loaded(self):
60
+ # download model weights and vocabulary
61
+ model_path = self._download(self.model_id, self.model_file)
62
+ vocab_path = self._download(self.model_id, self.vocab_file)
63
+ # initialize the model and vocoder
64
+ self.model = load_model(DiT, MODEL_CFG, model_path, vocab_file=vocab_path)
65
+ self.vocoder = load_vocoder()
66
+ # move to the target device
67
+ try:
68
+ self.model.to(self.device)
69
+ self.vocoder.to(self.device)
70
+ except Exception as e:
71
+ # Fallback to CPU if device transfer fails
72
+ print(f"Warning: Failed to move model to {self.device}, falling back to CPU: {e}")
73
+ self.device = torch.device("cpu")
74
+ self.model.to(self.device)
75
+ self.vocoder.to(self.device)
76
+
77
+ def synthesize(
78
+ self,
79
+ text: str,
80
+ ref_audio_path: Optional[str],
81
+ ref_text: str,
82
+ speed: float = 1.0,
83
+ nfe_steps: int = 48,
84
+ seed: Optional[int] = None,
85
+ cross_fade_sec: float = 0.15,
86
+ target_rms: float = 0.1,
87
+ cfg_strength: float = 2.0,
88
+ sway_sampling_coef: float = -1.0,
89
+ ) -> Tuple[np.ndarray, int]:
90
+ """
91
+ Возвращает (audio_float32_mono, sample_rate).
92
+ Требования: float32 [-1..1], моно.
93
+ """
94
+ if not text or not text.strip():
95
+ raise ValueError("Пустой текст для синтеза.")
96
+ if not ref_audio_path or not os.path.exists(ref_audio_path):
97
+ raise FileNotFoundError("Укажите путь к reference audio (6–12 с).")
98
+ if not ref_text or not ref_text.strip():
99
+ raise ValueError("Укажите reference text (транскрипт того же reference audio).")
100
+
101
+ if seed is not None:
102
+ torch.manual_seed(int(seed))
103
+
104
+ # Подготовка референса (функция сама сделает ресэмплинг/моно)
105
+ ref_audio_proc, ref_text_proc = preprocess_ref_audio_text(ref_audio_path, ref_text)
106
+
107
+ # Основной вызов инференса с дополнительными параметрами для улучшения качества голоса
108
+ final_wave, final_sample_rate, _ = infer_process(
109
+ ref_audio_proc,
110
+ ref_text_proc,
111
+ text,
112
+ self.model,
113
+ self.vocoder,
114
+ cross_fade_duration=float(cross_fade_sec),
115
+ nfe_step=int(nfe_steps),
116
+ speed=float(speed),
117
+ target_rms=float(target_rms),
118
+ cfg_strength=float(cfg_strength),
119
+ sway_sampling_coef=float(sway_sampling_coef),
120
+ )
121
+
122
+ # На всякий случай приводим тип/диапазон
123
+ wav = np.asarray(final_wave, dtype=np.float32)
124
+ wav = np.clip(wav, -1.0, 1.0)
125
+ sr = int(final_sample_rate)
126
+
127
+ # Освобождение памяти на CUDA (длинные книги)
128
+ if self.device.type == "cuda":
129
+ try:
130
+ torch.cuda.empty_cache()
131
+ gc.collect()
132
+ except Exception:
133
+ pass
134
+
135
+ return wav, sr
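A minimal usage sketch for the backend, assuming the bundled reference sample files from this repository (`samples/001/sample.mp3` and `samples/001/sample.text`) are present and that the F5-TTS and Hub dependencies from requirements.txt are installed; `demo.wav` is just a placeholder output name.

```python
from pathlib import Path

import soundfile as sf

from backends.espeech_backend import EspeechBackend

# Downloads the checkpoint and vocab from the Hub on first run.
backend = EspeechBackend()

# Reference transcript shipped with the repo (must match the reference audio).
ref_text = Path("samples/001/sample.text").read_text(encoding="utf-8").strip()

wav, sr = backend.synthesize(
    text="Привет! Это пробный синтез речи.",
    ref_audio_path="samples/001/sample.mp3",
    ref_text=ref_text,
    speed=1.0,
    nfe_steps=32,  # lower step count for a quick smoke test
)
sf.write("demo.wav", wav, sr, subtype="PCM_16")
```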
epub_processing_pipeline.py ADDED
@@ -0,0 +1,425 @@
+ #!/usr/bin/env python3
+ """
+ EPUB processing pipeline for Russian Audiobook Studio.
+ Integrates EPUB chapter detection with ESpeechBackend for TTS processing.
+ """
+ import os
+ import time
+ import gc
+ from typing import List, Optional, Callable, Dict, Any, Tuple
+ from dataclasses import dataclass
+ from pathlib import Path
+ import numpy as np
+ import soundfile as sf
+
+ from epub_processor import EpubProcessor, Chapter
+ from backends.espeech_backend import EspeechBackend
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
+
+
+ @dataclass
+ class ProcessingSettings:
+     """Settings for EPUB processing pipeline."""
+     speed: float = 1.0
+     nfe_steps: int = 48
+     crossfade_ms: int = 150
+     target_lufs: float = -20.0
+     seed: Optional[int] = None
+     ref_audio_path: str = "samples/001/sample.mp3"
+     ref_text: str = ""
+     # Voice and tone parameters
+     target_rms: float = 0.1
+     cfg_strength: float = 2.0
+     sway_sampling_coef: float = -1.0
+
+     def __post_init__(self):
+         """Load default reference text if not provided."""
+         if not self.ref_text:
+             try:
+                 with open("samples/001/sample.text", "r", encoding="utf-8") as f:
+                     self.ref_text = f.read().strip()
+             except FileNotFoundError:
+                 self.ref_text = ""
+
+
+ @dataclass
+ class ChapterProcessingResult:
+     """Result of processing a single chapter."""
+     chapter_index: int
+     chapter_title: str
+     status: str  # pending, processing, completed, error, cancelled
+     audio_data: Optional[np.ndarray] = None
+     sample_rate: Optional[int] = None
+     processing_time: float = 0.0
+     error_message: Optional[str] = None
+     word_count: int = 0
+     estimated_duration: float = 0.0
+
+
+ @dataclass
+ class ProcessingStatistics:
+     """Statistics for book processing."""
+     total_chapters: int = 0
+     completed_chapters: int = 0
+     failed_chapters: int = 0
+     total_processing_time: float = 0.0
+     average_processing_time: float = 0.0
+     total_audio_duration: float = 0.0
+     total_word_count: int = 0
+
+
+ class EpubProcessingPipeline:
+     """Main pipeline for processing EPUB books into audiobooks."""
+
+     def __init__(self, epub_processor: EpubProcessor, tts_backend: EspeechBackend):
+         """
+         Initialize the processing pipeline.
+
+         Args:
+             epub_processor: EPUB processor for chapter extraction
+             tts_backend: TTS backend for audio synthesis
+         """
+         self.epub_processor = epub_processor
+         self.tts_backend = tts_backend
+         self.processing_settings = ProcessingSettings()
+
+         # Ensure backend is loaded
+         self.tts_backend._ensure_loaded()
+
+     def process_chapter(
+         self,
+         chapter: Chapter,
+         settings: ProcessingSettings,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> ChapterProcessingResult:
+         """
+         Process a single chapter into audio.
+
+         Args:
+             chapter: Chapter to process
+             settings: Processing settings
+             progress_callback: Optional callback for progress updates
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             ChapterProcessingResult with processing status and audio data
+         """
+         start_time = time.time()
+
+         if progress_callback:
+             progress_callback(chapter.order, "processing", f"Processing {chapter.title}")
+
+         # Check for cancellation before starting
+         if cancellation_check and cancellation_check():
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="cancelled",
+                 processing_time=0.0,
+                 error_message="Processing cancelled by user",
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+         try:
+             # Normalize and prepare text
+             normalized_text = normalize_text(chapter.content)
+             paragraphs = split_into_paragraphs(normalized_text)
+             paragraphs = [maybe_ruaccent(p) for p in paragraphs]
+
+             if not paragraphs:
+                 raise ValueError("No text content to process")
+
+             # Process each paragraph
+             audio_pieces = []
+             sample_rate = None
+
+             for i, paragraph in enumerate(paragraphs):
+                 if not paragraph.strip():
+                     continue
+
+                 # Check for cancellation before each paragraph
+                 if cancellation_check and cancellation_check():
+                     return ChapterProcessingResult(
+                         chapter_index=chapter.order,
+                         chapter_title=chapter.title,
+                         status="cancelled",
+                         processing_time=time.time() - start_time,
+                         error_message="Processing cancelled by user",
+                         word_count=chapter.word_count,
+                         estimated_duration=chapter.estimated_duration
+                     )
+
+                 # Synthesize audio for paragraph with enhanced voice parameters
+                 audio, sr = self.tts_backend.synthesize(
+                     text=paragraph,
+                     ref_audio_path=settings.ref_audio_path,
+                     ref_text=settings.ref_text,
+                     speed=settings.speed,
+                     nfe_steps=settings.nfe_steps,
+                     seed=settings.seed,
+                     cross_fade_sec=settings.crossfade_ms / 1000.0,
+                     target_rms=settings.target_rms,
+                     cfg_strength=settings.cfg_strength,
+                     sway_sampling_coef=settings.sway_sampling_coef
+                 )
+
+                 audio_pieces.append(audio)
+                 sample_rate = sr
+
+                 # Memory cleanup after each paragraph
+                 if i % 5 == 0:  # Every 5 paragraphs
+                     gc.collect()
+
+             if not audio_pieces:
+                 raise ValueError("No audio generated")
+
+             # Concatenate audio pieces with crossfade
+             final_audio = crossfade_concat(
+                 audio_pieces,
+                 crossfade_ms=settings.crossfade_ms,
+                 sample_rate=sample_rate
+             )
+
+             # Normalize audio levels
+             final_audio = normalize_lufs(
+                 final_audio,
+                 sample_rate,
+                 target_lufs=settings.target_lufs
+             )
+
+             processing_time = time.time() - start_time
+
+             if progress_callback:
+                 progress_callback(chapter.order, "completed", f"Completed {chapter.title}")
+
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="completed",
+                 audio_data=final_audio,
+                 sample_rate=sample_rate,
+                 processing_time=processing_time,
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+         except Exception as e:
+             processing_time = time.time() - start_time
+             error_msg = f"Error processing {chapter.title}: {str(e)}"
+
+             if progress_callback:
+                 progress_callback(chapter.order, "error", error_msg)
+
+             return ChapterProcessingResult(
+                 chapter_index=chapter.order,
+                 chapter_title=chapter.title,
+                 status="error",
+                 processing_time=processing_time,
+                 error_message=error_msg,
+                 word_count=chapter.word_count,
+                 estimated_duration=chapter.estimated_duration
+             )
+
+     def process_chapter_with_retry(
+         self,
+         chapter: Chapter,
+         settings: ProcessingSettings,
+         max_retries: int = 2,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> ChapterProcessingResult:
+         """
+         Process a chapter with retry mechanism for failed attempts.
+
+         Args:
+             chapter: Chapter to process
+             settings: Processing settings
+             max_retries: Maximum number of retry attempts
+             progress_callback: Optional callback for progress updates
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             ChapterProcessingResult with processing status
+         """
+         last_result = None
+
+         for attempt in range(max_retries + 1):
+             if attempt > 0:
+                 if progress_callback:
+                     progress_callback(chapter.order, "processing", f"Retry {attempt} for {chapter.title}")
+                 time.sleep(1)  # Brief pause before retry
+
+             result = self.process_chapter(chapter, settings, progress_callback, cancellation_check)
+             last_result = result
+
+             if result.status == "completed":
+                 return result
+
+             if result.status == "cancelled":
+                 # Do not retry chapters the user has cancelled
+                 return result
+
+         # All retries failed
+         if progress_callback:
+             progress_callback(chapter.order, "error", f"Failed after {max_retries} retries")
+
+         return last_result
+
+     def process_book(
+         self,
+         chapters: List[Chapter],
+         settings: ProcessingSettings,
+         progress_callback: Optional[Callable[[int, str, str], None]] = None,
+         max_retries: int = 2,
+         cancellation_check: Optional[Callable[[], bool]] = None
+     ) -> List[ChapterProcessingResult]:
+         """
+         Process an entire book with multiple chapters.
+
+         Args:
+             chapters: List of chapters to process
+             settings: Processing settings
+             progress_callback: Optional callback for progress updates
+             max_retries: Maximum retries per chapter
+             cancellation_check: Optional callable that returns True when the user cancels
+
+         Returns:
+             List of ChapterProcessingResult objects
+         """
+         results = []
+
+         for chapter in chapters:
+             # Check for cancellation before each chapter
+             if cancellation_check and cancellation_check():
+                 break
+
+             # Update chapter status
+             self.epub_processor.update_chapter_status(chapters, chapter.order, "processing")
+
+             # Process chapter with retry
+             result = self.process_chapter_with_retry(
+                 chapter,
+                 settings,
+                 max_retries,
+                 progress_callback,
+                 cancellation_check
+             )
+
+             # Update chapter status based on result
+             if result.status == "completed":
+                 self.epub_processor.update_chapter_status(chapters, chapter.order, "completed")
+             else:
+                 self.epub_processor.update_chapter_status(
+                     chapters,
+                     chapter.order,
+                     "error",
+                     result.error_message
+                 )
+
+             results.append(result)
+
+             # Memory cleanup after each chapter
+             gc.collect()
+
+         return results
+
+     def concatenate_chapter_audio(self, results: List[ChapterProcessingResult]) -> Optional[np.ndarray]:
+         """
+         Concatenate audio from multiple chapter results.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+
+         Returns:
+             Concatenated audio array or None if no valid audio
+         """
+         valid_audio = []
+         sample_rate = None
+
+         for result in results:
+             if result.status == "completed" and result.audio_data is not None:
+                 valid_audio.append(result.audio_data)
+                 if sample_rate is None:
+                     sample_rate = result.sample_rate
+
+         if not valid_audio:
+             return None
+
+         # Concatenate with crossfade
+         return crossfade_concat(valid_audio, crossfade_ms=150, sample_rate=sample_rate)
+
+     def export_audiobook(
+         self,
+         results: List[ChapterProcessingResult],
+         output_path: str,
+         export_individual_chapters: bool = True
+     ) -> Dict[str, str]:
+         """
+         Export processed audiobook to files.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+             output_path: Base path for output files
+             export_individual_chapters: Whether to export individual chapter files
+
+         Returns:
+             Dictionary with paths to exported files
+         """
+         output_dir = Path(output_path).parent
+         output_dir.mkdir(parents=True, exist_ok=True)
+
+         exported_files = {}
+         sample_rate = None
+
+         # Export individual chapters
+         if export_individual_chapters:
+             for result in results:
+                 if result.status == "completed" and result.audio_data is not None:
+                     chapter_filename = f"chapter_{result.chapter_index:03d}_{result.chapter_title.replace(' ', '_')}.wav"
+                     chapter_path = output_dir / chapter_filename
+
+                     save_wav(str(chapter_path), result.audio_data, result.sample_rate)
+                     exported_files[f"chapter_{result.chapter_index}"] = str(chapter_path)
+
+                     if sample_rate is None:
+                         sample_rate = result.sample_rate
+
+         # Export complete audiobook
+         concatenated_audio = self.concatenate_chapter_audio(results)
+         if concatenated_audio is not None:
+             if sample_rate is None:
+                 # Individual export was skipped: take the rate from the first completed chapter
+                 sample_rate = next(
+                     (r.sample_rate for r in results if r.status == "completed" and r.sample_rate),
+                     None
+                 )
+             # Use the exact output path specified by the user
+             complete_path = Path(output_path)
+             save_wav(str(complete_path), concatenated_audio, sample_rate)
+             exported_files["complete"] = str(complete_path)
+
+         return exported_files
+
+     def get_processing_statistics(self, results: List[ChapterProcessingResult]) -> ProcessingStatistics:
+         """
+         Get processing statistics from results.
+
+         Args:
+             results: List of ChapterProcessingResult objects
+
+         Returns:
+             ProcessingStatistics object
+         """
+         total_chapters = len(results)
+         completed_chapters = sum(1 for r in results if r.status == "completed")
+         failed_chapters = sum(1 for r in results if r.status == "error")
+         total_processing_time = sum(r.processing_time for r in results)
+         total_word_count = sum(r.word_count for r in results)
+         total_audio_duration = sum(r.estimated_duration for r in results)
+
+         average_processing_time = total_processing_time / total_chapters if total_chapters > 0 else 0.0
+
+         return ProcessingStatistics(
+             total_chapters=total_chapters,
+             completed_chapters=completed_chapters,
+             failed_chapters=failed_chapters,
+             total_processing_time=total_processing_time,
+             average_processing_time=average_processing_time,
+             total_audio_duration=total_audio_duration,
+             total_word_count=total_word_count
+         )
+
+     def cleanup(self):
+         """Clean up resources."""
+         self.epub_processor.cleanup_temp_files()
+         gc.collect()
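A sketch of how the pieces above could be wired together outside the Gradio UI, assuming the default reference sample shipped with the repo; `book.epub` and `output/book.wav` are placeholder paths, and only the first two chapters are processed to keep the run short.

```python
from backends.espeech_backend import EspeechBackend
from epub_processor import EpubProcessor
from epub_processing_pipeline import EpubProcessingPipeline, ProcessingSettings

processor = EpubProcessor()
validation = processor.process_epub_upload("book.epub")  # placeholder path
if not validation.is_valid:
    raise SystemExit(validation.error_message)

pipeline = EpubProcessingPipeline(processor, EspeechBackend())
settings = ProcessingSettings(speed=1.0, nfe_steps=32)  # uses the bundled reference voice

# Process a small subset and export both per-chapter files and the full book.
results = pipeline.process_book(validation.chapters[:2], settings)
exported = pipeline.export_audiobook(results, "output/book.wav")

print(pipeline.get_processing_statistics(results))
print(exported)
```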
epub_processor.py ADDED
@@ -0,0 +1,401 @@
+ #!/usr/bin/env python3
+ """
+ EPUB processing module for Russian Audiobook Studio.
+ Handles EPUB file validation, chapter extraction, and processing coordination.
+ """
+ import os
+ import re
+ import tempfile
+ from typing import List, Optional, Dict, Any
+ from dataclasses import dataclass
+ from pathlib import Path
+ import zipfile
+ from ebooklib import epub
+ from ebooklib.epub import EpubException
+
+
+ @dataclass
+ class Chapter:
+     """Represents a chapter in an EPUB book."""
+     title: str
+     content: str
+     file_name: str
+     order: int
+     preview: str  # First 100-200 characters for preview
+     status: str = "pending"  # pending, processing, completed, error
+     word_count: int = 0
+     estimated_duration: float = 0.0  # Estimated duration in minutes
+     error_message: Optional[str] = None
+
+
+ @dataclass
+ class EpubValidationResult:
+     """Result of EPUB file validation."""
+     is_valid: bool
+     error_message: Optional[str]
+     chapters: List[Chapter]
+     book_title: Optional[str]
+     book_author: Optional[str]
+     total_chapters: int
+
+
+ class EpubValidationError(Exception):
+     """Custom exception for EPUB validation errors."""
+     pass
+
+
+ class EpubValidator:
+     """Validates EPUB files and extracts chapter information."""
+
+     MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB limit
+     MIN_PREVIEW_LENGTH = 100
+     MAX_PREVIEW_LENGTH = 200
+
+     def __init__(self):
+         self.supported_extensions = ['.epub']
+
+     def validate_file(self, file_path: str) -> EpubValidationResult:
+         """
+         Validate an EPUB file and extract chapter information.
+
+         Args:
+             file_path: Path to the EPUB file
+
+         Returns:
+             EpubValidationResult with validation status and chapter information
+
+         Raises:
+             EpubValidationError: If validation fails
+         """
+         if not file_path:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="No file path provided",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check if file exists
+         if not os.path.exists(file_path):
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"File does not exist: {file_path}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check file extension
+         if not self._is_epub_file(file_path):
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="File is not an EPUB file. Please upload a .epub file.",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Check file size
+         file_size = os.path.getsize(file_path)
+         if file_size == 0:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message="File is empty",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         if file_size > self.MAX_FILE_SIZE:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"File is too large. Maximum size is {self.MAX_FILE_SIZE // (1024*1024)}MB",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+         # Try to parse the EPUB
+         try:
+             return self._parse_epub(file_path)
+         except EpubException as e:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"Invalid EPUB file: {str(e)}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+         except Exception as e:
+             return EpubValidationResult(
+                 is_valid=False,
+                 error_message=f"Error reading EPUB file: {str(e)}",
+                 chapters=[],
+                 book_title=None,
+                 book_author=None,
+                 total_chapters=0
+             )
+
+     def _is_epub_file(self, file_path: str) -> bool:
+         """Check if file has EPUB extension."""
+         return Path(file_path).suffix.lower() in self.supported_extensions
+
+     def _parse_epub(self, file_path: str) -> EpubValidationResult:
+         """Parse EPUB file and extract chapter information."""
+         try:
+             book = epub.read_epub(file_path)
+
+             # Extract book metadata
+             book_title = book.get_metadata('DC', 'title')
+             book_author = book.get_metadata('DC', 'creator')
+
+             title = book_title[0][0] if book_title else "Unknown Title"
+             author = book_author[0][0] if book_author else "Unknown Author"
+
+             # Extract chapters
+             chapters = self._extract_chapters(book)
+
+             if not chapters:
+                 return EpubValidationResult(
+                     is_valid=False,
+                     error_message="No readable chapters found in EPUB file",
+                     chapters=[],
+                     book_title=title,
+                     book_author=author,
+                     total_chapters=0
+                 )
+
+             return EpubValidationResult(
+                 is_valid=True,
+                 error_message=None,
+                 chapters=chapters,
+                 book_title=title,
+                 book_author=author,
+                 total_chapters=len(chapters)
+             )
+
+         except Exception as e:
+             raise EpubValidationError(f"Failed to parse EPUB: {str(e)}")
+
+     def _extract_chapters(self, book: epub.EpubBook) -> List[Chapter]:
+         """Extract chapters from EPUB book."""
+         chapters = []
+         chapter_order = 0
+
+         # Try to get items from spine first (reading order)
+         spine_items = []
+         if hasattr(book, 'spine') and book.spine:
+             for item_id, linear in book.spine:
+                 # Skip items explicitly marked as non-linear
+                 if str(linear).lower() == 'no':
+                     continue
+                 item = book.get_item_with_id(item_id)
+                 if item:
+                     spine_items.append(item)
+
+         # If no spine items, get all document items
+         if not spine_items:
+             spine_items = [item for item in book.get_items() if item.get_type() == 9]  # 9 = HTML document type
+
+         # Process each item
+         for item in spine_items:
+             # Check if item is HTML content
+             if item.get_type() != 9:  # 9 = HTML document type
+                 continue
+
+             # Extract text content
+             content = self._extract_text_content(item)
+             if not content or len(content.strip()) < 50:  # Skip very short chapters
+                 continue
+
+             # Create chapter
+             chapter = Chapter(
+                 title=self._get_chapter_title(item, chapter_order),
+                 content=content,
+                 file_name=item.get_name(),
+                 order=chapter_order,
+                 preview=self._create_preview(content),
+                 word_count=self._count_words(content),
+                 estimated_duration=self._estimate_duration(content)
+             )
+
+             chapters.append(chapter)
+             chapter_order += 1
+
+         return chapters
+
+     def _extract_text_content(self, item) -> str:
+         """Extract text content from EPUB item."""
+         try:
+             # Get content and handle different encodings
+             raw_content = item.get_content()
+             if isinstance(raw_content, bytes):
+                 # Try different encodings
+                 for encoding in ['utf-8', 'latin-1', 'cp1252']:
+                     try:
+                         content = raw_content.decode(encoding)
+                         break
+                     except UnicodeDecodeError:
+                         continue
+                 else:
+                     # Fallback to utf-8 with errors='ignore'
+                     content = raw_content.decode('utf-8', errors='ignore')
+             else:
+                 content = str(raw_content)
+
+             # Basic HTML tag removal (simple approach)
+             content = re.sub(r'<[^>]+>', '', content)
+             # Clean up whitespace
+             content = re.sub(r'\s+', ' ', content).strip()
+             return content
+         except Exception as e:
+             print(f"Warning: Could not extract content from {item.get_name()}: {e}")
+             return ""
+
+     def _get_chapter_title(self, item, order: int) -> str:
+         """Get chapter title from item or generate default."""
+         # Try to extract title from content
+         try:
+             raw_content = item.get_content()
+             if isinstance(raw_content, bytes):
+                 content = raw_content.decode('utf-8', errors='ignore')
+             else:
+                 content = str(raw_content)
+
+             # Look for h1, h2, h3 tags
+             title_match = re.search(r'<h[1-3][^>]*>([^<]+)</h[1-3]>', content, re.IGNORECASE)
+             if title_match:
+                 title = title_match.group(1).strip()
+                 # Clean up the title
+                 title = re.sub(r'<[^>]+>', '', title)  # Remove any remaining HTML tags
+                 title = re.sub(r'\s+', ' ', title).strip()  # Clean whitespace
+                 if title:
+                     return title
+         except Exception:
+             pass
+
+         # Try to get title from item metadata
+         try:
+             if hasattr(item, 'title') and item.title:
+                 return str(item.title)
+         except Exception:
+             pass
+
+         # Fallback to file name or default
+         file_name = item.get_name()
+         if file_name:
+             # Clean up file name to make it more readable
+             clean_name = Path(file_name).stem
+             clean_name = clean_name.replace('_', ' ').replace('-', ' ')
+             clean_name = re.sub(r'\d+', '', clean_name)  # Remove numbers
+             clean_name = clean_name.strip()
+             if clean_name:
+                 return clean_name.title()
+
+         return f"Chapter {order + 1}"
+
+     def _create_preview(self, content: str) -> str:
+         """Create preview text from chapter content."""
+         if not content:
+             return ""
+
+         # Clean content for preview
+         preview = content.strip()
+
+         # Truncate to reasonable length
+         if len(preview) > self.MAX_PREVIEW_LENGTH:
+             preview = preview[:self.MAX_PREVIEW_LENGTH]
+             # Try to end at a sentence boundary
+             last_period = preview.rfind('.')
+             if last_period > self.MIN_PREVIEW_LENGTH:
+                 preview = preview[:last_period + 1]
+             else:
+                 preview = preview + "..."
+
+         return preview
+
+     def _count_words(self, content: str) -> int:
+         """Count words in content."""
+         if not content:
+             return 0
+
+         # Simple word counting - split by whitespace and filter empty strings
+         words = [word for word in content.split() if word.strip()]
+         return len(words)
+
+     def _estimate_duration(self, content: str) -> float:
+         """Estimate audio duration in minutes based on content length."""
+         if not content:
+             return 0.0
+
+         # Estimate based on average reading speed
+         # Russian text: ~150-200 words per minute for speech synthesis
+         # We'll use 180 words per minute as a reasonable estimate
+         word_count = self._count_words(content)
+         duration_minutes = word_count / 180.0
+
+         # Add some buffer for processing time
+         return round(duration_minutes * 1.1, 1)
+
+
+ class EpubProcessor:
+     """Main EPUB processor for handling EPUB files in the web interface."""
+
+     def __init__(self):
+         self.validator = EpubValidator()
+         self.temp_dir = tempfile.mkdtemp(prefix="epub_processing_")
+
+     def process_epub_upload(self, file_path: str) -> EpubValidationResult:
+         """
+         Process an uploaded EPUB file.
+
+         Args:
+             file_path: Path to uploaded EPUB file
+
+         Returns:
+             EpubValidationResult with validation status and chapter information
+         """
+         return self.validator.validate_file(file_path)
+
+     def update_chapter_status(self, chapters: List[Chapter], chapter_index: int, status: str, error_message: Optional[str] = None):
+         """Update the status of a specific chapter."""
+         if 0 <= chapter_index < len(chapters):
+             chapters[chapter_index].status = status
+             if error_message:
+                 chapters[chapter_index].error_message = error_message
+
+     def get_chapter_status_summary(self, chapters: List[Chapter]) -> Dict[str, int]:
+         """Get a summary of chapter statuses."""
+         summary = {"pending": 0, "processing": 0, "completed": 0, "error": 0}
+         for chapter in chapters:
+             if chapter.status in summary:
+                 summary[chapter.status] += 1
+         return summary
+
+     def get_total_estimated_duration(self, chapters: List[Chapter]) -> float:
+         """Get total estimated duration for all chapters."""
+         return sum(chapter.estimated_duration for chapter in chapters)
+
+     def get_total_word_count(self, chapters: List[Chapter]) -> int:
+         """Get total word count for all chapters."""
+         return sum(chapter.word_count for chapter in chapters)
+
+     def cleanup_temp_files(self):
+         """Clean up temporary files."""
+         import shutil
+         try:
+             shutil.rmtree(self.temp_dir, ignore_errors=True)
+         except Exception:
+             pass
+
+     def __del__(self):
+         """Cleanup on destruction."""
+         self.cleanup_temp_files()
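A small validation-only sketch, useful for checking chapter detection before spending time on synthesis; `book.epub` is a placeholder path.

```python
from epub_processor import EpubProcessor

processor = EpubProcessor()
result = processor.process_epub_upload("book.epub")  # placeholder path

if result.is_valid:
    print(f"{result.book_title} by {result.book_author}: {result.total_chapters} chapters")
    for ch in result.chapters:
        print(f"{ch.order + 1:3d}. {ch.title} ({ch.word_count} words, ~{ch.estimated_duration} min)")
    print("Total estimate:", processor.get_total_estimated_duration(result.chapters), "minutes")
else:
    print("Validation failed:", result.error_message)
```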
requirements.txt ADDED
@@ -0,0 +1,31 @@
+ # Core dependencies for Hugging Face Spaces deployment
+ gradio>=4.44.0
+ numpy>=1.24.0
+ soundfile>=0.12.1
+ pydub>=0.25.1
+ librosa>=0.10.1
+ tqdm>=4.66.0
+ pyloudnorm>=0.1.1
+
+ # Russian linguistics for automatic accent placement
+ ruaccent>=1.5.7
+
+ # Hugging Face / TTS stack
+ huggingface_hub>=0.23.0
+ f5-tts>=0.1.1
+ datasets>=2.20.0
+
+ # Audiobook generation and metadata
+ mutagen>=1.47.0
+
+ # PyTorch for Hugging Face Spaces (CPU version for compatibility)
+ torch>=2.0.0
+ torchaudio>=2.0.0
+
+ # Additional dependencies for EPUB processing
+ ebooklib>=0.18
+ beautifulsoup4>=4.12.0
+ lxml>=4.9.0
+
+ # Audio processing
+ scipy>=1.10.0
samples/001/sample.text ADDED
@@ -0,0 +1,3 @@
+ — До свидания, дорогая.
+ — До свидания, любимый.
+ Аликс Мартин стояла, прислонясь к маленькой, грубо сколоченной калитке, и смотрела вслед му
synth_batch.py ADDED
@@ -0,0 +1,86 @@
+ #!/usr/bin/env python3
+ import argparse, os, sys, re
+ import numpy as np
+ import soundfile as sf
+ from tqdm import tqdm
+ from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent
+ from utils.audio import crossfade_concat, normalize_lufs, save_wav
+ from backends.espeech_backend import EspeechBackend
+
+ def read_input(path: str) -> str:
+     if path.lower().endswith(".txt"):
+         with open(path, "r", encoding="utf-8") as f:
+             return f.read()
+     elif path.lower().endswith(".epub"):
+         try:
+             from ebooklib import epub
+             from bs4 import BeautifulSoup
+         except Exception:
+             print("Для EPUB установите: pip install ebooklib beautifulsoup4 lxml", file=sys.stderr)
+             sys.exit(2)
+         book = epub.read_epub(path)
+         texts = []
+         for item in book.get_items():
+             if item.get_type() == 9:  # DOCUMENT
+                 soup = BeautifulSoup(item.get_body_content(), "lxml")
+                 texts.append(soup.get_text(" ", strip=True))
+         return "\n\n".join(texts)
+     else:
+         raise ValueError("Поддерживаются .txt и .epub")
+
+ def load_default_ref_text():
+     """Load default reference text from local sample"""
+     try:
+         with open("samples/001/sample.text", "r", encoding="utf-8") as f:
+             return f.read().strip()
+     except FileNotFoundError:
+         return ""
+
+ def main():
+     ap = argparse.ArgumentParser()
+     ap.add_argument("--input", required=True, help="Путь к TXT/EPUB")
+     ap.add_argument("--outdir", required=True, help="Каталог для результата")
+     ap.add_argument("--ref-audio", required=False, default="samples/001/sample.mp3", help="Путь к референс-аудио (6–12с)")
+     ap.add_argument("--ref-text", required=False, default=load_default_ref_text(), help="Референс-текст")
+     ap.add_argument("--model-repo", default=os.getenv("MODEL_REPO", "ESpeech/ESpeech-TTS-1_RL-V2"))
+     ap.add_argument("--speed", type=float, default=1.0)
+     ap.add_argument("--nfe-steps", type=int, default=48)
+     ap.add_argument("--crossfade-ms", type=int, default=150)
+     ap.add_argument("--target-lufs", type=float, default=-20.0)
+     args = ap.parse_args()
+
+     os.makedirs(args.outdir, exist_ok=True)
+
+     backend = EspeechBackend(model_id=args.model_repo)
+
+     raw = read_input(args.input)
+     text = normalize_text(raw)
+     paragraphs = split_into_paragraphs(text)
+     paragraphs = [maybe_ruaccent(p) for p in paragraphs]
+
+     print(f"Абзацев: {len(paragraphs)}")
+
+     pieces = []
+     sr = None
+     for i, para in enumerate(tqdm(paragraphs, desc="Генерация")):
+         audio, sr = backend.synthesize(
+             text=para,
+             ref_audio_path=args.ref_audio,
+             ref_text=args.ref_text,
+             speed=args.speed,
+             nfe_steps=args.nfe_steps,
+             seed=None,
+         )
+         pieces.append(audio)
+         # Optionally keep per-paragraph drafts
+         # sf.write(os.path.join(args.outdir, f"para_{i:05d}.wav"), audio, sr)
+
+     final = crossfade_concat(pieces, crossfade_ms=args.crossfade_ms, sample_rate=sr)
+     final = normalize_lufs(final, sr, target_lufs=args.target_lufs)
+
+     out_path = os.path.join(args.outdir, "book.wav")
+     save_wav(out_path, final, sr)
+     print(f"ГОТОВО: {out_path}")
+
+ if __name__ == "__main__":
+     main()
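For reference, one possible way to drive the batch script programmatically (an equivalent `python synth_batch.py --input ... --outdir ...` shell call works the same way). The input and output paths here are placeholders, and only flags defined by the script above are used.

```python
import subprocess

# Placeholder paths; --ref-text falls back to the bundled sample transcript.
subprocess.run(
    [
        "python", "synth_batch.py",
        "--input", "book.epub",
        "--outdir", "output",
        "--ref-audio", "samples/001/sample.mp3",
        "--nfe-steps", "32",
    ],
    check=True,
)
```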
utils/audio.py ADDED
@@ -0,0 +1,35 @@
+ # utils/audio.py
+ from typing import List
+ import numpy as np
+ import soundfile as sf
+ from pydub import AudioSegment
+ import pyloudnorm as pyln
+
+ def _to_audiosegment(wav: np.ndarray, sr: int) -> AudioSegment:
+     wav16 = np.int16(np.clip(wav, -1.0, 1.0) * 32767.0)
+     return AudioSegment(
+         wav16.tobytes(), frame_rate=sr, sample_width=2, channels=1
+     )
+
+ def crossfade_concat(pieces: List[np.ndarray], crossfade_ms: int, sample_rate: int) -> np.ndarray:
+     if not pieces:
+         return np.zeros(1, dtype=np.float32)
+     seg = _to_audiosegment(pieces[0], sample_rate)
+     for p in pieces[1:]:
+         seg_next = _to_audiosegment(p, sample_rate)
+         seg = seg.append(seg_next, crossfade=crossfade_ms)
+     # Convert back to float32 in [-1..1]
+     arr = np.array(seg.get_array_of_samples()).astype(np.int16).astype(np.float32) / 32767.0
+     return arr
+
+ def normalize_lufs(wav: np.ndarray, sr: int, target_lufs: float = -20.0) -> np.ndarray:
+     meter = pyln.Meter(sr)
+     loudness = meter.integrated_loudness(wav)
+     wav_norm = pyln.normalize.loudness(wav, loudness, target_lufs)
+     # Clipping safety
+     wav_norm = np.clip(wav_norm, -0.999, 0.999).astype(np.float32)
+     return wav_norm
+
+ def save_wav(path: str, wav: np.ndarray, sr: int):
+     sf.write(path, wav, sr, subtype="PCM_16")
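A self-contained sketch exercising the three helpers with synthetic tones instead of TTS output, so it runs without the model; the 24 kHz sample rate and `tones.wav` filename are arbitrary choices for the example.

```python
import numpy as np

from utils.audio import crossfade_concat, normalize_lufs, save_wav

sr = 24000  # arbitrary sample rate for this toy example
t = np.linspace(0, 1.0, sr, endpoint=False)
tone_a = (0.3 * np.sin(2 * np.pi * 220 * t)).astype(np.float32)
tone_b = (0.3 * np.sin(2 * np.pi * 330 * t)).astype(np.float32)

# Join the two pieces with a 150 ms crossfade, level to -20 LUFS, write 16-bit WAV.
joined = crossfade_concat([tone_a, tone_b], crossfade_ms=150, sample_rate=sr)
joined = normalize_lufs(joined, sr, target_lufs=-20.0)
save_wav("tones.wav", joined, sr)
```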
utils/text.py ADDED
@@ -0,0 +1,40 @@
+ # utils/text.py
+ import re
+
+ def normalize_text(text: str) -> str:
+     # Simple normalization: collapse extra spaces and unify line breaks.
+     text = text.replace("\r\n", "\n").replace("\r", "\n")
+     text = re.sub(r"[ \t]+", " ", text)
+     text = re.sub(r"\n{3,}", "\n\n", text).strip()
+     return text
+
+ def split_into_paragraphs(text: str):
+     # Split on blank lines; could also split by length or sentence boundaries.
+     parts = [p.strip() for p in text.split("\n\n")]
+     parts = [p for p in parts if p]
+     return parts
+
+ def maybe_ruaccent(text: str) -> str:
+     """If ruaccent is installed, place stress marks automatically.
+     Otherwise return the original text unchanged.
+     """
+     try:
+         from ruaccent import RUAccent
+         # Note: the accentuator is re-created on every call; callers that process
+         # many paragraphs may want to cache it.
+         acc = RUAccent()
+         # Use turbo3.1 model for better accuracy and speed
+         acc.load(omograph_model_size="turbo3.1", use_dictionary=True, tiny_mode=False)
+         return acc.process_all(text)
+     except Exception as e:
+         print(f"Warning: ruaccent failed, using original text: {e}")
+         return text
+
+ def maybe_ruaccent_advanced(text: str, model_size: str = "turbo3.1", use_dict: bool = True) -> str:
+     """Enhanced ruaccent with configurable parameters for better voice quality."""
+     try:
+         from ruaccent import RUAccent
+         acc = RUAccent()
+         acc.load(omograph_model_size=model_size, use_dictionary=use_dict, tiny_mode=False)
+         return acc.process_all(text)
+     except Exception as e:
+         print(f"Warning: ruaccent failed, using original text: {e}")
+         return text
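A quick sketch of the text helpers on a tiny Russian snippet; if ruaccent is not installed, `maybe_ruaccent` simply returns the paragraphs unchanged, so the example runs either way.

```python
from utils.text import normalize_text, split_into_paragraphs, maybe_ruaccent

raw = "Первый   абзац.\r\n\r\n\r\nВторой абзац."

text = normalize_text(raw)                     # collapses spaces and extra blank lines
paragraphs = split_into_paragraphs(text)       # -> ['Первый абзац.', 'Второй абзац.']
accented = [maybe_ruaccent(p) for p in paragraphs]  # adds stress marks when ruaccent is available

print(paragraphs)
print(accented)
```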