| """Simplified audio generation functionality that delegates complex processing to the TTS API.""" | |
| from typing import Tuple, Optional | |
| import gradio as gr | |
| import numpy as np | |
| class SimpleAudioProcessor: | |
| """Simplified audio processor that uses the enhanced TTS API for complex processing.""" | |
| def __init__(self): | |
| """Initialize the simple audio processor.""" | |
| pass | |
| def generate_audio(self, explanation_text: str, progress=None) -> Tuple[Tuple[int, np.ndarray], dict]: | |
| """ | |
| Generate TTS audio for explanations using the enhanced TTS API. | |
| This method sends the full text to the TTS API which handles: | |
| - Text chunking | |
| - Parallel processing | |
| - Audio concatenation | |
| - All on the server side with GPU acceleration | |
| Args: | |
| explanation_text: The text to convert to audio | |
| progress: Optional progress callback | |
| Returns: | |
| Tuple of (audio_result, update_dict) where audio_result is (sample_rate, audio_data) | |
| """ | |
| if not explanation_text or explanation_text.strip() == "": | |
| raise gr.Error("No explanations available to convert to audio. Please generate explanations first.") | |
| try: | |
| clean_text = explanation_text.strip() | |
| if progress: | |
| progress(0.1, desc="Sending text to TTS API for processing...") | |
| # Import the simplified audio generation function | |
| from .generate_simple_tts_audio import generate_simple_tts_audio | |
| # Generate audio using the new simplified API call | |
| audio_result = generate_simple_tts_audio(clean_text, progress=progress) | |
| if progress: | |
| progress(1.0, desc="Audio generation complete!") | |
| return audio_result, gr.update(visible=True) | |
| except Exception as e: | |
| raise gr.Error(f"Error generating audio: {str(e)}") | |
| def get_processing_info(self, text: str) -> dict: | |
| """Get basic information about the text to be processed.""" | |
| if not text or not text.strip(): | |
| return {"error": "No text provided"} | |
| text_length = len(text.strip()) | |
| estimated_chunks = max(1, text_length // 800) # Rough estimate | |
| estimated_time = text_length * 0.05 # Rough estimate: 0.05 seconds per character | |
| return { | |
| "processing_mode": "server_side_parallel", | |
| "text_length": text_length, | |
| "estimated_chunks": estimated_chunks, | |
| "estimated_time_seconds": estimated_time, | |
| "estimated_time_readable": f"{estimated_time:.1f} seconds" if estimated_time < 60 else f"{estimated_time/60:.1f} minutes", | |
| "note": "Processing handled by TTS API with GPU acceleration" | |
| } | |
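

# Example usage (a minimal sketch, not part of the original module): wiring the
# processor into a Gradio Blocks app. The component names below ("explanation_box",
# "audio_out", "audio_group") and the overall layout are hypothetical; only
# SimpleAudioProcessor, its two methods, and the gr.update(visible=True) second
# return value come from the code above. Note that generate_audio performs a
# relative import, so this sketch assumes the module is run as part of its
# package (e.g. `python -m <package>.<module>`), not as a standalone script.
if __name__ == "__main__":
    processor = SimpleAudioProcessor()

    def _to_audio(explanation_text, progress=gr.Progress()):
        # generate_audio returns ((sample_rate, np.ndarray), gr.update(...)),
        # which maps onto the Audio component and the group that contains it.
        return processor.generate_audio(explanation_text, progress=progress)

    with gr.Blocks() as demo:
        explanation_box = gr.Textbox(label="Explanation text", lines=8)
        info_box = gr.JSON(label="Processing estimate")
        with gr.Group(visible=False) as audio_group:
            audio_out = gr.Audio(label="Narrated explanation")

        # Show rough chunk/time estimates as the text changes, then generate audio on demand.
        explanation_box.change(processor.get_processing_info, explanation_box, info_box)
        gr.Button("Generate audio").click(_to_audio, explanation_box, [audio_out, audio_group])

    demo.launch()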