Spaces:
Sleeping
Sleeping
| import logging | |
| import numpy as np | |
| import soundfile as sf | |
| from typing import Optional, Generator, Tuple | |
| from utils.tts_base import TTSBase | |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Availability flag for the optional Kokoro backend. It starts out False and
# is switched to True only once the import below has succeeded.
KOKORO_AVAILABLE = False

# Guarded import: a failure here must not stop this module from importing,
# the rest of the file checks KOKORO_AVAILABLE before touching KPipeline.
try:
    from kokoro import KPipeline
    KOKORO_AVAILABLE = True
    logger.info("Kokoro TTS engine is available")
except ImportError:
    # The import itself failed; the flag keeps its initial False value.
    logger.warning("Kokoro TTS engine is not available")
except Exception as e:
    # Any other exception raised while importing is logged, and the flag is
    # reset explicitly so it is False no matter where the failure occurred.
    logger.error(f"Kokoro import failed with unexpected error: {str(e)}")
    KOKORO_AVAILABLE = False
def _get_pipeline(lang_code: str = 'z'):
    """Build a Kokoro pipeline for the requested language.

    Args:
        lang_code (str): Language code forwarded to ``KPipeline``.

    Returns:
        KPipeline or None: A newly constructed pipeline, or None when Kokoro
        is flagged as unavailable or the constructor raises.
    """
    if KOKORO_AVAILABLE:
        try:
            loaded = KPipeline(lang_code=lang_code)
            logger.info("Kokoro pipeline successfully loaded")
        except Exception as e:
            # Construction failures are logged and reported to the caller
            # as None instead of being propagated.
            logger.error(f"Failed to initialize Kokoro pipeline: {str(e)}")
            loaded = None
        return loaded

    # Kokoro could not be imported, so there is nothing to construct.
    logger.warning("Kokoro TTS engine is not available")
    return None
class KokoroTTS(TTSBase):
    """Kokoro TTS engine implementation.

    This engine uses the Kokoro library for TTS generation. The pipeline is
    created on first use rather than in ``__init__``, so constructing the
    engine does not itself build a Kokoro pipeline.

    NOTE(review): the default ``lang_code`` ('z') and the default voice
    ('af_heart') look like they belong to different Kokoro languages --
    confirm against the Kokoro voice list before relying on the defaults.
    """

    # Sample rate (Hz) passed to soundfile when writing files and reported
    # to consumers of the streaming API.
    _SAMPLE_RATE = 24000

    def __init__(self, lang_code: str = 'z'):
        """Initialize the Kokoro TTS engine.

        Args:
            lang_code (str): Language code for the engine
        """
        super().__init__(lang_code)
        # Created lazily by _ensure_pipeline().
        self.pipeline = None

    def _ensure_pipeline(self) -> bool:
        """Ensure the pipeline is loaded.

        Returns:
            bool: True if pipeline is available, False otherwise
        """
        if self.pipeline is None:
            self.pipeline = _get_pipeline(self.lang_code)
        return self.pipeline is not None

    def generate_speech(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Optional[str]:
        """Generate speech using Kokoro TTS engine.

        Every audio segment yielded by the pipeline is collected and written
        to a single file, so text that Kokoro splits into several segments is
        synthesized in full.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use (e.g., 'af_heart', 'af_bella', etc.)
            speed (float): Speech speed multiplier (0.5 to 2.0)

        Returns:
            Optional[str]: Path to the generated audio file or None if
            generation fails, the text is empty, or no audio was produced
        """
        # Validate before len(text) so None does not raise outside the try.
        if not text or not text.strip():
            logger.error("Cannot generate speech with Kokoro: input text is empty")
            return None

        logger.info(f"Generating speech with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return None

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return None

        try:
            # Generate unique output path
            output_path = self._generate_output_path(prefix="kokoro")

            # Collect every segment; results that carry no audio are skipped.
            segments = [
                np.asarray(audio)
                for _, _, audio in self.pipeline(text, voice=voice, speed=speed)
                if audio is not None
            ]
            if not segments:
                # Nothing was synthesized, so no file exists to point at.
                logger.error("Kokoro produced no audio for the given text")
                return None

            logger.info(f"Saving Kokoro audio to {output_path}")
            sf.write(output_path, np.concatenate(segments), self._SAMPLE_RATE)

            logger.info(f"Kokoro audio generation complete: {output_path}")
            return output_path
        except Exception as e:
            logger.error(f"Error generating speech with Kokoro: {str(e)}", exc_info=True)
            return None

    def generate_speech_stream(self, text: str, voice: str = 'af_heart', speed: float = 1.0) -> Generator[Tuple[int, np.ndarray], None, None]:
        """Generate speech stream using Kokoro TTS engine.

        Args:
            text (str): Input text to synthesize
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier

        Yields:
            tuple: (sample_rate, audio_data) pairs for each segment. Audio is
            passed through exactly as the pipeline produced it. Nothing is
            yielded when the text is empty, Kokoro is unavailable, or the
            pipeline fails.
        """
        # Validate before len(text) so None does not raise.
        if not text or not text.strip():
            logger.error("Cannot generate speech stream with Kokoro: input text is empty")
            return

        logger.info(f"Generating speech stream with Kokoro for text length: {len(text)}")

        # Check if Kokoro is available
        if not KOKORO_AVAILABLE:
            logger.error("Kokoro TTS engine is not available")
            return

        # Ensure pipeline is loaded
        if not self._ensure_pipeline():
            logger.error("Failed to load Kokoro pipeline")
            return

        try:
            # Generate speech stream
            for _, _, audio in self.pipeline(text, voice=voice, speed=speed):
                if audio is None:
                    # Never hand consumers a chunk without audio data.
                    continue
                yield self._SAMPLE_RATE, audio
        except Exception as e:
            # An error mid-stream is logged and simply ends the stream.
            logger.error(f"Error generating speech stream with Kokoro: {str(e)}", exc_info=True)
            return