from datetime import datetime
from os import makedirs

from gradio import Error
from loguru import logger
from numpy import ndarray
from soundfile import write
from torch import Tensor

from vocalizr import BASE_DIR, CHAR_LIMIT, PIPELINE

# Sample rate (Hz) used both when writing WAV files and in the value
# returned to callers of generate_audio_for_text.
_SAMPLE_RATE: int = 24000


def save_file_wav(audio: ndarray) -> None:
    """Save audio data to a timestamped WAV file under ``BASE_DIR/results``.

    The output directory is created if it does not exist yet. The file name is
    the current local time formatted as ``%Y-%m-%d_%H-%M-%S`` with a ``.wav``
    suffix, and the data is written at a fixed sample rate of 24,000 Hz.

    :param audio: Data to save.
    :return: None
    :raise OSError: If the directory cannot be created or the file cannot be
        written.
    """
    # Create the very directory we write into. The previous code created a
    # CWD-relative "results" folder while writing to BASE_DIR/results, which
    # fails whenever the process is not started from BASE_DIR.
    results_dir: str = f"{BASE_DIR}/results"
    filename: str = (
        f"{results_dir}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.wav"
    )
    try:
        makedirs(name=results_dir, exist_ok=True)
        logger.info(f"Saving audio to {filename}")
        write(file=filename, data=audio, samplerate=_SAMPLE_RATE)
    except OSError as e:
        raise OSError(f"Failed to save audio to {filename}: {e}") from e


def generate_audio_for_text(
    text: str, voice: str = "af_heart", speed: float = 1, save_file: bool = False
) -> tuple[int, ndarray]:
    """Generate audio for the input text.

    When ``CHAR_LIMIT`` is not ``None`` the text is stripped of surrounding
    whitespace and truncated to ``CHAR_LIMIT`` characters; otherwise it is
    passed to the pipeline unchanged.

    :param text: Input text to convert to speech
    :param voice: Voice identifier
    :param speed: Speech speed multiplier
    :param save_file: If to save the audio file to disk.
    :return: Tuple containing the audio sample rate and raw audio data.
    :raise Error: If the pipeline raises a gradio ``Error`` during generation.
    :raise RuntimeError: If the pipeline yields no audio at all.
    """
    text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
    try:
        for _, _, audio in PIPELINE(text, voice, speed):
            audio = Tensor(audio).numpy()
            if save_file:
                save_file_wav(audio=audio)
            # NOTE(review): returns on the first item the pipeline yields; any
            # further items are never consumed. Confirm this is intended for
            # inputs the pipeline splits into several chunks.
            return _SAMPLE_RATE, audio
    except Error as e:
        # Only gradio Errors are re-wrapped; every other exception type
        # (including OSError from save_file_wav) propagates unchanged.
        raise Error(message=str(e)) from e
    raise RuntimeError("No audio generated")