"""Gradio text-to-speech demo that synthesizes speech with a Piper voice."""

from functools import lru_cache
from io import BytesIO
from typing import Optional
import wave

import gradio as gr
from huggingface_hub import hf_hub_download
import numpy as np
from piper import PiperVoice  # Adjust import as per your project structure

# Hugging Face Hub location of the voice model and its JSON configuration.
# To use local files instead, skip the download in `_load_voice` and pass the
# local paths straight to `PiperVoice.load`.
# NOTE(review): the file names below are kept exactly as they were; confirm
# that they match the repository layout (the files may live in a
# sub-directory of the repo, in which case the relative path is needed here).
VOICE_REPO_ID = "rhasspy/piper-voices"
VOICE_MODEL_FILE = "en_GB-alan-medium.onnx"
VOICE_CONFIG_FILE = "en_GB-alan-medium.onnx.json"


@lru_cache(maxsize=1)
def _load_voice():
    """Download (if needed) and load the Piper voice, once per process.

    The result is cached so that repeated synthesis requests reuse the same
    loaded voice instead of resolving the files and rebuilding the model on
    every call.
    """
    model_path = hf_hub_download(repo_id=VOICE_REPO_ID, filename=VOICE_MODEL_FILE)
    config_path = hf_hub_download(repo_id=VOICE_REPO_ID, filename=VOICE_CONFIG_FILE)
    return PiperVoice.load(model_path, config_path)


def synthesize_speech(text: str) -> Optional[bytes]:
    """Synthesize *text* into a WAV file held in memory.

    Args:
        text: The text to speak.

    Returns:
        The bytes of a complete WAV file (header included, 16-bit mono at the
        voice's configured sample rate), or ``None`` when *text* is empty or
        contains only whitespace.
    """
    # Nothing to say: skip loading the model and running the synthesizer.
    if not text or not text.strip():
        return None

    voice = _load_voice()

    # Write the WAV into an in-memory buffer rather than a temporary file.
    buffer = BytesIO()
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setframerate(voice.config.sample_rate)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setnchannels(1)  # mono
        voice.synthesize(text, wav_file)

    # Read the buffer only after the `with` block has closed the WAV writer,
    # so the header is complete. The bytes are returned unchanged.
    return buffer.getvalue()


# Create a Gradio interface with labels
iface = gr.Interface(
    fn=synthesize_speech,
    inputs=gr.Textbox(label="Input Text"),
    outputs=[gr.Audio(label="Synthesized Speech")],
    title="Text to Speech Synthesizer",
    description="Enter text to synthesize it into speech using PiperVoice.",
    allow_flagging="never",
)

# Run the app only when executed as a script, so importing this module does
# not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()