# UrduTTS / app.py
# Chan-Y's picture
# Create app.py
# 9f70b20 verified
import tempfile

import gradio as gr
import torchaudio
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice
# Relative path passed to TextToSpeech as its models directory.
MODELS_DIR = "urdu-tts/models/urdu-multispeaker"

# Build the TextToSpeech engine once at import time; generate_audio uses
# this module-level instance for every request.
tts = TextToSpeech(models_dir=MODELS_DIR)
# Function to generate audio from text
def generate_audio(text: str, voice: str, preset: str) -> str:
    """Synthesize ``text`` with the chosen voice and return a WAV file path.

    Args:
        text: Text to speak, as entered in the UI textbox.
        voice: Voice name forwarded to ``load_voice``.
        preset: Preset name forwarded to ``tts.tts_with_preset``.

    Returns:
        Path of a newly created temporary ``.wav`` file holding the audio.

    Raises:
        gr.Error: If ``text`` is empty or whitespace-only, or if loading the
            voice, synthesizing, or saving the audio fails. The original
            exception is chained as the cause.
    """
    # Reject blank input up front instead of handing it to the model.
    if not text or not text.strip():
        raise gr.Error("Error: Please enter some text to synthesize.")

    try:
        # Load the voice samples and conditioning latents
        voice_samples, conditioning_latents = load_voice(voice)

        # Generate the audio
        generated_audio = tts.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset=preset,
        )

        # Write to a unique file per request: a fixed "output.wav" would be
        # overwritten whenever two requests overlap. The handle is closed
        # before saving so the path can be reopened on every platform.
        # The file is deliberately not deleted here, since the caller still
        # needs to read it after this function returns.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name
        torchaudio.save(output_file, generated_audio.squeeze(0).cpu(), 24000)
    except Exception as e:
        # This is the UI boundary: returning the message as a string would
        # make the Audio output treat it as a file path, so raise gr.Error
        # to surface the failure to the user instead.
        raise gr.Error(f"Error: {e}") from e

    # Return the audio file
    return output_file
# Options offered by the two dropdowns; the first entry of each is the default.
VOICE_CHOICES = ["daniel", "voice2", "voice3"]
PRESET_CHOICES = ["fast", "standard", "high_quality"]

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Urdu Text-to-Speech")

    # NOTE(review): the source lost its indentation, so which components sit
    # inside this row is reconstructed (the three inputs) - confirm.
    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text (Urdu)",
            placeholder="محبت، ادب، شاعری، زبان، دلکش",
        )
        voice_dropdown = gr.Dropdown(
            label="Select Voice",
            choices=VOICE_CHOICES,
            value=VOICE_CHOICES[0],
        )
        preset_dropdown = gr.Dropdown(
            label="Select Preset",
            choices=PRESET_CHOICES,
            value=PRESET_CHOICES[0],
        )

    generate_button = gr.Button("Generate Audio")
    audio_output = gr.Audio(label="Generated Audio")

    # Clicking the button runs generate_audio on the three inputs and sends
    # its result to the audio component.
    generate_button.click(
        fn=generate_audio,
        inputs=[text_input, voice_dropdown, preset_dropdown],
        outputs=audio_output,
    )

# Launch the Gradio interface
demo.launch()