import tempfile

import gradio as gr
import torchaudio
from tortoise.api import TextToSpeech
from tortoise.utils.audio import load_voice


# Build the TextToSpeech engine once at import time so that every call to
# generate_audio() reuses this single module-level instance.
tts = TextToSpeech(
    models_dir="urdu-tts/models/urdu-multispeaker",
)


def generate_audio(text, voice, preset):
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        text: Text to speak, as entered in the UI textbox.
        voice: Voice name handed to ``load_voice``.
        preset: Preset name handed to ``tts.tts_with_preset``.

    Returns:
        Path of a newly written WAV file containing the generated audio.

    Raises:
        gr.Error: If *text* is blank, or if loading the voice, running the
            model, or saving the audio fails. Raising (rather than returning
            an error string) matters because the return value is wired to a
            ``gr.Audio`` output, which would treat a returned string as a
            file path.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")

    try:
        voice_samples, conditioning_latents = load_voice(voice)

        generated_audio = tts.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset=preset,
        )

        # Use a unique file per call: a single fixed filename would be
        # shared by concurrent requests, letting one overwrite another's
        # audio. The handle is closed before saving so the path can be
        # reopened for writing on every platform.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_file = tmp.name

        # Drop the leading dimension and move to CPU before writing the
        # audio at a 24000 Hz sample rate.
        torchaudio.save(output_file, generated_audio.squeeze(0).cpu(), 24000)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of
        # returning a string that the audio component cannot play.
        raise gr.Error(f"Error: {e}") from e

    return output_file


# Assemble the Gradio page: a title, one row holding the text box and the two
# dropdowns, then the trigger button and the audio player for the result.
with gr.Blocks() as demo:
    gr.Markdown("# Urdu Text-to-Speech")

    with gr.Row():
        text_input = gr.Textbox(
            label="Enter Text (Urdu)",
            placeholder="محبت، ادب، شاعری، زبان، دلکش",
        )
        voice_dropdown = gr.Dropdown(
            label="Select Voice",
            choices=["daniel", "voice2", "voice3"],
            value="daniel",
        )
        preset_dropdown = gr.Dropdown(
            label="Select Preset",
            choices=["fast", "standard", "high_quality"],
            value="fast",
        )

    generate_button = gr.Button("Generate Audio")
    audio_output = gr.Audio(label="Generated Audio")

    # Clicking the button runs generate_audio with the three inputs and feeds
    # its return value to the audio component.
    generate_button.click(
        fn=generate_audio,
        inputs=[text_input, voice_dropdown, preset_dropdown],
        outputs=audio_output,
    )

# Start the Gradio server for the interface defined above.
demo.launch()