# Hugging Face Spaces scrape header (status reported by the Space: "Runtime error")
import gradio as gr
import scipy.io.wavfile
import torch
from pydub import AudioSegment
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, VitsModel
# ---------------------------------------------------------------------------
# Model setup
# ---------------------------------------------------------------------------
# facebook/mms-tts-tam (Tamil) is a VITS text-to-speech checkpoint.  VITS is
# not an encoder-decoder architecture, so AutoModelForSeq2SeqLM cannot load
# it — from_pretrained raises at startup, which is the most likely cause of
# this Space's "Runtime error".  Load it with the dedicated VitsModel class.
model_name = "facebook/mms-tts-tam"
model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def audio_to_waveform(audio_file):
    """Load an audio file and return a normalized mono waveform tensor.

    Args:
        audio_file: Path to any audio file pydub/ffmpeg can decode.

    Returns:
        A ``(1, num_samples)`` float tensor scaled to ``[-1.0, 1.0]``.
    """
    audio = AudioSegment.from_file(audio_file)
    # Downmix multi-channel audio first: get_array_of_samples() returns
    # interleaved channel data, and the original code flattened interleaved
    # stereo straight into one row (alternating L/R samples).
    if audio.channels > 1:
        audio = audio.set_channels(1)
    samples = torch.FloatTensor(audio.get_array_of_samples())
    # Normalize integer PCM (sample_width bytes/sample) to [-1, 1]; models
    # expect float audio, not raw integer amplitudes.
    peak = float(1 << (8 * audio.sample_width - 1))
    return (samples / peak).view(1, -1)
def change_voice(input_audio, voice_sample, language):
    """Synthesize speech with the MMS Tamil TTS model and save it to disk.

    Args:
        input_audio: Path to the audio clip to process.
        voice_sample: Path to a reference voice clip.  NOTE(review): decoded
            below but never used afterwards — actual voice conversion is not
            implemented.
        language: Language code from the UI radio ("ta").  NOTE(review):
            also unused; the loaded checkpoint is Tamil-only.

    Returns:
        Path of the generated WAV file ("output.wav").
    """
    # Convert audio files to waveforms
    input_waveform = audio_to_waveform(input_audio)
    voice_waveform = audio_to_waveform(voice_sample)
    # Generate the new voice waveform
    # NOTE(review): this feeds a raw audio waveform into .generate() and
    # decodes the result as if it were token IDs.  The model here is
    # text-to-speech, not speech-to-text, so this cannot yield meaningful
    # text and likely raises at runtime; a separate ASR step would be
    # required.  TODO confirm the intended pipeline before changing.
    text = tokenizer.decode(model.generate(input_waveform))
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # VITS-style TTS outputs expose the synthesized audio as .waveform.
        output = model(**inputs).waveform
    # Save to output file
    output_path = "output.wav"
    # NOTE(review): `output` is presumably (batch, samples) while
    # scipy.io.wavfile.write expects 1-D or (samples, channels) — may need
    # a .squeeze() here.  TODO confirm.
    scipy.io.wavfile.write(output_path, rate=model.config.sampling_rate, data=output.numpy())
    return output_path
def toggle(choice):
    """Show exactly one of the two audio widgets, clearing both values.

    Returns a pair of ``gr.update`` objects ``(mic_update, upload_update)``:
    the mic widget is visible iff ``choice == "mic"``, the upload widget
    gets the opposite visibility, and both have their value reset.
    """
    use_mic = choice == "mic"
    mic_update = gr.update(visible=use_mic, value=None)
    upload_update = gr.update(visible=not use_mic, value=None)
    return mic_update, upload_update
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: user-supplied inputs and the submit button.
        with gr.Column():
            src_audio = gr.Audio(label="Input Audio", type="filepath")
            ref_audio = gr.Audio(label="Voice Sample", type="filepath")
            lang_choice = gr.Radio(label="Language", choices=["ta"], value="ta")
            submit_btn = gr.Button("Submit")
        # Right column: the synthesized result.
        with gr.Column():
            result_audio = gr.Audio(label="Output Audio")
    submit_btn.click(
        change_voice,
        inputs=[src_audio, ref_audio, lang_choice],
        outputs=result_audio,
    )
demo.launch()