Spaces:
Runtime error
Runtime error
File size: 1,881 Bytes
0196eee 460593c 0196eee e0640e9 460593c e0640e9 460593c e0640e9 460593c e0640e9 460593c e0640e9 460593c e0640e9 460593c e0640e9 0196eee e0640e9 0196eee e0640e9 0196eee e0640e9 0196eee 460593c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import scipy.io.wavfile
import torch
from pydub import AudioSegment
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, VitsModel
# Load the pre-trained model and tokenizer.
# The MMS-TTS checkpoints are VITS text-to-speech models, so they must be
# loaded with VitsModel: AutoModelForSeq2SeqLM has no mapping for the VITS
# configuration and fails at start-up. VitsModel is also the class whose
# forward pass exposes the `.waveform` output and whose config carries the
# `sampling_rate` that the synthesis code in this file relies on.
model_name = "facebook/mms-tts-tam"
model = VitsModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
def audio_to_waveform(audio_file):
    """Decode an audio file into a float tensor with a leading batch axis.

    Args:
        audio_file: Path (or file-like object) accepted by
            ``AudioSegment.from_file``.

    Returns:
        A ``torch.FloatTensor`` of shape ``(1, n_values)`` holding the raw
        sample values reported by pydub, converted to float without any
        rescaling.
        NOTE(review): for multi-channel input pydub presumably returns the
        channels interleaved in one flat array — confirm before relying on
        the tensor being a mono signal.
    """
    segment = AudioSegment.from_file(audio_file)
    raw_samples = segment.get_array_of_samples()
    flat = torch.FloatTensor(raw_samples)
    # Reshape the flat sample vector into a single-row batch.
    return flat.view(1, -1)
def change_voice(input_audio, voice_sample, language):
    """Re-synthesise the input audio with the loaded model and save it as WAV.

    Args:
        input_audio: Path to the audio file to convert.
        voice_sample: Path to an audio sample of the target voice.
        language: Language code selected in the UI.

    Returns:
        The path of the written WAV file (always ``"output.wav"``).
    """
    # Convert audio files to waveforms
    input_waveform = audio_to_waveform(input_audio)
    # NOTE(review): the voice sample is decoded but never used below, and
    # ``language`` is ignored entirely — confirm the intended design.
    voice_waveform = audio_to_waveform(voice_sample)

    # Generate the new voice waveform.
    # NOTE(review): this step expects the model to turn a waveform into token
    # ids. The checkpoint name in this file indicates a text-to-speech model,
    # so a separate speech-recognition model is presumably required to obtain
    # ``text`` — TODO confirm and replace this call.
    text = tokenizer.decode(model.generate(input_waveform))
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        output = model(**inputs).waveform

    # Assumes ``.waveform`` is shaped (batch, n_samples) with batch == 1.
    # scipy interprets a 2-D array as (n_samples, n_channels), so writing the
    # batched tensor directly would describe one frame with n_samples
    # channels; drop the size-1 axis to write a mono 1-D signal instead.
    samples = output.squeeze().cpu().numpy()

    # Save to output file
    output_path = "output.wav"
    scipy.io.wavfile.write(
        output_path,
        rate=model.config.sampling_rate,
        data=samples,
    )
    return output_path
def toggle(choice):
    """Build a pair of component updates that show exactly one of two inputs.

    Args:
        choice: The selected source; the string ``"mic"`` selects the first
            component, any other value selects the second.

    Returns:
        A 2-tuple of ``gr.update`` results. The first is visible only when
        ``choice == "mic"``, the second only otherwise; both reset the
        component value to ``None``.
    """
    mic_selected = choice == "mic"
    first_update = gr.update(visible=mic_selected, value=None)
    second_update = gr.update(visible=not mic_selected, value=None)
    return first_update, second_update
# Build the UI: inputs and the submit button on the left, the result on the
# right, then start the app.
# NOTE(review): the nesting below is reconstructed (the source indentation was
# flattened) — confirm that the button belongs in the left column and that
# both columns share one row.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Both audio inputs are handed to the callback as file paths.
            input_audio = gr.Audio(label="Input Audio", type="filepath")
            voice_sample = gr.Audio(label="Voice Sample", type="filepath")
            # Only one language choice ("ta") is offered, preselected.
            language = gr.Radio(label="Language", choices=["ta"], value="ta")
            btn = gr.Button("Submit")
        with gr.Column():
            output_audio = gr.Audio(label="Output Audio")
    # Wire the button to change_voice; its return value feeds output_audio.
    btn.click(change_voice, inputs=[input_audio, voice_sample, language], outputs=output_audio)
demo.launch()
|