Spaces:
Runtime error
Runtime error
Karthik64001
committed on
Commit
•
460593c
1
Parent(s):
e0640e9
Update app.py
Browse files
app.py
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
3 |
import torch
|
4 |
import scipy.io.wavfile
|
5 |
from pydub import AudioSegment
|
6 |
|
7 |
-
#
|
8 |
model_name = "facebook/mms-tts-tam"
|
9 |
-
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
11 |
|
12 |
-
def audio_to_waveform(
|
13 |
-
audio = AudioSegment.from_file(
|
14 |
waveform = torch.FloatTensor(audio.get_array_of_samples()).view(1, -1)
|
15 |
return waveform
|
16 |
|
@@ -20,17 +20,16 @@ def change_voice(input_audio, voice_sample, language):
|
|
20 |
voice_waveform = audio_to_waveform(voice_sample)
|
21 |
|
22 |
# Generate the new voice waveform
|
23 |
-
text = tokenizer.decode(
|
24 |
inputs = tokenizer(text, return_tensors="pt")
|
25 |
with torch.no_grad():
|
26 |
-
output =
|
27 |
|
28 |
# Save to output file
|
29 |
output_path = "output.wav"
|
30 |
-
scipy.io.wavfile.write(output_path, rate=
|
31 |
return output_path
|
32 |
|
33 |
-
# Gradio interface
|
34 |
def toggle(choice):
|
35 |
if choice == "mic":
|
36 |
return gr.update(visible=True, value=None), gr.update(visible=False, value=None)
|
@@ -49,4 +48,4 @@ with gr.Blocks() as demo:
|
|
49 |
|
50 |
btn.click(change_voice, inputs=[input_audio, voice_sample, language], outputs=output_audio)
|
51 |
|
52 |
-
demo.launch(
|
|
|
1 |
import gradio as gr
|
2 |
+
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
3 |
import torch
|
4 |
import scipy.io.wavfile
|
5 |
from pydub import AudioSegment
|
6 |
|
7 |
+
# Load the pre-trained model and tokenizer
|
8 |
model_name = "facebook/mms-tts-tam"
|
9 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
11 |
|
12 |
+
def audio_to_waveform(audio_file):
    """Decode an audio file into a 2-D float tensor of raw samples.

    Parameters
    ----------
    audio_file : str or file-like
        Anything accepted by ``pydub.AudioSegment.from_file``.

    Returns
    -------
    torch.FloatTensor
        Shape ``(1, n_samples)`` — the decoded samples flattened into a
        single row. NOTE(review): for multi-channel input the channels are
        interleaved into one row rather than split; confirm callers expect
        mono or interleaved data.
    """
    segment = AudioSegment.from_file(audio_file)
    samples = segment.get_array_of_samples()
    # Reshape the flat sample array into a single-batch row vector.
    return torch.FloatTensor(samples).view(1, -1)
|
16 |
|
|
|
20 |
voice_waveform = audio_to_waveform(voice_sample)
|
21 |
|
22 |
# Generate the new voice waveform
|
23 |
+
text = tokenizer.decode(model.generate(input_waveform))
|
24 |
inputs = tokenizer(text, return_tensors="pt")
|
25 |
with torch.no_grad():
|
26 |
+
output = model(**inputs).waveform
|
27 |
|
28 |
# Save to output file
|
29 |
output_path = "output.wav"
|
30 |
+
scipy.io.wavfile.write(output_path, rate=model.config.sampling_rate, data=output.numpy())
|
31 |
return output_path
|
32 |
|
|
|
33 |
def toggle(choice):
|
34 |
if choice == "mic":
|
35 |
return gr.update(visible=True, value=None), gr.update(visible=False, value=None)
|
|
|
48 |
|
49 |
btn.click(change_voice, inputs=[input_audio, voice_sample, language], outputs=output_audio)
|
50 |
|
51 |
+
demo.launch()
|