Karthik64001 committed
Commit 460593c
1 Parent(s): e0640e9

Update app.py

Files changed (1): app.py +9 -10
app.py CHANGED
@@ -1,16 +1,16 @@
 import gradio as gr
-from transformers import VitsModel, AutoTokenizer
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
 import scipy.io.wavfile
 from pydub import AudioSegment
 
-# Initialize the TTS model
+# Load the pre-trained model and tokenizer
 model_name = "facebook/mms-tts-tam"
-tts_model = VitsModel.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-def audio_to_waveform(audio):
-    audio = AudioSegment.from_file(audio)
+def audio_to_waveform(audio_file):
+    audio = AudioSegment.from_file(audio_file)
     waveform = torch.FloatTensor(audio.get_array_of_samples()).view(1, -1)
     return waveform
 
@@ -20,17 +20,16 @@ def change_voice(input_audio, voice_sample, language):
     voice_waveform = audio_to_waveform(voice_sample)
 
     # Generate the new voice waveform
-    text = tokenizer.decode(tts_model.generate(input_waveform))
+    text = tokenizer.decode(model.generate(input_waveform))
     inputs = tokenizer(text, return_tensors="pt")
     with torch.no_grad():
-        output = tts_model(**inputs).waveform
+        output = model(**inputs).waveform
 
     # Save to output file
     output_path = "output.wav"
-    scipy.io.wavfile.write(output_path, rate=tts_model.config.sampling_rate, data=output.numpy())
+    scipy.io.wavfile.write(output_path, rate=model.config.sampling_rate, data=output.numpy())
     return output_path
 
-# Gradio interface
 def toggle(choice):
     if choice == "mic":
         return gr.update(visible=True, value=None), gr.update(visible=False, value=None)
@@ -49,4 +48,4 @@ with gr.Blocks() as demo:
 
     btn.click(change_voice, inputs=[input_audio, voice_sample, language], outputs=output_audio)
 
-demo.launch(enable_queue=True)
+demo.launch()
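
For reference, a minimal sketch of loading this checkpoint the way the facebook/mms-tts-tam model card documents it: MMS TTS checkpoints are VITS models, so they load with VitsModel (the class the pre-commit code imported) rather than AutoModelForSeq2SeqLM, and they synthesize speech from a text prompt rather than a waveform. The Tamil sample text and output filename below are illustrative assumptions, not part of this commit.

import torch
import scipy.io.wavfile
from transformers import VitsModel, AutoTokenizer

# MMS TTS checkpoints are VITS models; VitsModel is the documented class.
model = VitsModel.from_pretrained("facebook/mms-tts-tam")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-tam")

# Tokenize a text prompt (illustrative Tamil sample) and synthesize speech.
inputs = tokenizer("வணக்கம்", return_tensors="pt")
with torch.no_grad():
    waveform = model(**inputs).waveform  # shape: (1, num_samples)

# Write the mono waveform at the model's native sampling rate.
scipy.io.wavfile.write("sample.wav", rate=model.config.sampling_rate,
                       data=waveform.squeeze().numpy())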
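On the launch change: a small sketch assuming the Space runs Gradio 4.x, where launch() no longer accepts the enable_queue argument and queueing is enabled explicitly instead; the commit itself does not state the Gradio version.

# Enable request queueing via .queue(), replacing launch(enable_queue=True).
demo.queue().launch()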