camanalo1 committed on
Commit
f23d60c
1 Parent(s): 1fe1102

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -12,8 +12,10 @@ transcriber = pipeline("automatic-speech-recognition", model="facebook/s2t-small
12
  generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
13
 
14
  # Initialize TTS tokenizer and model
15
- tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
16
- model = VitsModel.from_pretrained("facebook/mms-tts-eng")
 
 
17
 
18
  def transcribe_and_generate_audio(audio):
19
  sr, y = audio
@@ -26,11 +28,11 @@ def transcribe_and_generate_audio(audio):
26
  # Generate text based on ASR output
27
  generated_text = generator(asr_output)[0]['generated_text']
28
 
29
- # Generate audio from text
30
- inputs = tokenizer(text=generated_text, return_tensors="pt")
31
  set_seed(555)
32
  with torch.no_grad():
33
- outputs = model(**inputs)
34
  waveform = outputs.waveform[0]
35
  waveform_path = "output.wav"
36
  sf.write(waveform_path, waveform.numpy(), 16000, format='wav')
 
12
  generator = pipeline("text-generation", model="microsoft/Phi-3-mini-4k-instruct", trust_remote_code=True)
13
 
14
  # Initialize TTS tokenizer and model
15
+ tokenizer_tts = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")
16
+ model_tts = VitsModel.from_pretrained("facebook/mms-tts-eng")
17
+
18
+ print("TTS Tokenizer:", tokenizer_tts) # Print the tokenizer for the TTS model
19
 
20
  def transcribe_and_generate_audio(audio):
21
  sr, y = audio
 
28
  # Generate text based on ASR output
29
  generated_text = generator(asr_output)[0]['generated_text']
30
 
31
+ # Generate audio from text using TTS model
32
+ inputs = tokenizer_tts(text=generated_text, return_tensors="pt")
33
  set_seed(555)
34
  with torch.no_grad():
35
+ outputs = model_tts(**inputs)
36
  waveform = outputs.waveform[0]
37
  waveform_path = "output.wav"
38
  sf.write(waveform_path, waveform.numpy(), 16000, format='wav')