pratham0011 committed on
Commit
649a0ea
·
verified ·
1 Parent(s): 3873892

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -8,7 +8,9 @@ from llama_index.llms.text_generation_inference import TextGenerationInference
8
  import whisper
9
  import gradio as gr
10
  from gtts import gTTS
11
-
 
 
12
  model = whisper.load_model("base")
13
  HF_API_TOKEN = os.getenv("HF_TOKEN")
14
 
@@ -26,11 +28,27 @@ def translate_audio(audio):
26
  result = whisper.decode(model, mel, options)
27
  return result.text
28
 
29
- def audio_response(t):
30
- tts = gTTS(text=t, lang='en', slow=False)
31
- tts.save("output.mp3")
32
- mp3_file_path = "output.mp3"
33
- return mp3_file_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def messages_to_prompt(messages):
36
  # Default system message for a chatbot
 
8
  import whisper
9
  import gradio as gr
10
  from gtts import gTTS
11
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
12
+ import soundfile as sf
13
+ from datasets import load_dataset
14
  model = whisper.load_model("base")
15
  HF_API_TOKEN = os.getenv("HF_TOKEN")
16
 
 
28
  result = whisper.decode(model, mel, options)
29
  return result.text
30
 
31
# Lazily populated with (processor, tts_model, vocoder, speaker_embeddings) so
# the expensive model and dataset loads happen once per process, not per call.
_TTS_COMPONENTS = None


def _load_tts_components():
    """Return the SpeechT5 processor, model, vocoder and speaker embedding, loading them once."""
    global _TTS_COMPONENTS
    if _TTS_COMPONENTS is None:
        processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
        tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
        vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

        # The x-vector carries the speaker's voice characteristics; entry 7306
        # of the validation split is the fixed voice used by this app.
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

        # Assign only after every load succeeded, so a failed load is retried
        # on the next call instead of leaving a half-filled cache behind.
        _TTS_COMPONENTS = (processor, tts_model, vocoder, speaker_embeddings)
    return _TTS_COMPONENTS


def audio_response(text, output_path="speech.wav"):
    """Synthesize ``text`` to speech with SpeechT5 and write it to an audio file.

    Args:
        text: The text to speak.
        output_path: Destination path of the generated audio file.

    Returns:
        ``output_path``, so the caller can hand the file to the UI.
    """
    # Named ``tts_model`` so it is not confused with the module-level Whisper ``model``.
    processor, tts_model, vocoder, speaker_embeddings = _load_tts_components()

    # Tokenize the input text into model inputs.
    inputs = processor(text=text, return_tensors="pt")

    # Inference only: no gradients are needed.
    with torch.no_grad():
        speech = tts_model.generate_speech(
            inputs["input_ids"], speaker_embeddings, vocoder=vocoder
        )

    # NOTE(review): 16000 Hz must match the rate the vocoder produces;
    # writing with a different rate changes playback speed and pitch. Confirm
    # against the SpeechT5 HiFi-GAN documentation before changing it.
    sf.write(output_path, speech.numpy(), samplerate=16000)

    return output_path
52
 
53
  def messages_to_prompt(messages):
54
  # Default system message for a chatbot