Spaces:

pratham0011
/

AI-Voice-Assistance

Running

pratham0011 committed on Aug 20, 2024

Commit

649a0ea

verified ·

1 Parent(s): 3873892

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,7 +8,9 @@ from llama_index.llms.text_generation_inference import TextGenerationInference
 import whisper
 import gradio as gr
 from gtts import gTTS
 model = whisper.load_model("base")
 HF_API_TOKEN = os.getenv("HF_TOKEN")
@@ -26,11 +28,27 @@ def translate_audio(audio):
     result = whisper.decode(model, mel, options)
     return result.text
-def audio_response(t):
-    tts = gTTS(text=t, lang='en', slow=False)
-    tts.save("output.mp3")
-    mp3_file_path = "output.mp3"
-    return mp3_file_path
 def messages_to_prompt(messages):
     # Default system message for a chatbot

 import whisper
 import gradio as gr
 from gtts import gTTS
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+import soundfile as sf
+from datasets import load_dataset
 model = whisper.load_model("base")
 HF_API_TOKEN = os.getenv("HF_TOKEN")
     result = whisper.decode(model, mel, options)
     return result.text
+def audio_response(text, output_path="speech.wav"):
+    # Load the processor, model, and vocoder
+    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    # Process the input text
+    inputs = processor(text=text, return_tensors="pt")
+    # Load xvector containing speaker's voice characteristics
+    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
+    # Generate speech
+    with torch.no_grad():
+        speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    # Save the audio to a file
+    sf.write(output_path, speech.numpy(), samplerate=16000)  # Ensure the sample rate matches your needs
+    return output_path
 def messages_to_prompt(messages):
     # Default system message for a chatbot