|
import os |
|
import gradio as gr |
|
import whisper |
|
from groq import Groq |
|
from gtts import gTTS |
|
import tempfile |
|
|
|
|
|
# Speech-to-text model: the Whisper "base" checkpoint, loaded once at import time.
model = whisper.load_model("base")

# Groq API client; credentials are read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
|
|
|
|
|
def transcribe_audio(audio):
    """Run Whisper speech-to-text on the audio file at *audio* and return the text."""
    transcription = model.transcribe(audio)
    return transcription["text"]
|
|
|
|
|
def generate_response(transcription):
    """Send the transcription to Llama 3 8B on Groq and return the model's reply text."""
    conversation = [
        {
            "role": "user",
            "content": transcription,
        }
    ]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama3-8b-8192",
    )
    return completion.choices[0].message.content
|
|
|
|
|
def text_to_speech(response):
    """Synthesize *response* to speech with gTTS and return the path of an MP3 file.

    The caller is responsible for eventually deleting the temporary file.
    """
    tts = gTTS(response)
    # mkstemp returns an open OS-level file descriptor alongside the path.
    # Close it before gTTS writes: the original NamedTemporaryFile(delete=False)
    # leaked the handle, and on Windows the still-open handle would block
    # tts.save() from writing to the same path.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(path)
    return path
|
|
|
|
|
def chatbot(audio):
    """Full pipeline: audio file -> transcription -> LLM reply -> spoken reply.

    Returns a (transcription, reply_text, reply_audio_path) tuple for the UI.
    """
    text = transcribe_audio(audio)
    reply = generate_response(text)
    spoken = text_to_speech(reply)
    return text, reply, spoken
|
|
|
|
|
# Gradio UI: one microphone/file audio input feeding the chatbot pipeline,
# with the transcription, the LLM reply, and the synthesized speech as outputs.
output_components = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Llama Response"),
    gr.Audio(label="Response Audio"),
]

iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=output_components,
    live=True,
)

iface.launch()
|
|