File size: 1,255 Bytes
fb39d60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os
import gradio as gr
from groq import Groq
import whisper
from gtts import gTTS
import tempfile

# Configure the Groq client. A GROQ_API_KEY already exported in the
# environment is kept as-is; the literal below is only a placeholder fallback
# and presumably has to be replaced with a real key (or the variable exported)
# before API requests can succeed.
os.environ.setdefault('GROQ_API_KEY', 'GROQ_API-KEY')
groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))

# Load the Whisper "base" model once at import time so that every call to
# process_audio reuses the same model instance.
whisper_model = whisper.load_model("base")

def process_audio(audio_file):
    """Answer a spoken prompt with a text reply and a spoken reply.

    The recording is transcribed with the module-level Whisper model, the
    transcript is sent as a single user message to the Groq chat-completion
    API (model ``llama3-8b-8192``), and the reply is synthesized to an MP3
    file with gTTS.

    Args:
        audio_file: Path of the recorded audio file. ``None`` or an empty
            string is treated as "no input".

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is the path
        of a temporary ``.mp3`` file containing the spoken reply, or ``None``
        when there is nothing to speak (no input, blank transcript, or blank
        model reply). The temporary file is not deleted by this function.
    """
    # The interface runs in live mode, so this callback may be invoked with
    # no recording (e.g. after the input is cleared); bail out instead of
    # handing None to Whisper.
    if not audio_file:
        return "", None

    # Transcribe audio using Whisper
    transcription = whisper_model.transcribe(audio_file)
    user_text = transcription['text']
    if not user_text.strip():
        # Nothing intelligible was said; skip the API round-trip.
        return "", None

    # Generate response using Llama 8b model with Groq API
    chat_completion = groq_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": user_text,
            }
        ],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content
    if not response_text or not response_text.strip():
        # gTTS cannot synthesize empty text, so return the text result alone.
        return response_text or "", None

    # Convert response text to speech using gTTS
    tts = gTTS(text=response_text, lang='en')
    # Reserve a unique path and close the handle before gTTS writes to it:
    # this avoids leaking the file descriptor and writing to a file that is
    # still open. delete=False keeps the file on disk for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as speech_file:
        speech_path = speech_file.name
    tts.save(speech_path)

    return response_text, speech_path

# Build the Gradio UI: a single audio input, handed to process_audio as a
# file path, mapped to a text box and an audio player for the reply.
# live=True is passed so the interface re-runs on input changes rather than
# waiting for an explicit submit (per the Gradio Interface documentation).
iface = gr.Interface(
    process_audio,
    gr.Audio(type="filepath"),
    [
        gr.Textbox(label="Response"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
)

# Start the web server for the interface.
iface.launch()