File size: 1,592 Bytes
c412199
 
 
 
 
 
 
 
 
 
dde51c5
c412199
 
 
 
 
 
dde51c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c412199
 
 
 
 
 
 
 
 
dde51c5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
!pip install gradio groq openai-whisper gtts

import os
import gradio as gr
from groq import Groq
import whisper
from gtts import gTTS
import tempfile

# Set up Groq API key.
# SECURITY: never hard-code API keys in source — the previous inline key is
# exposed in version control and must be treated as compromised (rotate it).
# Set GROQ_API_KEY in the environment before launching the app.
_groq_api_key = os.environ.get('GROQ_API_KEY')
if not _groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it before running, e.g. "
        "`export GROQ_API_KEY=...`"
    )
groq_client = Groq(api_key=_groq_api_key)

# Load Whisper model ("base" trades accuracy for speed; downloaded on first use).
whisper_model = whisper.load_model("base")

def process_audio(audio_file):
    """Transcribe spoken audio, generate an LLM reply, and synthesize it to speech.

    Parameters
    ----------
    audio_file : str | None
        Path to the recorded audio file, as supplied by
        ``gr.Audio(type="filepath")``. May be ``None`` if the input is cleared.

    Returns
    -------
    tuple[str, str | None]
        ``(response_text, mp3_path)`` on success, or
        ``(error_message, None)`` on failure.
    """
    try:
        # Gradio passes None when the audio input is empty/cleared.
        if audio_file is None:
            return "No audio received.", None

        # Speech-to-text via the module-level Whisper model.
        # NOTE: do not reuse the name `audio_file` below — the original code
        # shadowed the input path with the output temp-file object.
        transcription = whisper_model.transcribe(audio_file)
        user_text = transcription['text']

        # Generate a reply with the Llama 3 8B model through the Groq API.
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_text,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content

        # Text-to-speech via gTTS, written to a fresh temp file.
        # mkstemp + close avoids the leaked open handle of the previous
        # NamedTemporaryFile(delete=False) approach (which also fails on
        # Windows, where gTTS cannot reopen a still-open file by path).
        fd, tts_path = tempfile.mkstemp(suffix='.mp3')
        os.close(fd)
        tts = gTTS(text=response_text, lang='en')
        tts.save(tts_path)

        return response_text, tts_path

    except Exception as e:
        # Surface the error in the UI instead of crashing the Gradio app.
        print("Error:", e)
        return str(e), None

# Create Gradio interface.
# live=True was removed: with a filepath audio input it re-runs the slow,
# paid-API pipeline (Whisper + Groq + gTTS) on every input change; an
# explicit submit button is cheaper and more predictable for this workload.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Response"), gr.Audio(label="Response Audio")],
)

iface.launch()