# hey-gemma / app.py — "Dear Gemma" voice assistant (Hugging Face Space)
# Author: Gabriel C
# (Header reconstructed from page-scrape residue: "Create app.py",
#  commit 1324088, 2.45 kB — the raw metadata was not valid Python.)
import os
import time
import gradio as gr
import numpy as np
import soundfile as sf
from groq import Groq
from openai import OpenAI
# API clients configured from environment variables (set as Space secrets).
groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))  # Gemma chat completions
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))  # Whisper transcription
def transcribe(audio_path, language="en"):
    """
    Transcribe an audio file using OpenAI's Whisper API.

    Args:
        audio_path: Path to an audio file readable by the Whisper endpoint.
        language: ISO-639-1 language code of the speech (default "en";
            previously hard-coded — now a backward-compatible parameter).

    Returns:
        The transcribed text as a string.
    """
    with open(audio_path, 'rb') as audio_file:
        transcription = openai_client.audio.transcriptions.create(
            file=audio_file,
            language=language,
            model="whisper-1"
        )
    return transcription.text
def autocomplete(text):
    """
    Generate a chat completion for the given text using Gemma via Groq.

    Args:
        text: The user's prompt text.

    Returns:
        The model's reply as a string. Returns "" for empty input — the
        original implicitly returned None, which the caller's f-string
        rendered as the literal word "None".
    """
    if not text:
        return ""
    response = groq_client.chat.completions.create(
        model='gemma-7b-it',
        messages=[
            {"role": "system", "content": "You are a friendly assistant."},
            {"role": "user", "content": text}
        ]
    )
    return response.choices[0].message.content
def process_audio(input_audio):
    """
    Transcribe recorded audio segment-by-segment and complete it with Gemma.

    Args:
        input_audio: Filepath to the recorded audio clip (the Gradio Audio
            component is configured with ``type="filepath"``; it may pass
            None before any audio has arrived).

    Returns:
        A Markdown string showing the full transcription ("Qn:") and the
        model's completion ("Ans:"), or "" when there is nothing to process.
    """
    # Streaming widget may invoke us before any audio exists.
    if input_audio is None:
        return ""
    audio_data, sample_rate = sf.read(input_audio)
    # Downmix to mono by averaging channels.
    if audio_data.ndim > 1:
        audio_data = np.mean(audio_data, axis=1)
    transcription_list = []
    # Process in 1-second segments; Whisper requires a file per request,
    # so each segment is written to a temp file and removed afterwards
    # (the original leaked one /tmp file per segment).
    for start in range(0, len(audio_data), sample_rate):
        segment = audio_data[start:start + sample_rate]
        segment_filename = f"/tmp/audio_segment_{start}.wav"
        sf.write(segment_filename, segment, sample_rate)
        try:
            transcription_list.append(transcribe(segment_filename))
        finally:
            if os.path.exists(segment_filename):
                os.remove(segment_filename)
    # BUG FIX: the original sent only the LAST segment's transcription to
    # autocomplete() and into the output, silently discarding everything
    # accumulated in transcription_list (and raised NameError on empty audio).
    full_transcription = " ".join(transcription_list)
    if not full_transcription:
        return ""
    completion_result = autocomplete(full_transcription)
    return f"Qn: {full_transcription} \n \n Ans: {completion_result}"
# Define the Gradio interface
interface = gr.Interface(
    fn=process_audio,
    # Stream from the microphone; handler receives a filepath per chunk
    # because of type="filepath".
    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
    outputs=gr.Markdown(),
    title="Dear Gemma",
    description="Talk to the AI assistant. It completes your sentences in real time.",
    live=True,  # re-run the handler automatically as new audio arrives
    allow_flagging="never"
)

if __name__ == "__main__":
    interface.launch()