# Hugging Face Space app (scraped page banner reported build status: Runtime error).
import os
import time
import gradio as gr
import numpy as np
import soundfile as sf
from groq import Groq
from openai import OpenAI

# API clients, configured from the environment (GROQ_API_KEY / OPENAI_API_KEY).
# NOTE(review): os.getenv returns None when a key is unset; the constructors
# won't fail until the first request — confirm the secrets are set in the Space.
groq_client = Groq(api_key=os.getenv('GROQ_API_KEY'))
openai_client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
def transcribe(audio_path):
    """Transcribe an audio file to English text with OpenAI Whisper.

    Args:
        audio_path: Path to an audio file in a format Whisper accepts.

    Returns:
        The transcribed text as a string.
    """
    with open(audio_path, 'rb') as f:
        result = openai_client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            language="en",
        )
    return result.text
def autocomplete(text):
    """Generate an assistant reply for *text* using Gemma on Groq.

    Args:
        text: The user's (transcribed) utterance.

    Returns:
        The model's reply, or an empty string when *text* is empty.
        (Bug fix: the original fell off the end and returned None for
        empty input, which rendered as the literal "None" in the UI.)
    """
    # Guard clause: nothing to complete, and avoids a pointless API call.
    if not text:
        return ""
    response = groq_client.chat.completions.create(
        model='gemma-7b-it',
        messages=[
            {"role": "system", "content": "You are a friendly assistant."},
            {"role": "user", "content": text},
        ],
    )
    return response.choices[0].message.content
def process_audio(input_audio):
    """Transcribe recorded audio and answer it with the LLM.

    Args:
        input_audio: Filesystem path to the recorded audio clip
            (Gradio ``type="filepath"``).

    Returns:
        A Markdown string showing the transcription ("Qn") and the
        model's completion ("Ans").
    """
    audio_data, sample_rate = sf.read(input_audio)
    # Downmix to mono so each segment is a single channel.
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)
    transcription_list = []
    # Whisper requires a file input, so write each ~1-second segment
    # (sample_rate samples) to a temporary wav before transcribing it.
    for start in range(0, len(audio_data), sample_rate):
        segment = audio_data[start:start + sample_rate]
        segment_filename = f"/tmp/audio_segment_{start}.wav"
        sf.write(segment_filename, segment, sample_rate)
        try:
            transcription_list.append(transcribe(segment_filename))
        finally:
            # Bug fix: temp segments were never deleted and accumulated in /tmp.
            try:
                os.remove(segment_filename)
            except OSError:
                pass
    # Bug fix: the original built transcription_list but never used it,
    # answering only the final segment's transcription. Join all segments
    # into the full utterance before asking for a completion.
    transcription = " ".join(transcription_list)
    completion_result = autocomplete(transcription)
    text = f"Qn: {transcription} \n \n Ans: {completion_result}"
    return text
# Gradio UI: stream microphone audio to process_audio and render the
# transcription + completion as Markdown.
interface = gr.Interface(
    fn=process_audio,
    # type="filepath" hands process_audio a path on disk rather than raw samples.
    inputs=gr.Audio(sources="microphone", streaming=True, type="filepath"),
    outputs=gr.Markdown(),
    title="Dear Gemma",
    description="Talk to the AI assistant. It completes your sentences in real time.",
    # live=True re-runs the function as new audio arrives.
    live=True,
    allow_flagging="never"
)

if __name__ == "__main__":
    interface.launch()