import json

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write

# The OpenAI client reads the OPENAI_API_KEY environment variable by default.
client = OpenAI()
def process_transaction_details(transcribed_text):
    """
    Extract the transaction details from the given transcribed text.

    Input:
        transcribed_text (str): The transcribed text to process.
    Output:
        dict: The transaction details with 'amount', 'description', and 'category'
        fields, or an empty dict if the request fails.
    """
    prompt = (
        "Extract the transaction details from the following sentence and categorize "
        "the transaction based on the description. Format the response as JSON with "
        "fields for 'amount', 'description', and 'category'. "
        f"Sentence: '{transcribed_text}'."
    )
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt},
            ],
        )
        # The model returns a JSON string; parse it so the function returns a dict,
        # as the docstring promises.
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}
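
# Example with hypothetical values: a transcription such as
# "I spent 20 dollars on pizza last night" would typically yield something like
# {"amount": "20 dollars", "description": "pizza last night", "category": "Food"},
# though the exact field values depend on the model's response.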
def transcribe(audio):
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    sr, y = audio
    # Normalize the waveform to [-1, 1]; guard against division by zero on silent input.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    # Loading the model on every call is slow; hoist this to module level if latency matters.
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    # Whisper's transcribe() takes a file path, so write the audio to a temporary WAV file.
    temp_filename = "temp_audio.wav"
    write(temp_filename, sr, (y * 32767).astype(np.int16))
    result = model.transcribe(temp_filename)
    return process_transaction_details(result["text"])
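
# Hypothetical direct call, bypassing the UI: Gradio's numpy-type Audio input supplies
# a (sample_rate, samples) tuple, so transcribe((16000, some_int16_array)) mirrors
# what the component passes in at runtime.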
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"], max_length=10),
    "json",
)
if __name__ == "__main__":
    demo.launch()
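
# To run locally (assuming the dependencies above are installed and an OpenAI key is available):
#   export OPENAI_API_KEY=...
#   python app.py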