# Voice expense tracker: records speech, transcribes it with Whisper,
# and extracts structured transaction details via the OpenAI API.
import json
import os

import gradio as gr
import numpy as np
import whisper
from openai import OpenAI
from scipy.io.wavfile import write
# Module-level OpenAI client; reads credentials from the environment
# (OPENAI_API_KEY) per the openai-python library's default behavior.
client = OpenAI()
def process_transaction_details(transcribed_text):
    '''
    Extract the transaction details from the given transcribed text and return them as a JSON
    Input:
        transcribed_text (str): The transcribed text to process
    Output:
        dict: A JSON object with 'amount', 'description', and 'category' fields,
              or an empty dict if the API call or JSON parsing fails
    '''
    prompt = f"Extract the transaction details from the following sentence and categorize the transaction based on the description. Format the response as JSON with fields for 'amount', 'description', and 'category'. Sentence: '{transcribed_text}'."
    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo-0125",
            # JSON mode guarantees the message content is valid JSON text.
            response_format={ "type": "json_object" },
            messages=[
                {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
                {"role": "user", "content": prompt}
            ]
        )
        # Parse the JSON string into a dict so both the success and failure
        # paths return the same type (previously this returned a raw string
        # on success but a dict on error).
        return json.loads(response.choices[0].message.content)
    except Exception as e:
        # Broad catch is deliberate: this is the app's error boundary and
        # the UI just shows an empty result on any API/parse failure.
        print(f"An error occurred: {e}")
        return {}
def transcribe(audio):
    '''
    Transcribe recorded audio with Whisper and extract transaction details.
    Input:
        audio (tuple | None): (sample_rate, samples ndarray) as provided by gr.Audio
    Output:
        dict: transaction details from process_transaction_details
    Raises:
        gr.Error: if no audio was submitted
    '''
    if audio is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    sr, y = audio
    y = y.astype(np.float32)
    # Normalize to [-1, 1]; guard against division by zero on silent input,
    # which would otherwise produce NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    model = whisper.load_model("base")  # or "small", "medium", "large", depending on your requirement
    temp_filename = "temp_audio.wav"
    # Whisper's transcribe API takes a file path, so round-trip through a WAV
    # file (int16 PCM); remove it afterwards so repeated calls don't leak files.
    write(temp_filename, sr, (y * 32767).astype(np.int16))
    try:
        result = model.transcribe(temp_filename)
    finally:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
    return process_transaction_details(result['text'])
# Gradio UI: record up to 10 seconds from the microphone, run the
# transcribe pipeline, and render the extracted details as JSON.
demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["microphone"], max_length=10),
    "json",
)

if __name__ == "__main__":
    demo.launch()