"""Gradio demo: transcribe an uploaded audio clip and ask GPT-4 whether the
conversation looks like a scam, spam, or is safe."""

import json
import os
import time

import gradio as gr
import openai

# The API key is read from the environment; it is None when OPENAI_KEY is unset.
openai.api_key = os.environ.get('OPENAI_KEY')

# Instruction sent as the system message on every classification request.
_SYSTEM_PROMPT = "Is this chat a scam, spam or is safe? Only answer in JSON format with 'classification': '' as string and 'reasons': '' as the most plausible reasons why. The reason should be explaning to the potential victim why the conversation is probably a scam"


def _parse_verdict(raw):
    """Extract ``(classification, reasons)`` from the model's reply text.

    The model is asked for a JSON object but may wrap it in prose or a code
    fence, or omit a key. Rather than raising, this falls back to a
    classification of ``"unknown"`` and surfaces the raw reply as the reason.

    Args:
        raw: Text content of the model's reply (may be ``None`` or empty).

    Returns:
        A ``(classification, reasons)`` tuple of strings.
    """
    raw = raw or ""

    # Keep only the outermost {...} span so surrounding prose or Markdown
    # fences do not break JSON decoding.
    start = raw.find("{")
    end = raw.rfind("}")
    candidate = raw[start:end + 1] if 0 <= start < end else raw

    try:
        verdict = json.loads(candidate)
    except json.JSONDecodeError:
        return "unknown", raw

    if not isinstance(verdict, dict):
        return "unknown", raw

    classification = str(verdict.get("classification", "unknown"))
    reasons = str(verdict.get("reasons", ""))
    return classification, reasons


def classify_audio(audio):
    """Transcribe an audio file and classify the conversation with GPT-4.

    Args:
        audio: Path to the uploaded audio file, as supplied by the Gradio
            ``Audio`` input (``type="filepath"``). ``None`` or empty when no
            file is present.

    Returns:
        A tuple ``(transcript, classification, reasons)`` of strings, one per
        output textbox. All three are empty when no audio was provided.
    """
    # With live=True the callback also fires when the input is cleared.
    if not audio:
        return "", "", ""

    # Transcribe the audio to text.
    # NOTE(review): asr_pipeline is not defined in the visible part of this
    # file; it must be provided elsewhere (presumably a speech-recognition
    # pipeline returning a dict with a "text" key) - confirm.
    audio_transcript = asr_pipeline(audio)["text"]
    audio_transcript = audio_transcript.lower()

    messages = [
        {"role": "system", "content": _SYSTEM_PROMPT},
        {"role": "user", "content": audio_transcript},
    ]

    # Call the OpenAI API to generate a response
    response = openai.ChatCompletion.create(
        model="gpt-4",  # Replace with the actual GPT-4 model ID
        messages=messages
    )

    # Extract the generated text and pull the decision and reasons out of it.
    reply_text = response.choices[0].message['content']
    decision, reasons = _parse_verdict(reply_text)

    # Return the transcription and the prediction as a tuple.
    return audio_transcript, decision, reasons


gr.Interface(
    fn=classify_audio,
    inputs=gr.inputs.Audio(source="upload", type="filepath"),
    outputs=[
        gr.outputs.Textbox(label="Transcription"),
        gr.outputs.Textbox(label="Classification"),
        gr.outputs.Textbox(label="Reason"),
    ],
    live=True
).launch(share=True, debug=True)