# app.py
"""Gradio app that transcribes a spoken answer and streams teacher-style feedback.

The student picks one of the questions from ``RiverValleyData.questions``,
records an answer with the microphone, and the app:

1. transcribes the recording with OpenAI Whisper,
2. builds a system prompt from the strategy/explanation text in
   ``RiverValleyData`` (plus the picture description for the first question),
3. streams the chat completion back into the output textbox.
"""
import base64
import os

import gradio as gr
import openai

import RiverValleyData  # Importing the RiverValleyData module

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY


def image_to_base64(img_path):
    """Return the contents of the file at ``img_path`` as a base64 string.

    Args:
        img_path: Path of the file to encode.

    Returns:
        The base64 encoding of the file's bytes, decoded as UTF-8 text.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(img_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode('utf-8')


img_base64 = image_to_base64("RiverValleySBC.JPG")
# NOTE(review): the previous literal was f'SBC6' with no placeholder, which left
# img_base64 unused; the <img> markup appears to have been stripped, leaving
# only its alt text. It is restored here as an inline data-URI image — confirm
# the intended attributes (size/styling) against the deployed app.
img_html = f'<img src="data:image/jpeg;base64,{img_base64}" alt="SBC6">'


def _build_conversation(question_choice, message):
    """Build the chat messages (system prompt + student's answer) for the model.

    Args:
        question_choice: The question the student selected.
        message: The transcribed text of the student's spoken answer.

    Returns:
        A list of two message dicts: the system prompt and the user message.
    """
    # Generate the system message based on the chosen question
    strategy, explanation = RiverValleyData.strategy_text["SEP"]

    # Reference to the picture description from RiverValleyData.py
    picture_description = RiverValleyData.description

    # Only the first question is about the picture, so the description is
    # included in the prompt for that question alone.
    if question_choice == RiverValleyData.questions[0]:
        picture_description_inclusion = f"""
        For the first question, ensure your feedback refers to the picture description provided:
        {picture_description}
        """
    else:
        picture_description_inclusion = ""

    system_prompt = f"""
                You are an expert English Language Teacher in a Singapore Primary school, directly guiding a Primary 6 student in Singapore.
                The student is answering the question: '{question_choice}'.
                {picture_description_inclusion}
                Point out areas they did well and where they can improve, following the {strategy}.
                Encourage the use of sophisticated vocabulary and expressions.
                For the second and third questions, the picture is not relevant, so the student should not refer to it in their response.
                {explanation}
                The feedback should be in second person, addressing the student directly.
                """

    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]


def _accumulate_stream(response):
    """Yield the growing reply text as streamed chat-completion chunks arrive.

    Args:
        response: An iterable of streamed chunks, each exposing
            ``chunk['choices'][0]['delta']``.

    Yields:
        The full text received so far, once per chunk that carries content.
    """
    partial_message = ""
    for chunk in response:
        delta = chunk['choices'][0]['delta']
        # A delta may carry only the role (or be empty on the final chunk);
        # indexing ['content'] directly would raise KeyError for those.
        piece = delta.get('content')
        if piece is None:
            continue
        partial_message += piece
        yield partial_message


def predict(question_choice, audio):
    """Transcribe the recorded answer and stream feedback on it.

    Args:
        question_choice: The question selected in the radio input.
        audio: File path of the recorded audio, or ``None`` if nothing was
            recorded.

    Yields:
        The feedback text accumulated so far, so the output box updates as
        the model streams its reply.
    """
    # Submitting without a recording gives no file path; bail out with a
    # readable message instead of failing inside open().
    if not audio:
        yield "No audio was received. Please record your answer and try again."
        return

    # Transcribe the audio using Whisper
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    message = transcript["text"]  # The transcribed message from the audio input

    # Construct the conversation with the system and user's message
    conversation = _build_conversation(question_choice, message)

    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=conversation,
        temperature=0.6,
        max_tokens=500,  # Limiting the response to 500 tokens
        stream=True
    )

    yield from _accumulate_stream(response)


# Gradio Interface
iface = gr.Interface(
    fn=predict,
    inputs=[
        # Radio buttons for question choice. `value` (not the legacy `default`
        # keyword) is what preselects the first question on gr.Radio.
        gr.Radio(
            RiverValleyData.questions,
            label="Choose a question",
            value=RiverValleyData.questions[0],
        ),
        gr.inputs.Audio(source="microphone", type="filepath")  # Audio input
    ],
    outputs=gr.inputs.Textbox(),  # Using inputs.Textbox as an output to make it editable
    description=img_html,
    css="custom.css"  # Link to the custom CSS file
)
iface.queue(max_size=99, concurrency_count=40).launch(debug=True)