"""Gradio demo that sends a text prompt plus an image to an OpenAI chat model."""

import base64
import mimetypes
import os
import threading

import gradio as gr
from openai import OpenAI

# invoke() no longer needs this lock because the API key is handed to the
# client directly; the name is kept so anything importing it keeps working.
lock = threading.Lock()

# Parameters forwarded to the chat completion request.
config = {
    "max_tokens": 1000,
    "model": "gpt-4o",
    "temperature": 0,
}

# MIME type used when the image's extension is missing or not an image type.
_DEFAULT_IMAGE_MIME = "image/jpeg"


def get_img_b64(img_path: str) -> str:
    """Return the contents of the file at *img_path* as a base64 string."""
    with open(img_path, "rb") as img_file:
        return base64.b64encode(img_file.read()).decode("utf-8")


def _image_data_url(img_path: str) -> str:
    """Build a base64 ``data:`` URL for the image stored at *img_path*."""
    # Label the payload with the file's real type (the bundled example is a
    # PNG) instead of always claiming JPEG.
    mime, _ = mimetypes.guess_type(img_path)
    if not mime or not mime.startswith("image/"):
        mime = _DEFAULT_IMAGE_MIME
    return f"data:{mime};base64,{get_img_b64(img_path)}"


def invoke(openai_api_key, prompt, image):
    """Ask the configured chat model about *image* and return its reply.

    Args:
        openai_api_key: OpenAI API key used for this single request.
        prompt: Text instruction sent together with the image.
        image: Filesystem path of the image (the UI uses ``type="filepath"``).

    Returns:
        The content of the first completion choice.

    Raises:
        gr.Error: If an input is missing, or if reading the image or calling
            the API fails (the original exception is chained as the cause).
    """
    if not openai_api_key:
        raise gr.Error("OpenAI API Key is required.")

    if not prompt:
        raise gr.Error("Prompt is required.")

    if not image:
        raise gr.Error("Image is required.")

    try:
        # Pass the key to the client directly rather than through os.environ:
        # the environment is process-wide, so concurrent requests could read
        # or delete each other's key, and the secret would linger in it.
        client = OpenAI(api_key=openai_api_key)

        completion = client.chat.completions.create(
            max_tokens=config["max_tokens"],
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {"url": _image_data_url(image)},
                        },
                    ],
                }
            ],
            model=config["model"],
            temperature=config["temperature"],
        )

        return completion.choices[0].message.content
    except Exception as e:
        # UI boundary: surface any failure as a Gradio error message while
        # keeping the original exception attached for debugging.
        raise gr.Error(str(e)) from e


gr.close_all()

demo = gr.Interface(
    fn=invoke,
    inputs=[
        gr.Textbox(label="OpenAI API Key", type="password", lines=1),
        gr.Textbox(label="Prompt", lines=1, value="Describe the diagram"),
        gr.Image(
            label="Image",
            type="filepath",
            sources=["upload"],
            value="https://raw.githubusercontent.com/bstraehle/ai-ml-dl/main/hugging-face/multimodal-ai/architecture.png",
        ),
    ],
    # OUTPUT and DESCRIPTION are optional deployment settings; fall back to
    # empty/no text instead of crashing with KeyError when they are unset.
    outputs=[gr.Markdown(label="Completion", value=os.environ.get("OUTPUT", ""))],
    title="Multimodal Reasoning Application",
    description=os.environ.get("DESCRIPTION"),
)

demo.launch()