Spaces:

bstraehle
/

multimodal-ai

Running

File size: 2,207 Bytes

77af022
687b573
ab63eed
 
77af022
687b573
 
77af022
890ee36
90632d5
846dd3c
77af022
 
dc8d358
 
 
b246237
1ab8b43
87f0bbf
646b8ee
87f0bbf
646b8ee
87f0bbf
dc8d358
646b8ee
687b573
 
 
 
 
 
 
aa0f0bf
687b573
ab63eed
687b573
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646b8ee
687b573
77af022
91b409e
 
0c6235f
 
 
90632d5
a7e2e35
19569d3
e37329d
0c6235f
a7e2e35
 
f52fc76
8c1dd68

import base64
import mimetypes
import os
import threading

import gradio as gr
from openai import OpenAI

# Module-wide mutex, created once at import time, for code that needs to
# serialize access to process-global state.
lock = threading.Lock()

# Parameters forwarded to every chat-completion request.
config = dict(
    max_tokens=1000,
    model="gpt-4o",
    temperature=0,
)

def get_img_b64(img_path):
    """Read the file at ``img_path`` and return its bytes Base64-encoded.

    Args:
        img_path: Path of the file to read (opened in binary mode).

    Returns:
        The Base64 encoding of the file contents as a ``str``.
    """
    with open(img_path, mode="rb") as handle:
        raw_bytes = handle.read()

    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")

def invoke(openai_api_key, prompt, image):
    """Send a text prompt and an image to the configured chat model.

    Args:
        openai_api_key: API key used to authenticate this single request.
        prompt: Text part of the user message.
        image: Path of the image file to attach to the user message.

    Returns:
        The message content of the first completion choice.

    Raises:
        gr.Error: If any argument is empty, or if reading the image or
            calling the API fails (the original exception is chained).
    """
    if not openai_api_key:
        raise gr.Error("OpenAI API Key is required.")
    if not prompt:
        raise gr.Error("Prompt is required.")
    if not image:
        raise gr.Error("Image is required.")

    # Label the data URL with the file's real type (e.g. image/png) instead
    # of always claiming JPEG; fall back to JPEG when it cannot be guessed.
    mime_type, _ = mimetypes.guess_type(image)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    try:
        # Hand the key straight to the client rather than routing it through
        # os.environ: no process-global state is touched, a pre-existing
        # OPENAI_API_KEY is not deleted, and concurrent requests cannot see
        # each other's keys, so no global lock is required.
        client = OpenAI(api_key=openai_api_key)

        img_b64 = get_img_b64(image)

        completion = client.chat.completions.create(
            max_tokens=config["max_tokens"],
            messages=[{"role": "user",
                       "content": [{"type": "text",
                                    "text": prompt},
                                   {"type": "image_url",
                                    "image_url": {"url": f"data:{mime_type};base64,{img_b64}"}}]}],
            model=config["model"],
            temperature=config["temperature"],
        )

        return completion.choices[0].message.content
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user as a
        # Gradio error while keeping the original exception as the cause.
        raise gr.Error(str(e)) from e

# Close any Gradio apps that are already running before building a new one.
gr.close_all()

# Input components, in the order of invoke()'s parameters.
api_key_input = gr.Textbox(label = "OpenAI API Key", type = "password", lines = 1)
prompt_input = gr.Textbox(label = "Prompt", lines = 1, value = "Describe the diagram")
image_input = gr.Image(
    label = "Image",
    type = "filepath",
    sources = ["upload"],
    value = "https://raw.githubusercontent.com/bstraehle/ai-ml-dl/main/hugging-face/multimodal-ai/architecture.png",
)

# Output component; its initial text comes from the OUTPUT environment
# variable, which must be set (a missing variable raises KeyError).
completion_output = gr.Markdown(label = "Completion", value = os.environ["OUTPUT"])

# The description likewise comes from the required DESCRIPTION variable.
demo = gr.Interface(
    fn = invoke,
    inputs = [api_key_input, prompt_input, image_input],
    outputs = [completion_output],
    title = "Multimodal Reasoning Application",
    description = os.environ["DESCRIPTION"],
)

demo.launch()