Spaces:

bstraehle
/

multimodal-ai

Running

App Files Files Community

multimodal-ai / app.py

bstraehle

Update app.py

87f0bbf verified 5 months ago

raw

history blame

2.21 kB

	import gradio as gr
	import base64, os, threading

	from openai import OpenAI

	lock = threading.Lock()

	config = {
	"max_tokens": 1000,
	"model": "gpt-4o",
	"temperature": 0
	}

	def get_img_b64(img_path):
	with open(img_path, "rb") as img_file:
	return base64.b64encode(img_file.read()).decode("utf-8")

	def invoke(openai_api_key, prompt, image):
	if not openai_api_key:
	raise gr.Error("OpenAI API Key is required.")
	if not prompt:
	raise gr.Error("Prompt is required.")
	if not image:
	raise gr.Error("Image is required.")

	with lock:
	os.environ["OPENAI_API_KEY"] = openai_api_key

	content = ""

	try:
	client = OpenAI()

	img_b64 = get_img_b64(image)

	completion = client.chat.completions.create(
	max_tokens = config["max_tokens"],
	messages = [{"role": "user",
	"content": [{"type": "text",
	"text": prompt},
	{"type": "image_url",
	"image_url": {"url": f"data:image/jpeg;base64,{img_b64}"}}]}],
	model = config["model"],
	temperature = config["temperature"]
	)

	content = completion.choices[0].message.content
	except Exception as e:
	err_msg = e
	raise gr.Error(e)
	finally:
	del os.environ["OPENAI_API_KEY"]

	return content

	gr.close_all()

	demo = gr.Interface(
	fn = invoke,
	inputs = [gr.Textbox(label = "OpenAI API Key", type = "password", lines = 1),
	gr.Textbox(label = "Prompt", lines = 1, value = "Describe the diagram"),
	gr.Image(label = "Image", type = "filepath", sources = ["upload"],
	value = "https://raw.githubusercontent.com/bstraehle/ai-ml-dl/main/hugging-face/multimodal-ai/architecture.png")],
	outputs = [gr.Markdown(label = "Completion", value = os.environ["OUTPUT"])],
	title = "Multimodal Reasoning Application",
	description = os.environ["DESCRIPTION"]
	)

	demo.launch()