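# app.py — Chainlit chat app: text-only messages are answered by the chat
# model (`model`), messages with image attachments by the vision model
# (`model_vision`); all LLM calls are traced with LangSmith.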
import chainlit as cl
from openai import OpenAI
from langsmith.run_helpers import traceable
from langsmith_config import setup_langsmith_config
import base64
import os

if os.getenv("OPENAI_API_KEY") is not None:
    os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

model = "gpt-4-1106-preview"
model_vision = "gpt-4-vision-preview"

setup_langsmith_config()
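
# Extract the first image attached to the message, enforce the 1 MB size
# limit, and return it base64-encoded (or the sentinel "too_large").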
def process_images(msg: cl.Message):
    # keep only the image attachments
    images = [file for file in msg.elements if "image" in file.mime]
    # take the first image just for demo purposes
    image_bytes = images[0].content
    # enforce the 1 MB size limit
    if len(image_bytes) > 1000000:
        return "too_large"
    # the vision API expects a base64-encoded image
    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
    return image_base64
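
# Relay streamed completion chunks to the Chainlit message token by token.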
async def process_stream(stream, msg: cl.Message):
    for part in stream:
        if token := part.choices[0].delta.content or "":
            await msg.stream_token(token)
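
# Build a vision request: append the user's text plus the base64 image to
# the image history, then start a streaming vision completion.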
def handle_vision_call(msg, image_history):
    image_base64 = process_images(msg)
    if image_base64 == "too_large":
        return "too_large"
    if image_base64:
        # add the image (plus the user's text) to the image history
        image_history.append(
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": msg.content},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_base64}",
                            "detail": "low",
                        },
                    },
                ],
            }
        )
    stream = gpt_vision_call(image_history)
    return stream
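
# Streaming completion against the text model.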
@traceable(run_type="llm", name="gpt 3 turbo call")
async def gpt_call(message_history: list = []):
    client = OpenAI()
    stream = client.chat.completions.create(
        model=model,
        messages=message_history,
        max_tokens=4096,
        stream=True,
    )
    return stream
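
# Streaming completion against the vision model (capped at 350 tokens).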
@traceable(run_type="llm", name="gpt 4 turbo vision call")
def gpt_vision_call(image_history: list = []):
client = OpenAI()
stream = client.chat.completions.create(
model=model_vision,
messages=image_history,
max_tokens=350,
stream=True,
)
return stream
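
# Ask the user for an OpenAI API key and validate it with a one-token test
# request; ask again if validation fails or no key is provided.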
@traceable(run_type="llm", name="api-key set")
async def wait_for_key():
res = await cl.AskUserMessage(content="Send an openai api-key to start", timeout=600).send()
if res:
await cl.Message(content="setting up...", indent=1).send()
os.environ["OPENAI_API_KEY"] = res["content"]
# check if the key is valid
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])
try:
stream = client.chat.completions.create(
model=model,
messages=[{"role": "system", "content": "test"}],
max_tokens=1,
stream=True,
)
if stream:
await cl.Message(content="api-key setted, you can start chatting!", indent=1).send()
except Exception as e:
await cl.Message(content=f"{e}", indent=1).send()
return await wait_for_key()
return await cl.Message(content="api-key setted, you can start chatting!").send()
else:
return await wait_for_key()
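
# Seed both conversation histories with their system prompts and ask for an
# API key if none is configured.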
@cl.on_chat_start
async def start_chat():
    cl.user_session.set(
        "message_history",
        [{"role": "system", "content": "You are a helpful assistant powered by GPT-4-1106-preview, the latest model developed by OpenAI. You cannot receive images directly, but if the user uploads an image with their message, GPT-4-vision-preview will be used instead. If a user asks whether you can analyze images, tell them that, and mention the button at the bottom left (above the text input) for uploading images; they can also drag an image into the chat or paste it into the input."}],
    )
    cl.user_session.set(
        "image_history",
        [{"role": "system", "content": "You are a helpful assistant powered by GPT-4-vision-preview: when the user uploads an image, you can understand it. Plain text messages are handled by GPT-4-1106-preview, while messages with images are handled by you. If the user asks about your capabilities, you can tell them that."}],
    )
    if os.getenv("OPENAI_API_KEY") is None:
        await wait_for_key()
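
# Route each incoming message: the vision path if it has attachments, the
# text path otherwise; stream the reply and record it in both histories.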
@cl.on_message
@traceable(run_type="chain", name="message")
async def on_message(msg: cl.Message):
    message_history = cl.user_session.get("message_history")
    image_history = cl.user_session.get("image_history")
    stream_msg = cl.Message(content="")
    stream = None
    if msg.elements:
        stream = handle_vision_call(msg, image_history)
        if stream == "too_large":
            return await cl.Message(content="Image too large, max 1 MB").send()
    else:
        # add the message to both histories to keep them coherent
        message_history.append({"role": "user", "content": msg.content})
        image_history.append({"role": "user", "content": msg.content})
        stream = await gpt_call(message_history)
    if stream:
        await process_stream(stream, msg=stream_msg)
        message_history.append({"role": "assistant", "content": stream_msg.content})
        image_history.append({"role": "assistant", "content": stream_msg.content})
    return stream_msg.content