webcamGPT

Running

App Files Files Community

webcamGPT / app.py

Nymbo

Update app.py

b45fd05 10 months ago

raw

history blame contribute delete

No virus

3.8 kB

	import base64
	import os
	import uuid
	import time
	import cv2
	import gradio as gr
	import numpy as np
	import requests

	MARKDOWN = """
	# ♠ Alyxsissy GPT-Vision Testing 👁‍🗨

	Skeleton API structure for GPT-4-Vision. Implimentation into SissyGPT WiP.
	"""
	AVATARS = (
	"https://raw.githubusercontent.com/Nymbo/Web-Hosted-Images/main/Alyxsissy%20Favicon.png?token=GHSAT0AAAAAACJF7K6DI4PXYMGQU3TPEZ2GZL2LFXA",
	"https://media.roboflow.com/spaces/openai-white-logomark.png"
	)
	IMAGE_CACHE_DIRECTORY = "data"
	API_URL = "https://api.openai.com/v1/chat/completions"


	def preprocess_image(image: np.ndarray) -> np.ndarray:
	image = np.fliplr(image)
	return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


	def encode_image_to_base64(image: np.ndarray) -> str:
	success, buffer = cv2.imencode('.jpg', image)
	if not success:
	raise ValueError("Could not encode image to JPEG format.")

	encoded_image = base64.b64encode(buffer).decode('utf-8')
	return encoded_image


	def compose_payload(image: np.ndarray, prompt: str) -> dict:
	base64_image = encode_image_to_base64(image)
	return {
	"model": "gpt-4-vision-preview",
	"messages": [
	{
	"role": "user",
	"content": [
	{
	"type": "text",
	"text": prompt
	},
	{
	"type": "image_url",
	"image_url": {
	"url": f"data:image/jpeg;base64,{base64_image}"
	}
	}
	]
	}
	],
	"max_tokens": 300
	}


	def compose_headers(api_key: str) -> dict:
	return {
	"Content-Type": "application/json",
	"Authorization": f"Bearer {api_key}"
	}


	def prompt_image(api_key: str, image: np.ndarray, prompt: str) -> str:
	headers = compose_headers(api_key=api_key)
	payload = compose_payload(image=image, prompt=prompt)
	response = requests.post(url=API_URL, headers=headers, json=payload).json()

	if 'error' in response:
	raise ValueError(response['error']['message'])
	return response['choices'][0]['message']['content']


	def cache_image(image: np.ndarray) -> str:
	image_filename = f"{uuid.uuid4()}.jpeg"
	os.makedirs(IMAGE_CACHE_DIRECTORY, exist_ok=True)
	image_path = os.path.join(IMAGE_CACHE_DIRECTORY, image_filename)
	cv2.imwrite(image_path, image)
	return image_path


	def respond(api_key: str, image: np.ndarray, prompt: str, chat_history):
	if not api_key:
	raise ValueError(
	"API_KEY is not set. "
	"Please follow the instructions in the README to set it up.")

	image = preprocess_image(image=image)
	cached_image_path = cache_image(image)
	response = prompt_image(api_key=api_key, image=image, prompt=prompt)
	chat_history.append(((cached_image_path,), None))
	chat_history.append((prompt, response))
	return "", chat_history


	with gr.Blocks(theme=gr.themes.Default(primary_hue="pink", secondary_hue="pink")) as demo:
	gr.Markdown(MARKDOWN)
	with gr.Row():
	webcam = gr.Webcam(label="Webcam", source="webcam", streaming=True)
	with gr.Column():
	api_key_textbox = gr.Textbox(
	label="OpenAI API KEY", type="password")
	chatbot = gr.Chatbot(
	height=500, bubble_full_width=False, avatar_images=AVATARS)
	message_textbox = gr.Textbox()
	clear_button = gr.ClearButton(message_textbox, chatbot, variant="primary")

	message_textbox.submit(
	fn=respond,
	inputs=[api_key_textbox, webcam, message_textbox, chatbot],
	outputs=[message_textbox, chatbot]
	)

	demo.launch(debug=False, show_error=True)