# GPT-4-Vision Playground — Gradio demo app
# Import the necessary libraries
# Standard library
import base64
import io

# Third-party
import gradio as gr
import openai
import requests
# Function to encode the image to base64
def encode_image_to_base64(image, image_format="JPEG"):
    """Serialize a PIL-style image to a base64-encoded string.

    Args:
        image: An object with a PIL-compatible ``save(fp, format=...)`` method.
        image_format: Encoder format passed to ``image.save`` (default "JPEG",
            matching the previous hard-coded behavior).

    Returns:
        The image bytes encoded as a UTF-8 base64 string, suitable for a
        ``data:`` URL.
    """
    buffered = io.BytesIO()
    image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
# Function to send the image to the OpenAI API and get a response
def ask_openai_with_image(api_key, instruction, json_prompt, low_quality_mode, image):
    """Send an image and instruction to the GPT-4 Vision chat endpoint.

    Args:
        api_key: OpenAI API key used for the Bearer authorization header.
        instruction: Free-text prompt; blank falls back to the default
            advertised in the UI placeholder.
        json_prompt: Optional attribute list; when non-blank the model is
            asked to answer in JSON with those attributes.
        low_quality_mode: When truthy, request "low" image detail (cheaper);
            otherwise "high".
        image: PIL image from the Gradio upload widget.

    Returns:
        The model's reply text, or an error message string on failure.
    """
    # Set the OpenAI API key (kept for compatibility; the request below
    # authenticates directly with the header).
    openai.api_key = api_key
    # Encode the uploaded image to base64
    base64_image = encode_image_to_base64(image)
    instruction = instruction.strip()
    # Fix: the Instructions textbox says "Leave blank to use default", but the
    # original code sent an empty prompt. Apply the advertised default.
    if not instruction:
        instruction = (
            "I've uploaded an image and I'd like to know what it depicts "
            "and any interesting details you can provide."
        )
    if json_prompt.strip() != "":
        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt.strip()}"
    # Create the payload with the base64 encoded image
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }
    # Send the request to the OpenAI API.
    # Fix: add a timeout so a stalled connection cannot hang the UI forever.
    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}"},
            json=payload,
            timeout=120,
        )
    except requests.RequestException as e:
        return f"Error: {e}"
    # Check if the request was successful
    if response.status_code == 200:
        response_json = response.json()
        print("Response JSON:", response_json)  # Print the raw response JSON
        try:
            # Attempt to extract the content text
            return response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError) as e:
            # If there is an error in the JSON structure, print it
            print("Error in JSON structure:", e)
            print("Full JSON response:", response_json)
            return "Error processing the image response."
    else:
        # If an error occurred, return the error message
        return f"Error: {response.text}"
# ---- Gradio UI ----------------------------------------------------------

# Prompt textbox; a blank value lets the backend pick its default prompt.
instruction_box = gr.Textbox(
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use default.",
    lines=2,
    placeholder=(
        "Default:\n"
        "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."
    ),
)

# Optional attribute list that switches the model into JSON-answer mode.
json_attr_box = gr.Textbox(
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
    lines=3,
    placeholder=(
        "Example:\n"
        "- name: Name of the object\n"
        "- color: Color of the object\n"
    ),
)

# Toggle between "low" and "high" image-detail requests.
low_quality_toggle = gr.Checkbox(
    label="Low Quality Mode",
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
)

# Wire the components into a single Interface around the API call.
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    inputs=[
        gr.Textbox(label="API Key"),
        instruction_box,
        json_attr_box,
        low_quality_toggle,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
)

# Start the web app.
vision_playground.launch()