# Import the necessary libraries
import gradio as gr
import openai
import base64
import io
import requests
# Function to encode a PIL image to a base64-encoded JPEG string
def encode_image_to_base64(image):
    buffered = io.BytesIO()
    # Convert to RGB first so uploads with an alpha channel (e.g. PNGs) can be saved as JPEG
    image = image.convert("RGB")
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str
# Function to send the image to the OpenAI API and get a response
def ask_openai_with_image(api_key, instruction, json_prompt, low_quality_mode, image):
    # Set the OpenAI API key
    openai.api_key = api_key

    # Encode the uploaded image to base64
    base64_image = encode_image_to_base64(image)

    # Fall back to the default instruction (shown as the placeholder in the UI) when left blank
    instruction = instruction.strip()
    if instruction == "":
        instruction = "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."

    # Optionally ask the model to answer in JSON with the user-specified attributes
    if json_prompt.strip() != "":
        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt.strip()}"

    # Create the payload with the base64-encoded image
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }
    # Send the request to the OpenAI API
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {openai.api_key}"},
        json=payload,
    )

    # Check if the request was successful
    if response.status_code == 200:
        response_json = response.json()
        print("Response JSON:", response_json)  # Print the raw response JSON
        try:
            # Attempt to extract the content text
            return response_json["choices"][0]["message"]["content"]
        except Exception as e:
            # If there is an error in the JSON structure, print it
            print("Error in JSON structure:", e)
            print("Full JSON response:", response_json)
            return "Error processing the image response."
    else:
        # If an error occurred, return the error message
        return f"Error: {response.text}"
json_schema = gr.Textbox(
    label="JSON Attributes",
    info="Define a list of attributes to force the model to respond in valid JSON format. Leave blank to disable JSON formatting.",
    lines=3,
    placeholder="""Example:
- name: Name of the object
- color: Color of the object
""",
)

instructions = gr.Textbox(
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use the default.",
    lines=2,
    placeholder="""Default:
I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
)

low_quality_mode = gr.Checkbox(
    label="Low Quality Mode",
    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
)
# Create the Gradio interface
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    inputs=[
        gr.Textbox(label="API Key"),
        instructions,
        json_schema,
        low_quality_mode,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
)

# Launch the app
vision_playground.launch()
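
For a quick check without the Gradio UI, the handler can also be called directly from a separate script. This is a minimal sketch, assuming an OPENAI_API_KEY environment variable and a local test.jpg file; both are placeholders and not part of the app above.

# Minimal sketch: call the handler directly for a smoke test. Run this in a
# separate script (launch() above blocks). OPENAI_API_KEY and "test.jpg" are
# assumed placeholders, not part of the original app.
import os
from PIL import Image

test_image = Image.open("test.jpg")
answer = ask_openai_with_image(
    api_key=os.environ["OPENAI_API_KEY"],
    instruction="Describe this image.",
    json_prompt="",
    low_quality_mode=True,
    image=test_image,
)
print(answer)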