markllego and ychen committed
Commit
6d09e4d
1 Parent(s): 71ff575

UI & other improvements (#1)


- Add custom options (7d0ff4471d4cb45c4514a47ac84250a9515dbf02)


Co-authored-by: ychen <ychen@users.noreply.huggingface.co>

Files changed (2)
  1. .gitignore +2 -0
  2. app.py +59 -24
.gitignore ADDED
@@ -0,0 +1,2 @@
+venv/
+flagged/
app.py CHANGED
@@ -2,29 +2,31 @@
 import gradio as gr
 import openai
 import base64
-from PIL import Image
 import io
 import requests
-import os
 
-# Consider using environment variables or a configuration file for API keys.
-# WARNING: Do not hardcode API keys in your code, especially if sharing or using version control.
-openai.api_key = os.getenv('OPENAI_API_KEY')
-if openai.api_key is None:
-    raise ValueError("Please set the OPENAI_API_KEY environment variable.")
 
 # Function to encode the image to base64
 def encode_image_to_base64(image):
     buffered = io.BytesIO()
     image.save(buffered, format="JPEG")
-    img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
+    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
     return img_str
 
+
 # Function to send the image to the OpenAI API and get a response
-def ask_openai_with_image(image):
+def ask_openai_with_image(api_key, instruction, json_prompt, low_quality_mode, image):
+    # Set the OpenAI API key
+    openai.api_key = api_key
+
     # Encode the uploaded image to base64
     base64_image = encode_image_to_base64(image)
-
+
+    instruction = instruction.strip()
+
+    if json_prompt.strip() != "":
+        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt.strip()}"
+
     # Create the payload with the base64 encoded image
     payload = {
         "model": "gpt-4-vision-preview",
@@ -34,25 +36,28 @@ def ask_openai_with_image(image):
                 "content": [
                     {
                         "type": "text",
-                        "text": "I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide."
+                        "text": instruction,
                     },
                     {
                         "type": "image_url",
-                        "image_url": f"data:image/jpeg;base64,{base64_image}"
-                    }
-                ]
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}",
+                            "detail": "low" if low_quality_mode else "high",
+                        },
+                    },
+                ],
             }
         ],
-        "max_tokens": 4095
+        "max_tokens": 4095,
     }
-
+
     # Send the request to the OpenAI API
     response = requests.post(
         "https://api.openai.com/v1/chat/completions",
         headers={"Authorization": f"Bearer {openai.api_key}"},
-        json=payload
+        json=payload,
     )
-
+
     # Check if the request was successful
     if response.status_code == 200:
        response_json = response.json()
@@ -69,14 +74,44 @@ def ask_openai_with_image(image):
        # If an error occurred, return the error message
        return f"Error: {response.text}"
 
+
+json_schema = gr.Textbox(
+    label="JSON Attributes",
+    info="Define a list of attributes to force the model to respond in valid json format. Leave blank to disable json formatting.",
+    lines=3,
+    placeholder="""Example:
+- name: Name of the object
+- color: Color of the object
+""",
+)
+
+instructions = gr.Textbox(
+    label="Instructions",
+    info="Instructions for the vision model to follow. Leave blank to use default.",
+    lines=2,
+    placeholder="""Default:
+I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
+)
+
+low_quality_mode = gr.Checkbox(
+    label="Low Quality Mode",
+    info="See here: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding.",
+)
+
 # Create a Gradio interface
-iface = gr.Interface(
+vision_playground = gr.Interface(
     fn=ask_openai_with_image,
-    inputs=gr.Image(type="pil"),
-    outputs="text",
-    title="GPT-4 with Vision",
-    description="Upload an image and get a description from GPT-4 with Vision."
+    inputs=[
+        gr.Textbox(label="API Key"),
+        instructions,
+        json_schema,
+        low_quality_mode,
+        gr.Image(type="pil", label="Image"),
+    ],
+    outputs=[gr.Markdown()],
+    title="GPT-4-Vision Playground",
+    description="Upload an image and get a description from GPT-4 with Vision.",
 )
 
 # Launch the app
-iface.launch()
+vision_playground.launch()
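
For reference, a minimal standalone sketch of the request the updated app.py now sends to the vision endpoint; the image path and API key below are placeholders and are not part of this commit:

import base64
import requests

# Encode a local image the same way encode_image_to_base64 does in app.py.
with open("example.jpg", "rb") as f:  # placeholder image path
    base64_image = base64.b64encode(f.read()).decode("utf-8")

# Mirror the payload built in ask_openai_with_image: the instruction text plus
# an image_url object whose "detail" field is "low" when Low Quality Mode is
# checked and "high" otherwise.
payload = {
    "model": "gpt-4-vision-preview",
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/jpeg;base64,{base64_image}",
                        "detail": "low",
                    },
                },
            ],
        }
    ],
    "max_tokens": 4095,
}

response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_OPENAI_API_KEY"},  # placeholder key
    json=payload,
)
print(response.json())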