Nils Durner committed on
Commit
1cfc216
·
1 Parent(s): 0008662

add GPT-4V support

Browse files
Files changed (1) hide show
  1. app.py +69 -5
app.py CHANGED
@@ -1,17 +1,51 @@
1
  import gradio as gr
2
- import json
3
  import os
4
  import openai
5
 
6
  dump_controls = False
7
  log_to_console = False
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def add_text(history, text):
11
  history = history + [(text, None)]
12
  return history, gr.Textbox(value="", interactive=False)
13
 
14
-
15
  def add_file(history, file):
16
  with open(file.name, mode="rb") as f:
17
  content = f.read()
@@ -28,6 +62,15 @@ def add_file(history, file):
28
 
29
  return history
30
 
 
 
 
 
 
 
 
 
 
31
  def submit_text(txt_value):
32
  return add_text([chatbot, txt_value], [chatbot, txt_value])
33
 
@@ -61,16 +104,35 @@ def bot(message, history, oai_key, system_prompt, seed, temperature, max_tokens,
61
  if seed:
62
  seed_i = int(seed)
63
 
 
 
 
64
  history_openai_format = []
 
65
  if system_prompt:
66
  history_openai_format.append({"role": "system", "content": system_prompt})
67
  for human, assi in history:
68
  if human is not None:
69
- history_openai_format.append({"role": "user", "content": human})
 
 
 
 
 
 
 
70
  if assi is not None:
 
 
 
 
71
  history_openai_format.append({"role": "assistant", "content": assi})
 
72
  if message:
73
- history_openai_format.append({"role": "user", "content": message})
 
 
 
74
 
75
  if log_to_console:
76
  print(f"br_prompt: {str(history_openai_format)}")
@@ -101,7 +163,7 @@ with gr.Blocks() as demo:
101
  with gr.Accordion("Settings"):
102
  oai_key = gr.Textbox(label="OpenAI API Key", elem_id="oai_key")
103
  model = gr.Dropdown(label="Model", value="gpt-4-1106-preview", allow_custom_value=True, elem_id="model",
104
- choices=["gpt-4-1106-preview", "gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-1106"])
105
  system_prompt = gr.TextArea("You are a helpful AI.", label="System Prompt", lines=3, max_lines=250, elem_id="system_prompt")
106
  seed = gr.Textbox(label="Seed", elem_id="seed")
107
  temp = gr.Slider(0, 1, label="Temperature", elem_id="temp", value=1)
@@ -155,6 +217,7 @@ with gr.Blocks() as demo:
155
 
156
  with gr.Row():
157
  btn = gr.UploadButton("📁 Upload", size="sm")
 
158
  undo_btn = gr.Button("↩️ Undo")
159
  undo_btn.click(undo, inputs=[chatbot], outputs=[chatbot])
160
 
@@ -171,5 +234,6 @@ with gr.Blocks() as demo:
171
  )
172
  txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
173
  file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False, postprocess=False)
 
174
 
175
  demo.queue().launch()
 
1
  import gradio as gr
2
+ import base64
3
  import os
4
  import openai
5
 
6
  dump_controls = False
7
  log_to_console = False
8
 
9
+ # constants
10
+ image_embed_prefix = "🖼️🆙 "
11
+
12
def encode_image(image_data):
    """Build a base64 ``data:`` URL for raw image bytes.

    The image type is sniffed from the leading magic bytes. PNG, JPEG, GIF
    (both the GIF87a and GIF89a signatures) and WebP are recognised.

    Args:
        image_data: The raw image file content as ``bytes`` (NOT yet
            base64-encoded; this function performs the encoding).

    Returns:
        A string of the form ``data:image/<type>;base64,<payload>``.

    Raises:
        ValueError: If the magic bytes match none of the supported formats
            (this includes RIFF containers that are not WebP).
    """
    # Sniff against the whole buffer, not a 4-byte slice: the GIF signatures
    # are 6 bytes long and can never match a 4-byte prefix.
    if image_data.startswith(b'\x89PNG'):
        image_type = 'png'
    elif image_data.startswith(b'\xFF\xD8'):
        image_type = 'jpeg'
    elif image_data.startswith((b'GIF87a', b'GIF89a')):
        image_type = 'gif'
    elif image_data.startswith(b'RIFF') and image_data[8:12] == b'WEBP':
        # RIFF is a generic container; bytes 8-11 name the actual payload.
        image_type = 'webp'
    else:
        raise ValueError("Unknown image type")

    payload = base64.b64encode(image_data).decode('utf-8')
    return f"data:image/{image_type};base64,{payload}"
44
 
45
  def add_text(history, text):
46
  history = history + [(text, None)]
47
  return history, gr.Textbox(value="", interactive=False)
48
 
 
49
  def add_file(history, file):
50
  with open(file.name, mode="rb") as f:
51
  content = f.read()
 
62
 
63
  return history
64
 
65
def add_img(history, file):
    """Record an uploaded image in the chat history as a tagged user turn.

    The image bytes are not read here; only the file's path is stored,
    prefixed with ``image_embed_prefix``.

    Args:
        history: Current chat history, a list of ``(user, assistant)`` pairs.
        file: Uploaded file object; only its ``name`` attribute is used.

    Returns:
        A new history list with ``(image_embed_prefix + file.name, None)``
        appended. The input list is not modified.
    """
    upload_path = file.name
    if log_to_console:
        print(f"add_img {upload_path}")

    extended_history = history + [(image_embed_prefix + upload_path, None)]

    # Surface a notice in the UI naming the stored path.
    gr.Info(f"Image added as {upload_path}")

    return extended_history
73
+
74
  def submit_text(txt_value):
75
  return add_text([chatbot, txt_value], [chatbot, txt_value])
76
 
 
104
  if seed:
105
  seed_i = int(seed)
106
 
107
+ if log_to_console:
108
+ print(f"bot history: {str(history)}")
109
+
110
  history_openai_format = []
111
+ user_msg_parts = []
112
  if system_prompt:
113
  history_openai_format.append({"role": "system", "content": system_prompt})
114
  for human, assi in history:
115
  if human is not None:
116
+ if human.startswith(image_embed_prefix):
117
+ with open(human.lstrip(image_embed_prefix), mode="rb") as f:
118
+ content = f.read()
119
+ user_msg_parts.append({"type": "image_url",
120
+ "image_url":{"url": encode_image(content)}})
121
+ else:
122
+ user_msg_parts.append({"type": "text", "text": human})
123
+
124
  if assi is not None:
125
+ if user_msg_parts:
126
+ history_openai_format.append({"role": "user", "content": user_msg_parts})
127
+ user_msg_parts = []
128
+
129
  history_openai_format.append({"role": "assistant", "content": assi})
130
+
131
  if message:
132
+ user_msg_parts.append({"type": "text", "text": human})
133
+
134
+ if user_msg_parts:
135
+ history_openai_format.append({"role": "user", "content": user_msg_parts})
136
 
137
  if log_to_console:
138
  print(f"br_prompt: {str(history_openai_format)}")
 
163
  with gr.Accordion("Settings"):
164
  oai_key = gr.Textbox(label="OpenAI API Key", elem_id="oai_key")
165
  model = gr.Dropdown(label="Model", value="gpt-4-1106-preview", allow_custom_value=True, elem_id="model",
166
+ choices=["gpt-4-1106-preview", "gpt-4", "gpt-4-vision-preview", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-3.5-turbo-1106"])
167
  system_prompt = gr.TextArea("You are a helpful AI.", label="System Prompt", lines=3, max_lines=250, elem_id="system_prompt")
168
  seed = gr.Textbox(label="Seed", elem_id="seed")
169
  temp = gr.Slider(0, 1, label="Temperature", elem_id="temp", value=1)
 
217
 
218
  with gr.Row():
219
  btn = gr.UploadButton("📁 Upload", size="sm")
220
+ img_btn = gr.UploadButton("🖼️ Upload", size="sm", file_types=["image"])
221
  undo_btn = gr.Button("↩️ Undo")
222
  undo_btn.click(undo, inputs=[chatbot], outputs=[chatbot])
223
 
 
234
  )
235
  txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)
236
  file_msg = btn.upload(add_file, [chatbot, btn], [chatbot], queue=False, postprocess=False)
237
+ img_msg = img_btn.upload(add_img, [chatbot, img_btn], [chatbot], queue=False, postprocess=False)
238
 
239
  demo.queue().launch()