Committed by Haofei Yu
Commit: a917903
Parent: b4021c8

Feature/test vision processors pass (#15)


* support pre-commit

* support running

* pass text-based processor

* pass vision-based processor

Files changed (1):
  1. app.py +23 -12
app.py CHANGED
@@ -2,6 +2,9 @@ import os
 import sys
 
 import gradio as gr
+import base64
+import io
+from PIL import Image
 
 sys.path.append("../CTM/")
 from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine
@@ -12,6 +15,15 @@ ctm.add_supervisor("gpt4_supervisor")
 DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
 
 
+def convert_base64(image_array):
+    image = Image.fromarray(image_array)
+    buffer = io.BytesIO()
+    image.save(buffer, format="PNG")
+    byte_data = buffer.getvalue()
+    base64_string = base64.b64encode(byte_data).decode('utf-8')
+    return base64_string
+
+
 def introduction():
     with gr.Column(scale=2):
         gr.Image(
@@ -44,7 +56,7 @@ def processor_tab():
         "gpt4v_cloth_fashion_processor",
         "gpt4v_face_emotion_processor",
         "gpt4v_ocr_processor",
-        "gpt4v_posture",
+        "gpt4v_posture_processor",
         "gpt4v_scene_location_processor",
     ]
 
@@ -95,10 +107,11 @@ def processor_tab():
     )
 
 
-def forward(query, content, image, state):
+def forward(query, text, image, state):
     state["question"] = query
+    image = convert_base64(image)
     ask_processors_output_info, state = ask_processors(
-        query, content, image, state
+        query, text, image, state
     )
     uptree_competition_output_info, state = uptree_competition(state)
     ask_supervisor_output_info, state = ask_supervisor(state)
@@ -113,14 +126,12 @@ def forward(query, content, image, state):
     )
 
 
-def ask_processors(query, content, image, state):
+def ask_processors(query, text, image, state):
     # Simulate processing here
     processor_output = ctm.ask_processors(
         query=query,
-        text=content,
-        #image_path=None,
-        #audio_path=None,
-        #video_path=None,
+        text=text,
+        image=image,
     )
     output_info = ""
     for name, info in processor_output.items():
@@ -156,11 +167,11 @@ def interface_tab():
 
     with gr.Column():
         # Inputs
-        content = gr.Textbox(label="Enter your text here")
+        text = gr.Textbox(label="Enter your text here")
         query = gr.Textbox(label="Enter your query here")
         image = gr.Image(label="Upload your image")
-        audio = gr.Audio(label="Upload or Record Audio")
-        video = gr.Video(label="Upload or Record Video")
+        #audio = gr.Audio(label="Upload or Record Audio")
+        #video = gr.Video(label="Upload or Record Video")
 
         # Processing buttons
         forward_button = gr.Button("Start CTM forward process")
@@ -179,7 +190,7 @@ def interface_tab():
     # Set up button to start or continue processing
     forward_button.click(
         fn=forward,
-        inputs=[query, content, image, state],
+        inputs=[query, text, image, state],
         outputs=[
            processors_output,
            competition_output,
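
For reference, a minimal, self-contained sketch of the image path this commit introduces: gr.Image hands the forward callback a numpy array, the new convert_base64 helper turns it into a base64-encoded PNG string, and that string is what ctm.ask_processors now receives through the image= keyword. The numpy test array and the __main__ harness below are illustrative only, not part of the commit.

import base64
import io

import numpy as np
from PIL import Image


def convert_base64(image_array):
    # Same logic as the helper added to app.py: numpy array -> PNG bytes -> base64 string.
    image = Image.fromarray(image_array)
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


if __name__ == "__main__":
    # gr.Image passes the upload to the callback as a uint8 numpy array by default.
    fake_upload = np.zeros((4, 4, 3), dtype=np.uint8)  # illustrative stand-in for a real upload
    encoded = convert_base64(fake_upload)
    print(encoded[:32] + "...")  # base64 PNG string, ready to hand to the vision processors

The encoded string is the format the gpt4v_* vision processors listed above consume, which is why forward() now runs the conversion before calling ask_processors().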