Committed by Haofei Yu
Commit: a917903
Parent: b4021c8

Feature/test vision processors pass (#15)


* support pre-commit

* support running

* pass text-based processor

* pass vision-based processor

Files changed (1):
  1. app.py +23 -12
app.py CHANGED
@@ -2,6 +2,9 @@ import os
 import sys
 
 import gradio as gr
+import base64
+import io
+from PIL import Image
 
 sys.path.append("../CTM/")
 from ctm.ctms.ctm_base import BaseConsciousnessTuringMachine
@@ -12,6 +15,15 @@ ctm.add_supervisor("gpt4_supervisor")
 DEPLOYED = os.getenv("DEPLOYED", "true").lower() == "true"
 
 
+def convert_base64(image_array):
+    image = Image.fromarray(image_array)
+    buffer = io.BytesIO()
+    image.save(buffer, format="PNG")
+    byte_data = buffer.getvalue()
+    base64_string = base64.b64encode(byte_data).decode('utf-8')
+    return base64_string
+
+
 def introduction():
     with gr.Column(scale=2):
         gr.Image(
@@ -44,7 +56,7 @@ def processor_tab():
         "gpt4v_cloth_fashion_processor",
         "gpt4v_face_emotion_processor",
         "gpt4v_ocr_processor",
-        "gpt4v_posture",
+        "gpt4v_posture_processor",
         "gpt4v_scene_location_processor",
     ]
 
@@ -95,10 +107,11 @@ def processor_tab():
     )
 
 
-def forward(query, content, image, state):
+def forward(query, text, image, state):
     state["question"] = query
+    image = convert_base64(image)
     ask_processors_output_info, state = ask_processors(
-        query, content, image, state
+        query, text, image, state
     )
     uptree_competition_output_info, state = uptree_competition(state)
     ask_supervisor_output_info, state = ask_supervisor(state)
@@ -113,14 +126,12 @@ def forward(query, content, image, state):
     )
 
 
-def ask_processors(query, content, image, state):
+def ask_processors(query, text, image, state):
     # Simulate processing here
     processor_output = ctm.ask_processors(
         query=query,
-        text=content,
-        #image_path=None,
-        #audio_path=None,
-        #video_path=None,
+        text=text,
+        image=image,
     )
     output_info = ""
     for name, info in processor_output.items():
@@ -156,11 +167,11 @@ def interface_tab():
 
     with gr.Column():
         # Inputs
-        content = gr.Textbox(label="Enter your text here")
+        text = gr.Textbox(label="Enter your text here")
         query = gr.Textbox(label="Enter your query here")
         image = gr.Image(label="Upload your image")
-        audio = gr.Audio(label="Upload or Record Audio")
-        video = gr.Video(label="Upload or Record Video")
+        #audio = gr.Audio(label="Upload or Record Audio")
+        #video = gr.Video(label="Upload or Record Video")
 
         # Processing buttons
         forward_button = gr.Button("Start CTM forward process")
@@ -179,7 +190,7 @@ def interface_tab():
     # Set up button to start or continue processing
     forward_button.click(
         fn=forward,
-        inputs=[query, content, image, state],
+        inputs=[query, text, image, state],
         outputs=[
            processors_output,
            competition_output,
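
For reference, a minimal, self-contained sketch of the image path this commit introduces: gr.Image hands the forward callback a numpy array, the new convert_base64 helper turns it into a base64-encoded PNG string, and that string is what ctm.ask_processors now receives through the image= keyword. The numpy test array and the __main__ harness below are illustrative only, not part of the commit.

import base64
import io

import numpy as np
from PIL import Image


def convert_base64(image_array):
    # Same logic as the helper added to app.py: numpy array -> PNG bytes -> base64 string.
    image = Image.fromarray(image_array)
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


if __name__ == "__main__":
    # gr.Image passes the upload to the callback as a uint8 numpy array by default.
    fake_upload = np.zeros((4, 4, 3), dtype=np.uint8)  # illustrative stand-in for a real upload
    encoded = convert_base64(fake_upload)
    print(encoded[:32] + "...")  # base64 PNG string, ready to hand to the vision processors

The encoded string is the format the gpt4v_* vision processors listed above consume, which is why forward() now runs the conversion before calling ask_processors().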