Spaces: Running on Zero
Added basic UI for testing
Browse files - src/app.py (+30 -3)
src/app.py
CHANGED
@@ -62,7 +62,6 @@ def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, floa
|
|
62 |
|
63 |
def process_video(video_path: str, max_images: int) -> list[dict]:
|
64 |
result_content = []
|
65 |
-
# TODO: Change max_image to slider
|
66 |
frames = get_frames(video_path, max_images)
|
67 |
for frame in frames:
|
68 |
image, timestamp = frame
|
@@ -124,7 +123,11 @@ def process_history(history: list[dict]) -> list[dict]:
|
|
124 |
|
125 |
@spaces.GPU(duration=120)
|
126 |
def run(
|
127 |
-
message: dict,
|
|
|
|
|
|
|
|
|
128 |
) -> Iterator[str]:
|
129 |
|
130 |
messages = []
|
@@ -133,7 +136,9 @@ def run(
|
|
133 |
{"role": "system", "content": [{"type": "text", "text": system_prompt}]}
|
134 |
)
|
135 |
messages.extend(process_history(history))
|
136 |
-
messages.append(
|
|
|
|
|
137 |
|
138 |
inputs = input_processor.apply_chat_template(
|
139 |
messages,
|
@@ -158,3 +163,25 @@ def run(
|
|
158 |
for delta in streamer:
|
159 |
output += delta
|
160 |
yield output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
def process_video(video_path: str, max_images: int) -> list[dict]:
|
64 |
result_content = []
|
|
|
65 |
frames = get_frames(video_path, max_images)
|
66 |
for frame in frames:
|
67 |
image, timestamp = frame
|
|
|
123 |
|
124 |
@spaces.GPU(duration=120)
|
125 |
def run(
|
126 |
+
message: dict,
|
127 |
+
history: list[dict],
|
128 |
+
system_prompt: str,
|
129 |
+
max_new_tokens: int,
|
130 |
+
max_images: int,
|
131 |
) -> Iterator[str]:
|
132 |
|
133 |
messages = []
|
|
|
136 |
{"role": "system", "content": [{"type": "text", "text": system_prompt}]}
|
137 |
)
|
138 |
messages.extend(process_history(history))
|
139 |
+
messages.append(
|
140 |
+
{"role": "user", "content": process_user_input(message, max_images)}
|
141 |
+
)
|
142 |
|
143 |
inputs = input_processor.apply_chat_template(
|
144 |
messages,
|
|
|
163 |
for delta in streamer:
|
164 |
output += delta
|
165 |
yield output
|
166 |
+
|
167 |
+
|
168 |
+
demo = gr.ChatInterface(
|
169 |
+
fn=run,
|
170 |
+
type="messages",
|
171 |
+
chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
|
172 |
+
textbox=gr.MultimodalTextbox(
|
173 |
+
file_types=["image", ".mp4"], file_count="multiple", autofocus=True
|
174 |
+
),
|
175 |
+
multimodal=True,
|
176 |
+
additional_inputs=[
|
177 |
+
gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
|
178 |
+
gr.Slider(
|
179 |
+
label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
|
180 |
+
),
|
181 |
+
gr.Slider(label="Max Images", minimum=1, maximum=4, step=1, value=2),
|
182 |
+
],
|
183 |
+
stop_btn=False,
|
184 |
+
)
|
185 |
+
|
186 |
+
if __name__ == "__main__":
|
187 |
+
demo.launch()
|