AC2513 committed
Commit 397b627 · 1 Parent(s): aeeaba2

added basic UI for testing

Files changed (1):
  src/app.py  +30 -3
src/app.py CHANGED
@@ -62,7 +62,6 @@ def get_frames(video_path: str, max_images: int) -> list[tuple[Image.Image, floa
 
 def process_video(video_path: str, max_images: int) -> list[dict]:
     result_content = []
-    # TODO: Change max_image to slider
     frames = get_frames(video_path, max_images)
     for frame in frames:
         image, timestamp = frame
@@ -124,7 +123,11 @@ def process_history(history: list[dict]) -> list[dict]:
 
 @spaces.GPU(duration=120)
 def run(
-    message: dict, history: list[dict], system_prompt: str, max_new_tokens: int = 512
+    message: dict,
+    history: list[dict],
+    system_prompt: str,
+    max_new_tokens: int,
+    max_images: int,
 ) -> Iterator[str]:
 
     messages = []
@@ -133,7 +136,9 @@ def run(
         {"role": "system", "content": [{"type": "text", "text": system_prompt}]}
     )
     messages.extend(process_history(history))
-    messages.append({"role": "user", "content": process_user_input(message)})
+    messages.append(
+        {"role": "user", "content": process_user_input(message, max_images)}
+    )
 
     inputs = input_processor.apply_chat_template(
         messages,
@@ -158,3 +163,25 @@ def run(
     for delta in streamer:
         output += delta
         yield output
+
+
+demo = gr.ChatInterface(
+    fn=run,
+    type="messages",
+    chatbot=gr.Chatbot(type="messages", scale=1, allow_tags=["image"]),
+    textbox=gr.MultimodalTextbox(
+        file_types=["image", ".mp4"], file_count="multiple", autofocus=True
+    ),
+    multimodal=True,
+    additional_inputs=[
+        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
+        gr.Slider(
+            label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700
+        ),
+        gr.Slider(label="Max Images", minimum=1, maximum=4, step=1, value=2),
+    ],
+    stop_btn=False,
+)
+
+if __name__ == "__main__":
+    demo.launch()
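
Note (not part of the commit): gr.ChatInterface passes each value in additional_inputs to the chat function as an extra positional argument after (message, history), which is why run() now takes system_prompt, max_new_tokens, and max_images explicitly instead of a hard-coded max_new_tokens default. Below is a minimal standalone sketch of that wiring, with a hypothetical echo_settings stub standing in for the real run():

import gradio as gr


def echo_settings(message, history, system_prompt, max_new_tokens, max_images):
    # Stand-in for run(): just report which settings ChatInterface handed over.
    return (
        f"system_prompt={system_prompt!r}, "
        f"max_new_tokens={max_new_tokens}, max_images={max_images}"
    )


demo = gr.ChatInterface(
    fn=echo_settings,
    type="messages",
    multimodal=True,  # message arrives as a dict with "text" and "files"
    additional_inputs=[
        # These reach echo_settings() in the same order, after (message, history).
        gr.Textbox(label="System Prompt", value="You are a helpful assistant."),
        gr.Slider(label="Max New Tokens", minimum=100, maximum=2000, step=10, value=700),
        gr.Slider(label="Max Images", minimum=1, maximum=4, step=1, value=2),
    ],
)

if __name__ == "__main__":
    demo.launch()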