fix image upload bug
- app.py +5 -5
- tools/conversation.py +5 -0
app.py
@@ -25,10 +25,10 @@ import torch
 
 # huggingface-cli login
 
-device = 'cuda'
 model_path = os.getenv("MODEL_PATH", "omni-research/Tarsier2-7b")
-max_n_frames = int(os.getenv("MAX_N_FRAMES",
+max_n_frames = int(os.getenv("MAX_N_FRAMES", 16))
 debug = False
+device = 'cuda' if not debug else 'cpu'
 
 # ========================================
 # Model Initialization
@@ -60,7 +60,7 @@ def gradio_reset(chat_state, img_file, img_list):
 
 
 def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
-    print(gr_img,
+    print("video, image or gif:", gr_video, gr_img, gr_gif)
     conv_type = ''
     if 'tarsier2-7b' in model_path.lower():
         conv_type = 'tarsier2-7b'
@@ -78,7 +78,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
     if gr_img is None and gr_video is None and gr_gif is None:
         return None, None, None, gr.update(interactive=True), gr.update(interactive=True, placeholder='Please upload video/image first!'), chat_state, None, None
     if gr_video or gr_img or gr_gif:
-        for img_file in [gr_video,
+        for img_file in [gr_video, gr_img, gr_gif]:
             if img_file is not None:
                 break
     return gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_file, img_list
@@ -87,7 +87,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
 def gradio_ask(user_message, chatbot, chat_state):
     if len(user_message) == 0:
         return gr.update(interactive=True, placeholder='Input should not be empty!'), chatbot, chat_state
-    chat_state =
+    chat_state = chat.ask(user_message, chat_state)
     chatbot = chatbot + [[user_message, None]]
     return '', chatbot, chat_state
 
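Taken together, the app.py changes derive `device` from the `debug` flag (`'cuda'` unless debugging, else `'cpu'`), give `MAX_N_FRAMES` an explicit default of 16, have `upload_img` log all three possible inputs and pick the first non-None entry of `[gr_video, gr_img, gr_gif]`, and have `gradio_ask` push the user message into `chat_state` via `chat.ask` before appending it to the chatbot history. Below is a minimal, self-contained sketch of the first-non-None selection pattern; `first_uploaded` is an illustrative name, not a function in app.py.

# Illustrative sketch of the selection logic used in upload_img after the fix:
# walk the candidates in priority order and keep the first one that is not None.
def first_uploaded(gr_video=None, gr_img=None, gr_gif=None):
    img_file = None
    for candidate in [gr_video, gr_img, gr_gif]:
        if candidate is not None:
            img_file = candidate
            break
    return img_file

# Example: only an image was uploaded, so it is the one that gets used.
assert first_uploaded(gr_img="example.jpg") == "example.jpg"

In the real handler the loop variable `img_file` survives the `break`, which is why `upload_img` can return it directly in its final tuple.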
tools/conversation.py
@@ -78,6 +78,7 @@ class Chat:
 
     def prepare_model_inputs(self, conv, visual_data_file=None, images=None, n_frames=None):
         conv.messages.append([conv.roles[1], None])
+        print(conv.messages)
         conv.messages[0][1] = re.sub(f"({IMAGE_TOKEN}|{VIDEO_TOKEN})\n*", "", conv.messages[0][1])
 
         if images is None or isinstance(images, list) and len(images) == 0:
@@ -89,6 +90,10 @@ class Chat:
             images = None
         else:
             raise NotImplementedError
+
+        # os.system("rm tmp_images/*")
+        # for i, img in enumerate(images):
+        #     img.save(f"tmp_images/{i+1}.jpg")
 
         if isinstance(images, list) and len(images) > 0:
             conv.messages[0][1] = IMAGE_TOKEN*len(images) + '\n' + conv.messages[0][1]
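The tools/conversation.py change is purely diagnostic: `prepare_model_inputs` now prints `conv.messages` right after the assistant placeholder is appended, and a commented-out block is left in place for dumping sampled frames to tmp_images/. As a hedged sketch of what that block would do if enabled, assuming `images` is a list of PIL.Image.Image frames (the diff does not state the type), it clears the directory and writes each frame as a numbered JPEG:

import os
from PIL import Image

def dump_frames(images: list[Image.Image], out_dir: str = "tmp_images") -> None:
    # Stand-in for the commented-out os.system("rm tmp_images/*") call.
    os.makedirs(out_dir, exist_ok=True)
    for name in os.listdir(out_dir):
        os.remove(os.path.join(out_dir, name))
    # Mirrors the commented-out save loop: 1.jpg, 2.jpg, ...
    for i, img in enumerate(images):
        img.save(os.path.join(out_dir, f"{i+1}.jpg"))

Replacing the shell `rm` with os-level deletes keeps the sketch portable and avoids spawning a subprocess.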