0nejiawei committed on
Commit
9a1832e
·
1 Parent(s): 0621bf5

fix image upload bug

Browse files
Files changed (2) hide show
  1. app.py +5 -5
  2. tools/conversation.py +5 -0
app.py CHANGED
@@ -25,10 +25,10 @@ import torch
25
 
26
  # huggingface-cli login
27
 
28
- device = 'cuda'
29
  model_path = os.getenv("MODEL_PATH", "omni-research/Tarsier2-7b")
30
- max_n_frames = int(os.getenv("MAX_N_FRAMES", 8))
31
  debug = False
 
32
 
33
  # ========================================
34
  # Model Initialization
@@ -60,7 +60,7 @@ def gradio_reset(chat_state, img_file, img_list):
60
 
61
 
62
  def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
63
- print(gr_img, gr_video)
64
  conv_type = ''
65
  if 'tarsier2-7b' in model_path.lower():
66
  conv_type = 'tarsier2-7b'
@@ -78,7 +78,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
78
  if gr_img is None and gr_video is None and gr_gif is None:
79
  return None, None, None, gr.update(interactive=True), gr.update(interactive=True, placeholder='Please upload video/image first!'), chat_state, None, None
80
  if gr_video or gr_img or gr_gif:
81
- for img_file in [gr_video, gr_video, gr_gif]:
82
  if img_file is not None:
83
  break
84
  return gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_file, img_list
@@ -87,7 +87,7 @@ def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
87
  def gradio_ask(user_message, chatbot, chat_state):
88
  if len(user_message) == 0:
89
  return gr.update(interactive=True, placeholder='Input should not be empty!'), chatbot, chat_state
90
- chat_state = chat.ask(user_message, chat_state)
91
  chatbot = chatbot + [[user_message, None]]
92
  return '', chatbot, chat_state
93
 
 
25
 
26
  # huggingface-cli login
27
 
 
28
  model_path = os.getenv("MODEL_PATH", "omni-research/Tarsier2-7b")
29
+ max_n_frames = int(os.getenv("MAX_N_FRAMES", 16))
30
  debug = False
31
+ device = 'cuda' if not debug else 'cpu'
32
 
33
  # ========================================
34
  # Model Initialization
 
60
 
61
 
62
  def upload_img(gr_img, gr_video, gr_gif, chat_state, num_frames):
63
+ print("video, image or gif:", gr_video, gr_img, gr_gif)
64
  conv_type = ''
65
  if 'tarsier2-7b' in model_path.lower():
66
  conv_type = 'tarsier2-7b'
 
78
  if gr_img is None and gr_video is None and gr_gif is None:
79
  return None, None, None, gr.update(interactive=True), gr.update(interactive=True, placeholder='Please upload video/image first!'), chat_state, None, None
80
  if gr_video or gr_img or gr_gif:
81
+ for img_file in [gr_video, gr_img, gr_gif]:
82
  if img_file is not None:
83
  break
84
  return gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_file, img_list
 
87
  def gradio_ask(user_message, chatbot, chat_state):
88
  if len(user_message) == 0:
89
  return gr.update(interactive=True, placeholder='Input should not be empty!'), chatbot, chat_state
90
+ chat_state = chat.ask(user_message, chat_state)
91
  chatbot = chatbot + [[user_message, None]]
92
  return '', chatbot, chat_state
93
 
tools/conversation.py CHANGED
@@ -78,6 +78,7 @@ class Chat:
78
 
79
  def prepare_model_inputs(self, conv, visual_data_file=None, images=None, n_frames=None):
80
  conv.messages.append([conv.roles[1], None])
 
81
  conv.messages[0][1] = re.sub(f"({IMAGE_TOKEN}|{VIDEO_TOKEN})\n*", "", conv.messages[0][1])
82
 
83
  if images is None or isinstance(images, list) and len(images) == 0:
@@ -89,6 +90,10 @@ class Chat:
89
  images = None
90
  else:
91
  raise NotImplementedError
 
 
 
 
92
 
93
  if isinstance(images, list) and len(images) > 0:
94
  conv.messages[0][1] = IMAGE_TOKEN*len(images) + '\n' + conv.messages[0][1]
 
78
 
79
  def prepare_model_inputs(self, conv, visual_data_file=None, images=None, n_frames=None):
80
  conv.messages.append([conv.roles[1], None])
81
+ print(conv.messages)
82
  conv.messages[0][1] = re.sub(f"({IMAGE_TOKEN}|{VIDEO_TOKEN})\n*", "", conv.messages[0][1])
83
 
84
  if images is None or isinstance(images, list) and len(images) == 0:
 
90
  images = None
91
  else:
92
  raise NotImplementedError
93
+
94
+ # os.system("rm tmp_images/*")
95
+ # for i, img in enumerate(images):
96
+ # img.save(f"tmp_images/{i+1}.jpg")
97
 
98
  if isinstance(images, list) and len(images) > 0:
99
  conv.messages[0][1] = IMAGE_TOKEN*len(images) + '\n' + conv.messages[0][1]