Reuben Tan committed on
Commit
a3e4fa6
1 Parent(s): eed8ced

fix upload error

Browse files
app.py CHANGED
@@ -111,18 +111,30 @@ def gradio_reset(chat_state, img_list):
111
  img_list = []
112
  return None, gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), gr.update(placeholder='Please upload your video first', interactive=False),gr.update(value="Upload & Start Chat", interactive=True), chat_state, img_list
113
 
114
- def upload_imgorvideo(gr_video, text_input, chat_state,chatbot):
115
  if args.model_type == 'vicuna':
116
  chat_state = default_conversation.copy()
117
  else:
118
  chat_state = conv_llava_llama_2.copy()
119
-
120
- print(gr_video)
121
- chatbot = chatbot + [((gr_video,), None)]
122
- chat_state.system = "You are able to understand the visual content that the user provides. Follow the instructions carefully and explain your answers in detail."
123
- img_list = []
124
- llm_message = chat.upload_video_without_audio(gr_video, chat_state, img_list)
125
- return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  def gradio_ask(user_message, chatbot, chat_state):
128
  if len(user_message) == 0:
@@ -185,7 +197,7 @@ with gr.Blocks() as demo:
185
  with gr.Row():
186
  with gr.Column(scale=0.5):
187
  video = gr.Video()
188
- #image = gr.Image(type="filepath")
189
  #gr.Markdown(case_note_upload)
190
 
191
  upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
@@ -226,8 +238,9 @@ with gr.Blocks() as demo:
226
  ], inputs=[video, text_input])'''
227
 
228
  gr.Markdown(cite_markdown)
229
- upload_button.click(upload_imgorvideo, [video, text_input, chat_state,chatbot], [video, text_input, upload_button, chat_state, img_list,chatbot])
230
-
 
231
  text_input.submit(gradio_ask, [text_input, chatbot, chat_state], [text_input, chatbot, chat_state]).then(
232
  gradio_answer, [chatbot, chat_state, img_list, num_beams, temperature], [chatbot, chat_state, img_list]
233
  )
 
111
  img_list = []
112
  return None, gr.update(value=None, interactive=True), gr.update(value=None, interactive=True), gr.update(placeholder='Please upload your video first', interactive=False),gr.update(value="Upload & Start Chat", interactive=True), chat_state, img_list
113
 
114
def upload_imgorvideo(gr_video, gr_img, text_input, chat_state, chatbot):
    """Start a chat session from the uploaded image or video.

    This handler is wired to ``upload_button.click`` with seven output
    components, so every branch returns exactly seven values, in this order:
    video, image, text_input, upload_button, chat_state, img_list, chatbot.

    Args:
        gr_video: Value of the video component, or None when nothing is set.
        gr_img: Value of the image component, or None when nothing is set.
        text_input: Current textbox value (unused; part of the click inputs).
        chat_state: Incoming conversation state; replaced by a fresh copy.
        chatbot: Chat history list displayed in the UI.

    Returns:
        A 7-tuple of component updates/values matching the output order above.
    """
    # Always start from a fresh conversation template for the configured model.
    if args.model_type == 'vicuna':
        chat_state = default_conversation.copy()
    else:
        chat_state = conv_llava_llama_2.copy()

    has_img = gr_img is not None
    has_video = gr_video is not None

    if not has_img and not has_video:
        # Nothing uploaded: clear the inputs and keep the upload button usable.
        # The chatbot is passed through unchanged so all seven outputs are fed.
        return None, None, None, gr.update(interactive=True), chat_state, None, chatbot

    if has_img and has_video:
        # Both inputs supplied: only one is supported at a time. The image
        # component gets its own update so the tuple lines up with the seven
        # outputs (the message lands on the textbox and the upload button).
        return (
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False, placeholder='Currently, only one input is supported'),
            gr.update(value="Currently, only one input is supported", interactive=False),
            chat_state,
            None,
            chatbot,
        )

    # Exactly one media input is present from here on.
    media = gr_img if has_img else gr_video
    print(media)
    chatbot = chatbot + [((media,), None)]
    chat_state.system = "You are able to understand the visual content that the user provides. Follow the instructions carefully and explain your answers in detail."
    img_list = []
    # The upload helpers receive chat_state and img_list; their return value
    # is not used by this handler.
    if has_img:
        chat.upload_img(gr_img, chat_state, img_list)
    else:
        chat.upload_video_without_audio(gr_video, chat_state, img_list)

    # Lock both media inputs and the button, and enable the textbox for chat.
    return (
        gr.update(interactive=False),
        gr.update(interactive=False),
        gr.update(interactive=True, placeholder='Type and press Enter'),
        gr.update(value="Start Chatting", interactive=False),
        chat_state,
        img_list,
        chatbot,
    )
138
 
139
  def gradio_ask(user_message, chatbot, chat_state):
140
  if len(user_message) == 0:
 
197
  with gr.Row():
198
  with gr.Column(scale=0.5):
199
  video = gr.Video()
200
+ image = gr.Image(type="filepath")
201
  #gr.Markdown(case_note_upload)
202
 
203
  upload_button = gr.Button(value="Upload & Start Chat", interactive=True, variant="primary")
 
238
  ], inputs=[video, text_input])'''
239
 
240
  gr.Markdown(cite_markdown)
241
+ #upload_button.click(upload_imgorvideo, [video, text_input, chat_state,chatbot], [video, text_input, upload_button, chat_state, img_list,chatbot])
242
+ upload_button.click(upload_imgorvideo, [video, image, text_input, chat_state,chatbot], [video, image, text_input, upload_button, chat_state, img_list,chatbot])
243
+
244
  text_input.submit(gradio_ask, [text_input, chatbot, chat_state], [text_input, chatbot, chat_state]).then(
245
  gradio_answer, [chatbot, chat_state, img_list, num_beams, temperature], [chatbot, chat_state, img_list]
246
  )
global_local/models/video_instruction_ft_model.py CHANGED
@@ -127,6 +127,7 @@ class VideoInstructionFTLLAMA(Blip2Base):
127
 
128
  logging.info('Loading LLAMA Tokenizer')
129
  self.llama_tokenizer = LlamaTokenizer.from_pretrained(llama_model, use_fast=False, token=os.environ['LLAMA_TOKEN'])
 
130
  if self.llama_tokenizer.pad_token is None:
131
  self.llama_tokenizer.pad_token = self.llama_tokenizer.unk_token
132
  DEFAULT_IMAGE_PATCH_TOKEN = '<ImageHere>'
 
127
 
128
  logging.info('Loading LLAMA Tokenizer')
129
  self.llama_tokenizer = LlamaTokenizer.from_pretrained(llama_model, use_fast=False, token=os.environ['LLAMA_TOKEN'])
130
+ #self.llama_tokenizer = LlamaTokenizer.from_pretrained(llama_model, use_fast=False)
131
  if self.llama_tokenizer.pad_token is None:
132
  self.llama_tokenizer.pad_token = self.llama_tokenizer.unk_token
133
  DEFAULT_IMAGE_PATCH_TOKEN = '<ImageHere>'