praeclarumjj3 committed
Commit 2d49497
1 Parent(s): 7015bfd

Update app.py

Files changed (1): app.py (+27 −21)
app.py CHANGED
@@ -5,7 +5,6 @@ import os
 import time
 
 import gradio as gr
-import requests
 import hashlib
 
 from vcoder_llava.vcoder_conversation import (default_conversation, conv_templates,
@@ -200,7 +199,8 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
                yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
                return
            time.sleep(0.03)
-    except:
+    except Exception:
+        gr.Warning(server_error_msg)
        state.messages[-1][-1] = server_error_msg
        yield (state, state.to_gradio_chatbot()) + (disable_btn, disable_btn, disable_btn, enable_btn, enable_btn)
        return
@@ -225,23 +225,24 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, request:
        }
        fout.write(json.dumps(data) + "\n")
 
-title_markdown = ("""
-# 🌋 LLaVA: Large Language and Vision Assistant
-[[Project Page]](https://llava-vl.github.io) [[Paper]](https://arxiv.org/abs/2304.08485) [[Code]](https://github.com/haotian-liu/LLaVA) [[Model]](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)
-""")
+
+title = "<h1 style='margin-bottom: -10px; text-align: center'>VCoder: Versatile Vision Encoders for Multimodal Large Language Models</h1>"
+# style='
+description = "<p style='font-size: 16px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain, </a> <a href='https://jwyang.github.io/' style='text-decoration:none' target='_blank'>Jianwei Yang, <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi</a></p>" \
+    + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/vcoder/' target='_blank'>Project Page</a> | <a href='https://praeclarumjj3.github.io/vcoder/' target='_blank'>Video</a> | <a href='https://arxiv.org/abs/2211.06220' target='_blank'>ArXiv Paper</a> | <a href='https://github.com/SHI-Labs/VCoder' target='_blank'>Github Repo</a></p>" \
+    + "<p style='text-align: center; font-size: 16px; margin: 5px; font-weight: w300;'> [Note: Please click on Regenerate button if you are unsatisfied with the generated response. You may find screenshots of our demo trials <a href='https://github.com/SHI-Labs/VCoder/blob/main/images/' style='text-decoration:none' target='_blank'>here</a>.]</p>" \
+    + "<p style='text-align: center; font-size: 16px; margin: 5px; font-weight: w300;'> [Note: You can obtain segmentation maps for your image using the <a href='https://huggingface.co/spaces/shi-labs/OneFormer' style='text-decoration:none' target='_blank'>OneFormer Demo</a>. Please click on Regenerate button if you are unsatisfied with the generated response. You may find screenshots of our demo trials <a href='https://github.com/SHI-Labs/VCoder/blob/main/images/' style='text-decoration:none' target='_blank'>here</a>.]</p>"
 
 tos_markdown = ("""
 ### Terms of use
 By using this service, users are required to agree to the following terms:
-The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes. The service may collect user dialogue data for future research.
-Please click the "Flag" button if you get any inappropriate answer! We will collect those to keep improving our moderator.
-For an optimal experience, please use desktop computers for this demo, as mobile devices may compromise its quality.
+The service is a research preview intended for non-commercial use only. It only provides limited safety measures and may generate offensive content. It must not be used for any illegal, harmful, violent, racist, or sexual purposes.
 """)
 
 
 learn_more_markdown = ("""
 ### License
-The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
+The service is a research preview intended for non-commercial use only, subject to the [License](https://huggingface.co/lmsys/vicuna-7b-v1.5) of Vicuna-v1.5, [License](https://github.com/haotian-liu/LLaVA/blob/main/LICENSE) of LLaVA, [Terms of Use](https://cocodataset.org/#termsofuse) of the COCO dataset, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
 """)
 
 block_css = """
@@ -259,7 +260,8 @@ def build_demo(embed_mode):
        state = gr.State()
 
        if not embed_mode:
-            gr.Markdown(title_markdown)
+            gr.Markdown(title)
+            gr.Markdown(description)
 
        with gr.Row():
            with gr.Column(scale=3):
@@ -284,15 +286,9 @@
                    value="Default",
                    label="Preprocess for non-square Seg Map", visible=False)
 
-                cur_dir = os.path.dirname(os.path.abspath(__file__))
-                gr.Examples(examples=[
-                    [f"{cur_dir}/examples/3.jpg", f"{cur_dir}/examples/3_pan.png", "What objects can be seen in the image?"],
-                    [f"{cur_dir}/examples/3.jpg", f"{cur_dir}/examples/3_ins.png", "What objects can be seen in the image?"],
-                ], inputs=[imagebox, segbox, textbox])
-
                with gr.Accordion("Parameters", open=False) as parameter_row:
-                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
-                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
+                    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, interactive=True, label="Temperature",)
+                    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.1, interactive=True, label="Top P",)
                    max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
 
            with gr.Column(scale=8):
@@ -310,6 +306,16 @@
                    regenerate_btn = gr.Button(value="🔄 Regenerate", interactive=False)
                    clear_btn = gr.Button(value="🗑️ Clear", interactive=False)
 
+        cur_dir = os.path.dirname(os.path.abspath(__file__))
+        gr.Examples(examples=[
+            [f"{cur_dir}/examples/people.jpg", f"{cur_dir}/examples/people_pan.png", "What objects can be seen in the image?", "0.9", "1.0"],
+            [f"{cur_dir}/examples/corgi.jpg", f"{cur_dir}/examples/corgi_pan.png", "What objects can be seen in the image?", "0.6", "0.7"],
+            [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "Can you count the number of people in the image?", "0.8", "0.9"],
+            [f"{cur_dir}/examples/friends.jpg", f"{cur_dir}/examples/friends_pan.png", "What is happening in the image?", "0.8", "0.9"],
+            [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_pan.png", "What objects can be seen in the image?", "0.5", "0.5"],
+            [f"{cur_dir}/examples/suits.jpg", f"{cur_dir}/examples/suits_ins.png", "What objects can be seen in the image?", "0.5", "0.5"],
+        ], inputs=[imagebox, segbox, textbox, temperature, top_p])
+
        if not embed_mode:
            gr.Markdown(tos_markdown)
            gr.Markdown(learn_more_markdown)
@@ -342,7 +348,7 @@
 
 if __name__ == "__main__":
    parser = argparse.ArgumentParser()
-    parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
+    parser.add_argument("--model-path", type=str, default="shi-labs/vcoder_ds_llava-v1.5-13b")
    parser.add_argument("--model-base", type=str, default=None)
    parser.add_argument("--model-name", type=str)
    parser.add_argument("--load-8bit", action="store_true")
@@ -386,4 +392,4 @@ if __name__ == "__main__":
        server_name=args.host,
        server_port=args.port,
        share=args.share
-    )
+    )
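The most consequential change in `http_bot` is the error handling: the bare `except:` becomes `except Exception:` and the failure is surfaced to the user via `gr.Warning(server_error_msg)` instead of being swallowed silently. A minimal sketch of that pattern, assuming Gradio ≥ 3.44 (where `gr.Warning` toasts were introduced); `run_model` and `respond` are hypothetical stand-ins, not the actual `http_bot` body:

```python
import gradio as gr

server_error_msg = "**NETWORK ERROR. PLEASE TRY AGAIN LATER.**"  # placeholder text

def run_model(prompt: str) -> str:
    # Hypothetical stand-in for the streaming model call inside http_bot.
    return f"echo: {prompt}"

def respond(prompt: str) -> str:
    try:
        return run_model(prompt)
    except Exception:
        # `except Exception:` (unlike a bare `except:`) leaves SystemExit and
        # KeyboardInterrupt alone; gr.Warning pops a toast in the browser.
        gr.Warning(server_error_msg)
        return server_error_msg

demo = gr.Interface(fn=respond, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
```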
 
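The example gallery also moves below the chat area, and each row now seeds the sampling sliders as well (`inputs=[imagebox, segbox, textbox, temperature, top_p]`), so clicking an example loads a temperature/top-p pair alongside the image, segmentation map, and prompt. A self-contained sketch of that wiring, reduced to the text and slider components (the prompt strings come from the diff; everything else here is illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    textbox = gr.Textbox(label="Prompt")
    temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.1, label="Temperature")
    top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.9, step=0.1, label="Top P")
    # gr.Examples expects one value per component listed in `inputs`;
    # selecting a row fills the prompt and moves both sliders.
    gr.Examples(
        examples=[
            ["What objects can be seen in the image?", 0.9, 1.0],
            ["Can you count the number of people in the image?", 0.8, 0.9],
        ],
        inputs=[textbox, temperature, top_p],
    )

if __name__ == "__main__":
    demo.launch()
```

With the default `--model-path` switched to `shi-labs/vcoder_ds_llava-v1.5-13b`, a plain `python app.py` now targets the VCoder checkpoint; the previous default, `facebook/opt-350m`, appears to be a leftover placeholder from the upstream LLaVA server script.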