wcy1122 committed on
Commit
78fcda9
1 Parent(s): 40d1339

update demo

Browse files
app.py CHANGED
@@ -20,7 +20,7 @@ from diffusers import StableDiffusionXLPipeline
20
  from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
21
  from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
22
  from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
23
- from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, block_css
24
  from minigemini.model.builder import load_pretrained_model
25
 
26
  # os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
@@ -35,7 +35,7 @@ def download_model(repo_id):
35
 
36
  if not os.path.exists('./checkpoints/'):
37
  os.makedirs('./checkpoints/')
38
- download_model('YanweiLi/Mini-Gemini-13B-HD')
39
  download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
40
 
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -43,8 +43,8 @@ load_8bit = False
43
  load_4bit = False
44
  dtype = torch.float16
45
  conv_mode = "vicuna_v1"
46
- model_path = './checkpoints/Mini-Gemini-13B-HD'
47
- model_name = 'Mini-Gemini-13B-HD'
48
  model_base = None
49
 
50
  tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
@@ -242,7 +242,7 @@ def delete_text(state, image_process_mode):
242
 
243
 
244
  textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
245
- with gr.Blocks(title='Mini-Gemini') as demo:
246
  gr.Markdown(title_markdown)
247
  state = gr.State()
248
 
@@ -272,7 +272,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
272
  with gr.Column(scale=7):
273
  chatbot = gr.Chatbot(
274
  elem_id="chatbot",
275
- label="Mini-Gemini Chatbot",
276
  height=850,
277
  layout="panel",
278
  )
@@ -291,6 +291,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
291
  gr.Markdown(function_markdown)
292
  gr.Markdown(tos_markdown)
293
  gr.Markdown(learn_more_markdown)
 
294
 
295
  btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
296
  upvote_btn.click(
 
20
  from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
21
  from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
22
  from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
23
+ from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, ack_markdown, block_css
24
  from minigemini.model.builder import load_pretrained_model
25
 
26
  # os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
 
35
 
36
  if not os.path.exists('./checkpoints/'):
37
  os.makedirs('./checkpoints/')
38
+ download_model('YanweiLi/MGM-13B-HD')
39
  download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
40
 
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
43
  load_4bit = False
44
  dtype = torch.float16
45
  conv_mode = "vicuna_v1"
46
+ model_path = './checkpoints/MGM-13B-HD'
47
+ model_name = 'MGM-13B-HD'
48
  model_base = None
49
 
50
  tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
 
242
 
243
 
244
  textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
245
+ with gr.Blocks(title='MGM') as demo:
246
  gr.Markdown(title_markdown)
247
  state = gr.State()
248
 
 
272
  with gr.Column(scale=7):
273
  chatbot = gr.Chatbot(
274
  elem_id="chatbot",
275
+ label="MGM Chatbot",
276
  height=850,
277
  layout="panel",
278
  )
 
291
  gr.Markdown(function_markdown)
292
  gr.Markdown(tos_markdown)
293
  gr.Markdown(learn_more_markdown)
294
+ gr.Markdown(ack_markdown)
295
 
296
  btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
297
  upvote_btn.click(
minigemini/model/builder.py CHANGED
@@ -49,7 +49,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
49
 
50
  logging.getLogger("transformers").setLevel(logging.ERROR)
51
 
52
- if 'mini-gemini' in model_name.lower():
53
  # Load MiniGemini model
54
  if model_base is not None:
55
  # this may be mm projector only
@@ -116,7 +116,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
116
  vision_tower.to(device=device, dtype=torch.float16)
117
  image_processor = vision_tower.image_processor
118
 
119
- if 'mini-gemini' in model_name.lower():
120
  vision_tower_aux = model.get_vision_tower_aux()
121
  if not vision_tower_aux.is_loaded:
122
  vision_tower_aux.load_model()
 
49
 
50
  logging.getLogger("transformers").setLevel(logging.ERROR)
51
 
52
+ if 'mgm' in model_name.lower():
53
  # Load MiniGemini model
54
  if model_base is not None:
55
  # this may be mm projector only
 
116
  vision_tower.to(device=device, dtype=torch.float16)
117
  image_processor = vision_tower.image_processor
118
 
119
+ if 'mgm' in model_name.lower():
120
  vision_tower_aux = model.get_vision_tower_aux()
121
  if not vision_tower_aux.is_loaded:
122
  vision_tower_aux.load_model()
minigemini/serve/gradio_web_server.py CHANGED
@@ -280,7 +280,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, gen_imag
280
  fout.write(json.dumps(data) + "\n")
281
 
282
  title_markdown = ("""
283
- # Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models
284
  [[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
285
  This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
286
  """)
@@ -304,6 +304,11 @@ learn_more_markdown = ("""
304
  The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
305
  """)
306
 
 
 
 
 
 
307
  block_css = """
308
 
309
  #buttons button {
 
280
  fout.write(json.dumps(data) + "\n")
281
 
282
  title_markdown = ("""
283
+ # Official demo for the paper "Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models"
284
  [[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
285
  This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
286
  """)
 
304
  The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
305
  """)
306
 
307
+ ack_markdown = ("""
308
+ ### Acknowledgement
309
+ This project is not affiliated with Google LLC.
310
+ """)
311
+
312
  block_css = """
313
 
314
  #buttons button {