wcy1122 committed on
Commit
78fcda9
1 Parent(s): 40d1339

update demo

Browse files
app.py CHANGED
@@ -20,7 +20,7 @@ from diffusers import StableDiffusionXLPipeline
20
  from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
21
  from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
22
  from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
23
- from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, block_css
24
  from minigemini.model.builder import load_pretrained_model
25
 
26
  # os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
@@ -35,7 +35,7 @@ def download_model(repo_id):
35
 
36
  if not os.path.exists('./checkpoints/'):
37
  os.makedirs('./checkpoints/')
38
- download_model('YanweiLi/Mini-Gemini-13B-HD')
39
  download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
40
 
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -43,8 +43,8 @@ load_8bit = False
43
  load_4bit = False
44
  dtype = torch.float16
45
  conv_mode = "vicuna_v1"
46
- model_path = './checkpoints/Mini-Gemini-13B-HD'
47
- model_name = 'Mini-Gemini-13B-HD'
48
  model_base = None
49
 
50
  tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
@@ -242,7 +242,7 @@ def delete_text(state, image_process_mode):
242
 
243
 
244
  textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
245
- with gr.Blocks(title='Mini-Gemini') as demo:
246
  gr.Markdown(title_markdown)
247
  state = gr.State()
248
 
@@ -272,7 +272,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
272
  with gr.Column(scale=7):
273
  chatbot = gr.Chatbot(
274
  elem_id="chatbot",
275
- label="Mini-Gemini Chatbot",
276
  height=850,
277
  layout="panel",
278
  )
@@ -291,6 +291,7 @@ with gr.Blocks(title='Mini-Gemini') as demo:
291
  gr.Markdown(function_markdown)
292
  gr.Markdown(tos_markdown)
293
  gr.Markdown(learn_more_markdown)
 
294
 
295
  btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
296
  upvote_btn.click(
 
20
  from minigemini.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
21
  from minigemini.mm_utils import process_images, load_image_from_base64, tokenizer_image_token
22
  from minigemini.conversation import default_conversation, conv_templates, SeparatorStyle, Conversation
23
+ from minigemini.serve.gradio_web_server import function_markdown, tos_markdown, learn_more_markdown, title_markdown, ack_markdown, block_css
24
  from minigemini.model.builder import load_pretrained_model
25
 
26
  # os.system('python -m pip install paddlepaddle-gpu==2.4.2.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html')
 
35
 
36
  if not os.path.exists('./checkpoints/'):
37
  os.makedirs('./checkpoints/')
38
+ download_model('YanweiLi/MGM-13B-HD')
39
  download_model('laion/CLIP-convnext_large_d_320.laion2B-s29B-b131K-ft-soup')
40
 
41
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
43
  load_4bit = False
44
  dtype = torch.float16
45
  conv_mode = "vicuna_v1"
46
+ model_path = './checkpoints/MGM-13B-HD'
47
+ model_name = 'MGM-13B-HD'
48
  model_base = None
49
 
50
  tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, model_base, model_name,
 
242
 
243
 
244
  textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
245
+ with gr.Blocks(title='MGM') as demo:
246
  gr.Markdown(title_markdown)
247
  state = gr.State()
248
 
 
272
  with gr.Column(scale=7):
273
  chatbot = gr.Chatbot(
274
  elem_id="chatbot",
275
+ label="MGM Chatbot",
276
  height=850,
277
  layout="panel",
278
  )
 
291
  gr.Markdown(function_markdown)
292
  gr.Markdown(tos_markdown)
293
  gr.Markdown(learn_more_markdown)
294
+ gr.Markdown(ack_markdown)
295
 
296
  btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn]
297
  upvote_btn.click(
minigemini/model/builder.py CHANGED
@@ -49,7 +49,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
49
 
50
  logging.getLogger("transformers").setLevel(logging.ERROR)
51
 
52
- if 'mini-gemini' in model_name.lower():
53
  # Load MiniGemini model
54
  if model_base is not None:
55
  # this may be mm projector only
@@ -116,7 +116,7 @@ def load_pretrained_model(model_path, model_base, model_name, load_8bit=False, l
116
  vision_tower.to(device=device, dtype=torch.float16)
117
  image_processor = vision_tower.image_processor
118
 
119
- if 'mini-gemini' in model_name.lower():
120
  vision_tower_aux = model.get_vision_tower_aux()
121
  if not vision_tower_aux.is_loaded:
122
  vision_tower_aux.load_model()
 
49
 
50
  logging.getLogger("transformers").setLevel(logging.ERROR)
51
 
52
+ if 'mgm' in model_name.lower():
53
  # Load MiniGemini model
54
  if model_base is not None:
55
  # this may be mm projector only
 
116
  vision_tower.to(device=device, dtype=torch.float16)
117
  image_processor = vision_tower.image_processor
118
 
119
+ if 'mgm' in model_name.lower():
120
  vision_tower_aux = model.get_vision_tower_aux()
121
  if not vision_tower_aux.is_loaded:
122
  vision_tower_aux.load_model()
minigemini/serve/gradio_web_server.py CHANGED
@@ -280,7 +280,7 @@ def http_bot(state, model_selector, temperature, top_p, max_new_tokens, gen_imag
280
  fout.write(json.dumps(data) + "\n")
281
 
282
  title_markdown = ("""
283
- # Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models
284
  [[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
285
  This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
286
  """)
@@ -304,6 +304,11 @@ learn_more_markdown = ("""
304
  The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
305
  """)
306
 
 
 
 
 
 
307
  block_css = """
308
 
309
  #buttons button {
 
280
  fout.write(json.dumps(data) + "\n")
281
 
282
  title_markdown = ("""
283
+ # Official demo for the paper "Mini-Gemini: Mining the Potential of Multi-modality Vision Language Models"
284
  [[Project Page]](https://mini-gemini.github.io/) [[Paper]](https://arxiv.org/abs/2403.18814) [[Code]](https://github.com/dvlab-research/MiniGemini) [[Model]](https://huggingface.co/collections/YanweiLi/mini-gemini-6603c50b9b43d044171d0854) [[Data]](https://huggingface.co/collections/YanweiLi/mini-gemini-data-660463ea895a01d8f367624e) <br>
285
  This is Mini-Gemini-13B-HD version. The Mini-Gemini-34B-HD is deployed on [[here]](http://10.81.134.110:7860/)
286
  """)
 
304
  The service is a research preview intended for non-commercial use only, subject to the model [License](https://github.com/facebookresearch/llama/blob/main/MODEL_CARD.md) of LLaMA, [Terms of Use](https://openai.com/policies/terms-of-use) of the data generated by OpenAI, and [Privacy Practices](https://chrome.google.com/webstore/detail/sharegpt-share-your-chatg/daiacboceoaocpibfodeljbdfacokfjb) of ShareGPT. Please contact us if you find any potential violation.
305
  """)
306
 
307
+ ack_markdown = ("""
308
+ ### Acknowledgement
309
+ This project is not affiliated with Google LLC.
310
+ """)
311
+
312
  block_css = """
313
 
314
  #buttons button {