update

Browse files

Files changed (17) hide show

app.py +25 -3
examples/image1.jpg +0 -0
examples/image10.jpg +0 -0
examples/image11.jpg +0 -0
examples/image2.jpg +0 -0
examples/image3.jpg +0 -0
examples/image4.jpg +0 -0
examples/image5.jpg +0 -0
examples/image6.jpg +0 -0
examples/image7.jpg +0 -0
examples/image8.jpg +0 -0
examples/image9.jpg +0 -0
models/mllava/__pycache__/__init__.cpython-39.pyc +0 -0
models/mllava/__pycache__/configuration_llava.cpython-39.pyc +0 -0
models/mllava/__pycache__/modeling_llava.cpython-39.pyc +0 -0
models/mllava/__pycache__/processing_llava.cpython-39.pyc +0 -0
models/mllava/__pycache__/utils.cpython-39.pyc +0 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import spaces
 import time
 from PIL import Image
 from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
@@ -97,7 +98,9 @@ def build_demo():
     with gr.Blocks() as demo:
         gr.Markdown(""" # Mantis
-                    Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where inverleaved text and images can be used to generate responses.
         """)
         chatbot = gr.Chatbot(line_breaks=True)
         chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
@@ -117,8 +120,27 @@ def build_demo():
             bot, chatbot, chatbot, api_name="bot_response"
         )
     return demo

 import gradio as gr
 import spaces
+import os
 import time
 from PIL import Image
 from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
     with gr.Blocks() as demo:
         gr.Markdown(""" # Mantis
+Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where interleaved text and images can be used to generate responses.
+| [Github](https://github.com/TIGER-AI-Lab/Mantis) | [Blog](https://tiger-ai-lab.github.io/Blog/mantis) | [Models](https://huggingface.co/collections/TIGER-Lab/mantis-6619b0834594c878cdb1d6e4) |
         """)
         chatbot = gr.Chatbot(line_breaks=True)
         chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
             bot, chatbot, chatbot, api_name="bot_response"
         )
+        gr.Examples(
+            examples=[
+                {
+                    "text": "<image> <image> How many dices are there in each image?",
+                    "files": ["./examples/image10.jpg", "./examples/image11.jpg"]
+                },
+                {
+                    "text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
+                    "files": ["./examples/image1.jpg", "./examples/image2.jpg"]
+                },
+                {
+                    "text": "<image> <image> Which image shows an older dog?",
+                    "files": ["./examples/image8.jpg", "./examples/image9.jpg"]
+                },
+                {
+                    "text": "Write a description for the given image sequence in a single paragraph, what is happening in this episode?",
+                    "files": ["./examples/image3.jpg", "./examples/image4.jpg", "./examples/image5.jpg", "./examples/image6.jpg", "./examples/image7.jpg"]
+                },
+            ],
+            inputs=[chat_input],
+        )
     return demo