DongfuJiang committed on
Commit
a7a2242
1 Parent(s): 062730b
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  import spaces
 
3
  import time
4
  from PIL import Image
5
  from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
@@ -97,7 +98,9 @@ def build_demo():
97
  with gr.Blocks() as demo:
98
 
99
  gr.Markdown(""" # Mantis
100
- Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where interleaved text and images can be used to generate responses.
 
 
101
  """)
102
  chatbot = gr.Chatbot(line_breaks=True)
103
  chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
@@ -117,8 +120,27 @@ def build_demo():
117
  bot, chatbot, chatbot, api_name="bot_response"
118
  )
119
 
120
-
121
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  return demo
124
 
 
1
  import gradio as gr
2
  import spaces
3
+ import os
4
  import time
5
  from PIL import Image
6
  from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
 
98
  with gr.Blocks() as demo:
99
 
100
  gr.Markdown(""" # Mantis
101
+ Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where interleaved text and images can be used to generate responses.
102
+
103
+ | [Github](https://github.com/TIGER-AI-Lab/Mantis) | [Blog](https://tiger-ai-lab.github.io/Blog/mantis) | [Models](https://huggingface.co/collections/TIGER-Lab/mantis-6619b0834594c878cdb1d6e4) |
104
  """)
105
  chatbot = gr.Chatbot(line_breaks=True)
106
  chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload images. Please use <image> to indicate the position of uploaded images", show_label=True)
 
120
  bot, chatbot, chatbot, api_name="bot_response"
121
  )
122
 
123
+ gr.Examples(
124
+ examples=[
125
+ {
126
+ "text": "<image> <image> How many dices are there in each image?",
127
+ "files": ["./examples/image10.jpg", "./examples/image11.jpg"]
128
+ },
129
+ {
130
+ "text": "<image> <image> What's the difference between these two images? Please describe as much as you can.",
131
+ "files": ["./examples/image1.jpg", "./examples/image2.jpg"]
132
+ },
133
+ {
134
+ "text": "<image> <image> Which image shows an older dog?",
135
+ "files": ["./examples/image8.jpg", "./examples/image9.jpg"]
136
+ },
137
+ {
138
+ "text": "Write a description for the given image sequence in a single paragraph, what is happening in this episode?",
139
+ "files": ["./examples/image3.jpg", "./examples/image4.jpg", "./examples/image5.jpg", "./examples/image6.jpg", "./examples/image7.jpg"]
140
+ },
141
+ ],
142
+ inputs=[chat_input],
143
+ )
144
 
145
  return demo
146
 
examples/image1.jpg ADDED
examples/image10.jpg ADDED
examples/image11.jpg ADDED
examples/image2.jpg ADDED
examples/image3.jpg ADDED
examples/image4.jpg ADDED
examples/image5.jpg ADDED
examples/image6.jpg ADDED
examples/image7.jpg ADDED
examples/image8.jpg ADDED
examples/image9.jpg ADDED
models/mllava/__pycache__/__init__.cpython-39.pyc DELETED
Binary file (375 Bytes)
 
models/mllava/__pycache__/configuration_llava.cpython-39.pyc DELETED
Binary file (4.26 kB)
 
models/mllava/__pycache__/modeling_llava.cpython-39.pyc DELETED
Binary file (22.7 kB)
 
models/mllava/__pycache__/processing_llava.cpython-39.pyc DELETED
Binary file (10.8 kB)
 
models/mllava/__pycache__/utils.cpython-39.pyc DELETED
Binary file (3.24 kB)