DongfuJiang committed on
Commit
75c15ae
1 Parent(s): 3f7f343
Files changed (2) hide show
  1. app.py +1 -1
  2. models/mllava/utils.py +1 -1
app.py CHANGED
@@ -109,7 +109,7 @@ def build_demo():
109
  gr.Markdown(""" # Mantis
110
  Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where inverleaved text and images can be used to generate responses.
111
 
112
- ### [Paper](https://arxiv.org/abs/2405.01483) | [Github](https://github.com/TIGER-AI-Lab/Mantis) | [Models](https://huggingface.co/collections/TIGER-Lab/mantis-6619b0834594c878cdb1d6e4) | [Dataset](https://huggingface.co/datasets/TIGER-Lab/Mantis-Instruct)
113
  """)
114
 
115
  gr.Markdown("""## Chat with Mantis
 
109
  gr.Markdown(""" # Mantis
110
  Mantis is a multimodal conversational AI model that can chat with users about images and text. It's optimized for multi-image reasoning, where inverleaved text and images can be used to generate responses.
111
 
112
+ ### [Paper](https://arxiv.org/abs/2405.01483) | [Github](https://github.com/TIGER-AI-Lab/Mantis) | [Models](https://huggingface.co/collections/TIGER-Lab/mantis-6619b0834594c878cdb1d6e4) | [Dataset](https://huggingface.co/datasets/TIGER-Lab/Mantis-Instruct) | [Website](https://tiger-ai-lab.github.io/Mantis/)
113
  """)
114
 
115
  gr.Markdown("""## Chat with Mantis
models/mllava/utils.py CHANGED
@@ -55,7 +55,7 @@ def chat_mllava(
55
  if images:
56
  for i in range(len(images)):
57
  if isinstance(images[i], str):
58
- images[i] = PIL.Image.open(images[i])
59
 
60
  inputs = processor(images=images, text=prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
61
  for k, v in inputs.items():
 
55
  if images:
56
  for i in range(len(images)):
57
  if isinstance(images[i], str):
58
+ images[i] = PIL.Image.open(images[i]).convert("RGB")
59
 
60
  inputs = processor(images=images, text=prompt, return_tensors="pt", truncation=True, max_length=max_input_length)
61
  for k, v in inputs.items():