DongfuJiang committed on
Commit 335eee6
1 Parent(s): c862a9f
app.py CHANGED
@@ -4,10 +4,12 @@ import os
 import time
 from PIL import Image
 import functools
-from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava, MLlavaForConditionalGeneration
+from models.mllava import MLlavaProcessor, LlavaForConditionalGeneration, chat_mllava_stream, MLlavaForConditionalGeneration
+from models.conversation import conv_templates
 from typing import List
 processor = MLlavaProcessor.from_pretrained("TIGER-Lab/Mantis-8B-siglip-llama3")
 model = LlavaForConditionalGeneration.from_pretrained("TIGER-Lab/Mantis-8B-siglip-llama3")
+conv_template = conv_templates['llama_3']
 
 @spaces.GPU
 def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
@@ -15,7 +17,7 @@ def generate(text:str, images:List[Image.Image], history: List[dict], **kwargs):
     model = model.to("cuda")
     if not images:
         images = None
-    for text, history in chat_mllava(text, images, model, processor, history=history, stream=True, **kwargs):
+    for text, history in chat_mllava_stream(text, images, model, processor, history=history, **kwargs):
         yield text
 
     return text
@@ -38,15 +40,17 @@ def print_like_dislike(x: gr.LikeData):
 
 def get_chat_history(history):
     chat_history = []
+    user_role = conv_template.roles[0]
+    assistant_role = conv_template.roles[1]
     for i, message in enumerate(history):
         if isinstance(message[0], str):
-            chat_history.append({"role": "user", "text": message[0]})
+            chat_history.append({"role": user_role, "text": message[0]})
             if i != len(history) - 1:
                 assert message[1], "The bot message is not provided, internal error"
-                chat_history.append({"role": "assistant", "text": message[1]})
+                chat_history.append({"role": assistant_role, "text": message[1]})
             else:
                 assert not message[1], "the bot message internal error, get: {}".format(message[1])
-                chat_history.append({"role": "assistant", "text": ""})
+                chat_history.append({"role": assistant_role, "text": ""})
     return chat_history
 
 
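Note: replacing the hard-coded "user"/"assistant" strings with conv_template.roles matters because chat_mllava_stream (see models/mllava/utils.py below) asserts message["role"] in conv.roles and no longer upper-cases roles on the way in. A minimal sketch of the resulting history format, assuming the llama_3 template exposes roles as a (user, assistant) pair; the sample messages are illustrative:

```python
# Minimal sketch (assumes this repo's conv_templates; sample messages are
# illustrative). History entries must already carry the template's own role
# names, since chat_mllava_stream asserts message["role"] in conv.roles.
from models.conversation import conv_templates

conv_template = conv_templates['llama_3']
user_role, assistant_role = conv_template.roles

history = [
    {"role": user_role, "text": "What is shown in the first image?"},
    {"role": assistant_role, "text": "A cat sitting on a windowsill."},
    {"role": user_role, "text": "And in the second?"},
    {"role": assistant_role, "text": ""},  # the turn currently being generated
]
```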
models/mllava/__init__.py CHANGED
@@ -1,4 +1,4 @@
 from .modeling_llava import LlavaForConditionalGeneration, MLlavaForConditionalGeneration
 from .processing_llava import MLlavaProcessor
 from .configuration_llava import LlavaConfig
-from .utils import chat_mllava
+from .utils import chat_mllava, chat_mllava_stream
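With the re-export, the streaming helper is importable from the package root alongside the original entry point:

```python
from models.mllava import chat_mllava, chat_mllava_stream
```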
models/mllava/utils.py CHANGED
@@ -44,7 +44,6 @@ def chat_mllava(
     conv.messages = []
     if history is not None:
         for message in history:
-            message["role"] = message["role"].upper()
             assert message["role"] in conv.roles
             conv.append_message(message["role"], message["text"])
     else:
@@ -105,11 +104,20 @@ def chat_mllava_stream(
 
 
     """
-    conv = default_conv.copy()
+    if "llama-3" in model.language_model.name_or_path.lower():
+        conv = conv_templates['llama_3']
+        terminators = [
+            processor.tokenizer.eos_token_id,
+            processor.tokenizer.convert_tokens_to_ids("<|eot_id|>")
+        ]
+    else:
+        conv = default_conv
+        terminators = None
+    kwargs["eos_token_id"] = terminators
+    conv = conv.copy()
     conv.messages = []
     if history is not None:
         for message in history:
-            message["role"] = message["role"].upper()
             assert message["role"] in conv.roles
             conv.append_message(message["role"], message["text"])
     else:
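The new branch exists because Llama-3 checkpoints end an assistant turn with <|eot_id|>, which is distinct from the tokenizer's default EOS token, so generation must be able to stop on either id. The same pattern in plain transformers, as a standalone sketch (the checkpoint name is illustrative, not from this repo):

```python
# Standalone sketch of the Llama-3 stopping pattern (illustrative checkpoint).
from transformers import AutoModelForCausalLM, AutoTokenizer

name = "meta-llama/Meta-Llama-3-8B-Instruct"  # illustrative, not this repo's model
tokenizer = AutoTokenizer.from_pretrained(name)
model = AutoModelForCausalLM.from_pretrained(name)

# Stop on either the default EOS or Llama 3's end-of-turn marker.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

inputs = tokenizer("The capital of France is", return_tensors="pt")
output = model.generate(**inputs, max_new_tokens=32, eos_token_id=terminators)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```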
requirements.txt CHANGED
@@ -3,4 +3,5 @@ transformers
 Pillow
 gradio
 spaces
-multiprocess
+multiprocess
+flash-attn
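flash-attn is presumably added so the model can be loaded with FlashAttention-2 kernels; this diff does not show where it is wired up, but in transformers the opt-in happens at load time, roughly as below (half precision is required by the flash-attn kernels):

```python
# Hedged sketch: not shown in this diff's app.py. attn_implementation is the
# standard transformers opt-in for FlashAttention-2, which requires fp16/bf16.
import torch
from models.mllava import LlavaForConditionalGeneration

model = LlavaForConditionalGeneration.from_pretrained(
    "TIGER-Lab/Mantis-8B-siglip-llama3",
    torch_dtype=torch.float16,
    attn_implementation="flash_attention_2",
)
```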