vilarin committed
Commit b13c502
1 Parent(s): ad20a0f

Update app.py

Files changed (1)
  app.py +38 -81
app.py CHANGED
@@ -1,11 +1,12 @@
+ from threading import Thread
+ import torch
from PIL import Image
import gradio as gr
import spaces
+ from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer
import os
- from huggingface_hub import hf_hub_download
- import base64
- from llama_cpp import Llama
- from llama_cpp.llama_chat_format import Llava15ChatHandler
+
+

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
MODEL_LIST = ["openbmb/MiniCPM-Llama3-V-2_5","openbmb/MiniCPM-Llama3-V-2_5-int4"]
@@ -26,108 +27,64 @@ CSS = """
}
"""

- chat_handler = Llava15ChatHandler.from_pretrained(
-     repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-     filename="*mmproj*",
- )
-
- llm = Llama.from_pretrained(
-     repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-     filename="ggml-model-Q5_K_M.gguf",
-     chat_handler=chat_handler,
-     n_ctx=4096,
-     verbose=True
- )
+ model = AutoModel.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float16,
+     trust_remote_code=True
+ ).to(0)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+ model.eval()

- '''
- filenames = [
-     "*mmproj*",
-     "ggml-model-Q5_K_M.gguf"
- ]

- for filename in filenames:
-     downloaded_model_path = hf_hub_download(
-         repo_id="openbmb/MiniCPM-Llama3-V-2_5-gguf",
-         filename=filename,
-         local_dir="model"
-     )
- '''
-
-
- def image_to_base64_data_uri(file_path):
-     with open(file_path, "rb") as img_file:
-         base64_data = base64.b64encode(img_file.read()).decode('utf-8')
-         return f"data:image/png;base64,{base64_data}"
-
- @spaces.GPU(queue=False)
+ @spaces.GPU()
def stream_chat(message, history: list, temperature: float, max_new_tokens: int):
    print(f'message is - {message}')
    print(f'history is - {history}')
-     messages = []
-
+     conversation = []
    if message["files"]:
-         image = message["files"][-1]
-         messages.append({
-             "role": "user",
-             "content": [
-                 {"type": "text", "text": message['text']},
-                 {"type": "image_url", "image_url":{"url": image}}
-             ]
-         })
+         image = Image.open(message["files"][-1]).convert('RGB')
+         conversation.append({"role": "user", "content": message['text']})
    else:
        if len(history) == 0:
            raise gr.Error("Please upload an image first.")
            image = None
        else:
-             image = history[0][0][0]
+             image = Image.open(history[0][0][0])
        for prompt, answer in history:
            if answer is None:
-                 messages.extend([{
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": prompt},
-                         {"type": "image_url", "image_url": {"url": image}}
-                     ]
-                 },{
-                     "role": "assistant",
-                     "content": ""
-                 }])
+                 conversation.extend([{"role": "user", "content": prompt},{"role": "assistant", "content": ""}])
            else:
-                 messages.extend([{
-                     "role": "user",
-                     "content": [
-                         {"type": "text", "text": prompt},
-                         {"type": "image_url", "image_url": {"url": image}}
-                     ]
-                 }, {
-                     "role": "assistant",
-                     "content": answer
-                 }])
-         messages.append({"role": "user", "content": message['text']})
-     print(f"Messages is -\n{messages}")
-
-
-     response = llm.create_chat_completion(
-         messages = messages,
+                 conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
+         conversation.append({"role": "user", "content": message['text']})
+     print(f"Conversation is -\n{conversation}")
+
+     generate_kwargs = dict(
+         image=image,
+         msgs=conversation,
+         max_new_tokens=max_new_tokens,
        temperature=temperature,
-         max_tokens=max_new_tokens,
+         sampling=True,
+         tokenizer=tokenizer,
    )
+     if temperature == 0:
+         generate_kwargs["sampling"] = False

-     return response["choices"][0]["text"]
+     response = model.chat(**generate_kwargs)
+     return response


chatbot = gr.Chatbot(height=450)
chat_input = gr.MultimodalTextbox(
-     interactive=True,
-     file_types=["image"],
-     placeholder="Enter message or upload file...",
+     interactive=True,
+     file_types=["image"],
+     placeholder="Enter message or upload file...",
    show_label=False,

)
EXAMPLES = [
-     [{"text": "What is on the desk?", "files": ["./laptop.jpg"]}],
-     [{"text": "Where it is?", "files": ["./hotel.jpg"]}],
-     [{"text": "Can yo describe this image?", "files": ["./spacecat.png"]}]
+     [{"text": "Describe it in great detailed.", "files": ["./laptop.jpg"]}],
+     [{"text": "Describe it in great detailed.", "files": ["./hotel.jpg"]}],
+     [{"text": "Describe it in great detailed.", "files": ["./spacecat.png"]}]
]

with gr.Blocks(css=CSS) as demo:
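
In short, the commit drops the llama-cpp-python GGUF path (Llava15ChatHandler plus llm.create_chat_completion) and instead loads the checkpoint through transformers with trust_remote_code, calling the repo's model.chat() method. Below is a minimal sketch of the new inference path outside Gradio; it assumes MODEL_ID matches MODEL_LIST[0] ("openbmb/MiniCPM-Llama3-V-2_5"), a CUDA device at index 0, and a local example image, and the keyword arguments simply mirror the generate_kwargs built in app.py.

# Sketch only: MODEL_ID, the image path, and the prompt are illustrative values
# taken from MODEL_LIST and EXAMPLES in the diff above, not part of the commit itself.
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer

MODEL_ID = "openbmb/MiniCPM-Llama3-V-2_5"  # assumed to match MODEL_LIST[0] in app.py

# Same loading pattern as the new app.py: fp16 weights, remote code, GPU 0.
model = AutoModel.from_pretrained(MODEL_ID, torch_dtype=torch.float16, trust_remote_code=True).to(0)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model.eval()

image = Image.open("./laptop.jpg").convert("RGB")  # any RGB image works here
msgs = [{"role": "user", "content": "What is on the desk?"}]

# model.chat() is the remote-code entry point that replaces llm.create_chat_completion();
# app.py sets sampling=False (greedy decoding) when temperature == 0.
response = model.chat(
    image=image,
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    temperature=0.7,
    max_new_tokens=1024,
)
print(response)

Note that Thread and TextIteratorStreamer are imported by the new app.py but not used yet: in this revision stream_chat still returns the full response string rather than streaming tokens.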