FinLLaVA

Running on Zero

App Files Files Community

TobyYang7 committed on Aug 29

Commit

afff347

•

1 Parent(s): daee25b

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -73

app.py CHANGED Viewed

@@ -1,20 +1,37 @@
 from threading import Thread
-from llava_llama3.serve.cli import chat_llava
-from llava_llama3.model.builder import load_pretrained_model
 import gradio as gr
 import torch
 from PIL import Image
 import argparse
-import spaces
 import os
-import time
 root_path = os.path.dirname(os.path.abspath(__file__))
-print(root_path)
 parser = argparse.ArgumentParser()
-parser.add_argument("--model-path", type=str, default="TheFinAI/FinLLaVA")
-parser.add_argument("--device", type=str, default="cuda:0")
 parser.add_argument("--conv-mode", type=str, default="llama_3")
 parser.add_argument("--temperature", type=float, default=0.7)
 parser.add_argument("--max-new-tokens", type=int, default=512)
@@ -22,87 +39,56 @@ parser.add_argument("--load-8bit", action="store_true")
 parser.add_argument("--load-4bit", action="store_true")
 args = parser.parse_args()
-# load model
 tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
     args.model_path,
     None,
     'llava_llama3',
     args.load_8bit,
     args.load_4bit,
-    device=args.device
-)
-@spaces.GPU
 def bot_streaming(message, history):
     print(message)
-    image_path = None
-    # Check if there's an image in the current message
     if message["files"]:
-        # message["files"][-1] could be a dictionary or a string
-        if isinstance(message["files"][-1], dict):
-            image_path = message["files"][-1]["path"]
         else:
-            image_path = message["files"][-1]
     else:
-        # If no image in the current message, look in the history for the last image path
         for hist in history:
-            if isinstance(hist[0], tuple):
-                image_path = hist[0][0]
-    # Error handling if no image path is found
-    if image_path is None:
-        raise gr.Error("You need to upload an image for LLaVA to work.")
-    # If the image_path is a string, no need to load it into a PIL image
-    # Just use the path directly in the next steps
-    print(f"\033[91m{image_path}, {type(image_path)}\033[0m")
-    # Generate the prompt for the model
-    prompt = message['text']
-    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    # Set up the generation arguments, including the streamer
-    generation_kwargs = dict(
-        args=args,
-        image_file=image_path,
-        text=prompt,
-        tokenizer=tokenizer,
-        model=llava_model,
-        streamer=streamer,
-        image_processor=image_processor,  # todo: input model name or path
-        context_len=context_len)
-    # Define the function to call `chat_llava` with the given arguments
-    def generate_output(generation_kwargs):
-        chat_llava(**generation_kwargs)
-    # Start the generation in a separate thread
-    thread = Thread(target=generate_output, kwargs=generation_kwargs)
     thread.start()
-    # Initialize a buffer to accumulate the generated text
     buffer = ""
-    # Allow the generation to start
-    time.sleep(0.5)
-    # Iterate over the streamer to handle the incoming text in chunks
     for new_text in streamer:
-        # Look for the end of text token and remove it
-        if "<|eot_id|>" in new_text:
-            new_text = new_text.split("<|eot_id|>")[0]
-        # Add the new text to the buffer
         buffer += new_text
-        # Remove the prompt from the generated text (if necessary)
-        generated_text_without_prompt = buffer[len(prompt):]
-        # Simulate processing time (optional)
         time.sleep(0.06)
-        # Yield the current generated text for further processing or display
         yield generated_text_without_prompt
 chatbot = gr.Chatbot(scale=1)
@@ -110,10 +96,11 @@ chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeh
 with gr.Blocks(fill_height=True) as demo:
     gr.ChatInterface(
         fn=bot_streaming,
-        title="FinLLaVA",
-        examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
-                  {"text": "How to make this pastry?", "files": ["./baklava.png"]},
-                  {"text":"What is this?","files":["http://images.cocodataset.org/val2017/000000039769.jpg"]}],
         stop_btn="Stop Generation",
         multimodal=True,
         textbox=chat_input,

+import time
 from threading import Thread
 import gradio as gr
 import torch
 from PIL import Image
+from transformers import AutoProcessor, LlavaForConditionalGeneration, TextIteratorStreamer, TextStreamer
+# import spaces
 import argparse
+from llava_llama3.model.builder import load_pretrained_model
+from llava_llama3.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
+from llava_llama3.conversation import conv_templates, SeparatorStyle
+from llava_llama3.utils import disable_torch_init
+from llava_llama3.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path
+from llava_llama3.serve.cli import chat_llava
+import requests
+from io import BytesIO
+import base64
 import os
+import glob
+import pandas as pd
+from tqdm import tqdm
+import json
 root_path = os.path.dirname(os.path.abspath(__file__))
+print(f'\033[92m{root_path}\033[0m')
+os.environ['GRADIO_TEMP_DIR'] = root_path
 parser = argparse.ArgumentParser()
+parser.add_argument("--model-path", type=str, default="/mnt/nvme1n1/toby/LLaVA/checkpoints/0806_onlyllava_llava-finma-8B-v0.4-v8/checkpoint-2000")
+parser.add_argument("--device", type=str, default="cuda")
 parser.add_argument("--conv-mode", type=str, default="llama_3")
 parser.add_argument("--temperature", type=float, default=0.7)
 parser.add_argument("--max-new-tokens", type=int, default=512)
 parser.add_argument("--load-4bit", action="store_true")
 args = parser.parse_args()
+# Load model
 tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
     args.model_path,
     None,
     'llava_llama3',
     args.load_8bit,
     args.load_4bit,
+    device=args.device)
 def bot_streaming(message, history):
     print(message)
+    image_file = None
     if message["files"]:
+        if type(message["files"][-1]) == dict:
+            image_file = message["files"][-1]["path"]
         else:
+            image_file = message["files"][-1]
     else:
         for hist in history:
+            if type(hist[0]) == tuple:
+                image_file = hist[0][0]
+    if image_file is None:
+        gr.Error("You need to upload an image for LLaVA to work.")
+        return
+    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+    def generate():
+        print('\033[92mRunning chat\033[0m')
+        output = chat_llava(
+                    args=args,
+                    image_file=image_file,
+                    text=message['text'],
+                    tokenizer=tokenizer,
+                    model=llava_model,
+                    image_processor=image_processor,
+                    context_len=context_len,
+                    streamer=streamer)
+        return output
+    thread = Thread(target=generate)
     thread.start()
+    # thread.join()
     buffer = ""
+    # output = generate()
     for new_text in streamer:
         buffer += new_text
+        generated_text_without_prompt = buffer
         time.sleep(0.06)
         yield generated_text_without_prompt
 chatbot = gr.Chatbot(scale=1)
 with gr.Blocks(fill_height=True) as demo:
     gr.ChatInterface(
         fn=bot_streaming,
+        title="FinLLaVA Demo",
+        examples=[
+            {"text": "What is in this picture?", "files": ["http://images.cocodataset.org/val2017/000000039769.jpg"]},
+        ],
+        description="",
         stop_btn="Stop Generation",
         multimodal=True,
         textbox=chat_input,