FinLLaVA

Running on Zero

App Files Files Community

TobyYang7 committed on Aug 28, 2024

Commit

ea37c27

verified ·

1 Parent(s): ed5a7bf

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -31

app.py CHANGED Viewed

@@ -1,22 +1,22 @@
-import gradio as gr
 from llava_llama3.serve.cli import chat_llava
 from llava_llama3.model.builder import load_pretrained_model
-from PIL import Image
 import torch
 import spaces
 # Model configuration (old side of the diff; the '-' lines were removed by this commit)
-model_path = "TheFinAI/FinLLaVA"  # first positional argument of load_pretrained_model
-device = "cuda"  # forwarded as device= to load_pretrained_model
-conv_mode = "llama_3"  # not referenced in the lines shown here
-temperature = 0  # not referenced in the lines shown here
-max_new_tokens = 512  # not referenced in the lines shown here
 load_8bit = False  # passed positionally to load_pretrained_model
 load_4bit = False  # its use is not visible in the lines shown (the diff hunk elides part of the call)
 # Load the pretrained model
 tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
-    model_path,
     None,
     'llava_llama3',
     load_8bit,
@@ -24,38 +24,67 @@ tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
     device=device
 )
-# Prediction function (removed by this commit): one chat_llava call per submit, no streaming
 @spaces.GPU
-def bot_streaming(image, text, history):  # arguments arrive from the inputs wired in submit_btn.click
     output = chat_llava(
         args=None,
         image_file=image,  # value of the gr.Image input, which is declared with type="filepath"
-        text=text,
         tokenizer=tokenizer,
         model=llava_model,
         image_processor=image_processor,
         context_len=context_len
     )
-    history.append((text, output))  # record the (user text, model output) pair in the chat history
-    return history, gr.update(value="")  # outputs are [chatbot, text_input]: updated history, textbox reset to ""
-# Build the Gradio Blocks UI (removed by this commit): chat display, image upload, textbox and submit button
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(label="FinLLaVA Chatbot")
-    image_input = gr.Image(type="filepath", label="Upload Image")
-    text_input = gr.Textbox(label="Enter your message")
-    submit_btn = gr.Button("Submit")
-    # On submit, call bot_streaming with (image, text, history); its two return values update the chatbot and the textbox
-    submit_btn.click(fn=bot_streaming, inputs=[image_input, text_input, chatbot], outputs=[chatbot, text_input])
-    # Example (image path, prompt) pairs that populate image_input and text_input
-    gr.Examples(
-        examples=[["./bee.jpg", "What is on the flower?"],
-                  ["./baklava.png", "How to make this pastry?"]],
-        inputs=[image_input, text_input]
     )
-# Enable the queue with api_open=False, then launch with show_api=False and share=False
 demo.queue(api_open=False)
-demo.launch(show_api=False, share=False)

+import time
+from threading import Thread
 from llava_llama3.serve.cli import chat_llava
 from llava_llama3.model.builder import load_pretrained_model
+import gradio as gr
 import torch
+from PIL import Image
 import spaces
 # Model configuration (new side of the diff; the '+' lines were added by this commit)
+model_id = "TheFinAI/FinLLaVA"  # first positional argument of load_pretrained_model
+device = "cuda:0"  # forwarded as device= to load_pretrained_model
 load_8bit = False  # passed positionally to load_pretrained_model
 load_4bit = False  # its use is not visible in the lines shown (the diff hunk elides part of the call)
 # Load the pretrained model
 tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
+    model_id,
     None,
     'llava_llama3',
     load_8bit,
     device=device
 )
 @spaces.GPU
+def bot_streaming(message, history):  # generator passed as fn= to gr.ChatInterface; yields the accumulated reply text
+    print(message)  # logs the raw incoming message to stdout
+    image = None
+    # Prefer an image attached to the current message (message is indexed by "files" and "text")
+    if message["files"]:
+        # Use the last attached file; it may be a dict carrying a "path" entry or a plain string
+        if isinstance(message["files"][-1], dict):
+            image = message["files"][-1]["path"]
+        else:
+            image = message["files"][-1]
+    else:
+        # No attachment in this message: scan the history; there is no break, so the last matching entry wins
+        for hist in history:
+            if isinstance(hist[0], tuple):  # only entries whose first element is a tuple are considered
+                image = hist[0][0]
+    # No image in the message or the history: raise gr.Error before the model is called
+    if image is None:
+        raise gr.Error("You need to upload an image for LLaVA to work.")
+    # Open the image; from here on `image` is the object returned by Image.open, not the original path
+    image = Image.open(image)
+    # The message text is used unchanged as the prompt
+    prompt = message['text']
+    # Call chat_llava with the module-level tokenizer, model, image processor and context length
     output = chat_llava(
         args=None,
         image_file=image,  # NOTE(review): receives the opened image object, not a file path - confirm chat_llava accepts that
+        text=prompt,
         tokenizer=tokenizer,
         model=llava_model,
         image_processor=image_processor,
         context_len=context_len
     )
+    # Accumulate each piece of output and yield the running text after every piece
+    buffer = ""
+    for new_text in output:  # NOTE(review): if output is a plain str this iterates one character at a time - confirm
+        buffer += new_text
+        yield buffer
+chatbot=gr.Chatbot(scale=1)  # chat display component, handed to gr.ChatInterface via chatbot=
+chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)  # input box restricted to image files, handed over via textbox=
+with gr.Blocks(fill_height=True, ) as demo:
+    gr.ChatInterface(
+    fn=bot_streaming,  # generator handler called with (message, history)
+    title="LLaVA Llama-3-8B",  # NOTE(review): title differs from the loaded model id "TheFinAI/FinLLaVA" - confirm intended
+    examples=[{"text": "What is on the flower?", "files": ["./bee.jpg"]},
+              {"text": "How to make this pastry?", "files": ["./baklava.png"]}],  # each example uses the same "text"/"files" keys that bot_streaming reads
+    stop_btn="Stop Generation",
+    multimodal=True,
+    textbox=chat_input,
+    chatbot=chatbot,
     )
 demo.queue(api_open=False)  # enable the queue with api_open=False
+demo.launch(show_api=False, share=False)  # start the app with show_api=False and share=False