# FinLLaVA

import gradio as gr
from llava_llama3.serve.cli import chat_llava
from llava_llama3.model.builder import load_pretrained_model
from PIL import Image
import torch

# --- Model selection and placement -----------------------------------------
model_path = "TheFinAI/FinLLaVA"
device = "cuda"

# Quantised loading switches, forwarded positionally to load_pretrained_model.
load_8bit = False
load_4bit = False

# --- Generation settings ----------------------------------------------------
# NOTE(review): these three values are not referenced anywhere else in the
# visible file — confirm whether chat_llava is expected to receive them.
conv_mode = "llama_3"
temperature = 0
max_new_tokens = 512

# Load everything once at import time so each chat request reuses the same
# objects. The second and third positional arguments are presumably the
# base-model path and the model name, as in upstream LLaVA — TODO confirm
# against llava_llama3.model.builder.
_loaded_components = load_pretrained_model(
    model_path,
    None,
    "llava_llama3",
    load_8bit,
    load_4bit,
    device=device,
)
tokenizer, llava_model, image_processor, context_len = _loaded_components

def predict(image, text):
    """Generate a model reply for one image and one text prompt.

    ``gr.ChatInterface(multimodal=True)`` invokes its callback as
    ``fn(message, history)``, where ``message`` is a dict holding ``"text"``
    and ``"files"``. When called that way the dict arrives in ``image`` and
    the chat history in ``text``; the image and prompt are unpacked from the
    dict and the history is not used. A direct call
    ``predict(image_file, prompt)`` is forwarded unchanged.

    Args:
        image: An image file reference to hand to ``chat_llava``, or the
            multimodal message dict supplied by ``gr.ChatInterface``.
        text: The prompt string (direct call), or the chat history when the
            first argument is a multimodal message dict.

    Returns:
        Whatever ``chat_llava`` returns for the request.

    Raises:
        gr.Error: If a multimodal message contains no uploaded file.
    """
    if isinstance(image, dict):
        message = image
        uploaded = message.get("files") or []
        if not uploaded:
            # Surfaced to the user by Gradio instead of an opaque failure
            # deeper in the model code.
            raise gr.Error("Please upload an image to chat about.")
        first_file = uploaded[0]
        # Entries may be plain path strings or dicts carrying a "path" key,
        # depending on the Gradio release — handle both.
        image_file = first_file["path"] if isinstance(first_file, dict) else first_file
        prompt = message.get("text") or ""
    else:
        image_file = image
        prompt = text

    return chat_llava(
        args=None,
        image_file=image_file,
        text=prompt,
        tokenizer=tokenizer,
        model=llava_model,
        image_processor=image_processor,
        context_len=context_len,
    )

# HTML banner intended for the empty-chat state.
# NOTE(review): currently unused — the Chatbot below is built with
# placeholder=None; confirm whether PLACEHOLDER should be passed instead.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
   <h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">FinLLaVA</h1>
</div>
"""

# Conversation display handed to gr.ChatInterface further down.
chatbot = gr.Chatbot(
    placeholder=None,
    scale=1,
)

# Input box that takes text plus image uploads.
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["image"],
    placeholder="Enter message or upload file...",
    show_label=False,
)

# Clickable sample prompts; the image paths are relative to the working
# directory the app is started from.
_example_messages = [
    {"text": "What is on the flower?", "files": ["./bee.jpg"]},
    {"text": "How to make this pastry?", "files": ["./baklava.png"]},
]

# NOTE(review): this text links to xtuner/llava-llama-3-8b while model_path
# loads TheFinAI/FinLLaVA — confirm which model the blurb should name.
_interface_description = "Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error."

# Assemble the page: one multimodal chat interface that routes every message
# through predict().
with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=predict,
        chatbot=chatbot,
        textbox=chat_input,
        multimodal=True,
        title="FinLLaVA",
        description=_interface_description,
        examples=_example_messages,
        stop_btn="Stop Generation",
    )

# Enable request queuing with the API closed, then start the server without
# the API page and without a public share link.
demo.queue(api_open=False)
demo.launch(show_api=False, share=False)