"""Gradio chat demo that serves the FinLLaVA multimodal model."""

import gradio as gr
from llava_llama3.serve.cli import chat_llava
from llava_llama3.model.builder import load_pretrained_model
from PIL import Image
import torch

# Model location and loading options.
model_path = "TheFinAI/FinLLaVA"
device = "cuda"
# NOTE(review): conv_mode, temperature and max_new_tokens are defined here but
# never handed to chat_llava (it is called with args=None) -- confirm whether
# chat_llava is expected to pick them up some other way.
conv_mode = "llama_3"
temperature = 0
max_new_tokens = 512
load_8bit = False
load_4bit = False

# Load the model once at import time so every request reuses the same weights.
tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
    model_path, None, 'llava_llama3', load_8bit, load_4bit, device=device
)


def predict(image, text):
    """Run the model on a single image and prompt.

    Args:
        image: Value forwarded to ``chat_llava`` as ``image_file``
            (the chat adapter below passes a file path).
        text: The user's prompt.

    Returns:
        Whatever ``chat_llava`` returns for this image/prompt pair.
    """
    output = chat_llava(
        args=None,
        image_file=image,
        text=text,
        tokenizer=tokenizer,
        model=llava_model,
        image_processor=image_processor,
        context_len=context_len,
    )
    return output


def _file_path(entry):
    """Return the path of an uploaded file given as a path or a dict with "path"."""
    if isinstance(entry, dict):
        return entry.get("path")
    return entry


def _find_image(message, history):
    """Return the newest image path from the message, else from history, else None."""
    files = message.get("files") or []
    if files:
        return _file_path(files[-1])
    # Follow-up turns usually carry no file, so reuse the latest earlier upload.
    # NOTE(review): this only understands tuple-style history, where an upload
    # is stored as a [(file,), None] pair -- confirm against the installed
    # Gradio version if the chatbot is switched to the "messages" format.
    for turn in reversed(history or []):
        if not isinstance(turn, (list, tuple)) or not turn:
            continue
        user_part = turn[0]
        if isinstance(user_part, (list, tuple)) and user_part:
            return _file_path(user_part[0])
    return None


def respond(message, history):
    """Adapt Gradio's multimodal chat callback to ``predict(image, text)``.

    With ``multimodal=True`` the chat interface calls its function with
    ``(message, history)``, where ``message`` is a dict holding ``"text"`` and
    ``"files"``. Passing ``predict`` directly would bind that dict to ``image``
    and the history to ``text``.

    Args:
        message: The submitted message (dict with "text" and "files"); a plain
            string is tolerated and treated as text with no files.
        history: Previous turns of the conversation as supplied by Gradio.

    Returns:
        The model's reply for the resolved image and prompt.

    Raises:
        gr.Error: If no image is available in the message or the history.
    """
    if isinstance(message, dict):
        text = message.get("text") or ""
    else:
        text = message or ""
        message = {}

    image = _find_image(message, history)
    if image is None:
        raise gr.Error("Please upload an image to chat with FinLLaVA.")
    return predict(image, text)


PLACEHOLDER = """

FinLLaVA

"""

chatbot = gr.Chatbot(placeholder=None, scale=1)
chat_input = gr.MultimodalTextbox(
    interactive=True,
    file_types=["image"],
    placeholder="Enter message or upload file...",
    show_label=False,
)

with gr.Blocks(fill_height=True) as demo:
    gr.ChatInterface(
        fn=respond,
        title="FinLLaVA",
        examples=[
            {"text": "What is on the flower?", "files": ["./bee.jpg"]},
            {"text": "How to make this pastry?", "files": ["./baklava.png"]},
        ],
        description="Try [LLaVA Llama-3-8B](https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-transformers). Upload an image and start chatting about it, or simply try one of the examples below. If you don't upload an image, you will receive an error.",
        stop_btn="Stop Generation",
        multimodal=True,
        textbox=chat_input,
        chatbot=chatbot,
    )

demo.queue(api_open=False)
demo.launch(show_api=False, share=False)