# Minimal Gradio launcher script: builds a demo from a hosted model
# reference and starts the web UI when this file is executed.
import gradio as gr

# The "models/" prefix is the form gr.load() uses for Hugging Face Hub model
# repos (per Gradio docs — confirm against the installed Gradio version).
# launch() starts the local web server for the loaded demo.
gr.load("models/4bit/llava-v1.5-13b-4GB-8bit").launch()