import gradio as gr
import torch
from transformers import pipeline, BitsAndBytesConfig

# Load LLaVA 1.5 7B in 4-bit so it fits on a single consumer GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16
)

model_id = "llava-hf/llava-1.5-7b-hf"
pipe = pipeline(
    "image-to-text",
    model=model_id,
    model_kwargs={"quantization_config": quantization_config}
)


def launch(image, prompt):
    # LLaVA's chat template expects the <image> placeholder before the user text.
    prompt = f"USER: <image>\n{prompt}\nASSISTANT:"
    outputs = pipe(image, prompt=prompt, generate_kwargs={"max_new_tokens": 200})
    return outputs[0]["generated_text"]


iface = gr.Interface(
    launch,
    inputs=[gr.Image(type="pil"), "text"],
    outputs="text"
)
iface.launch()