"""Gradio demo: leaf-disease Q&A over a 4-bit Llama-3.2-11B-Vision model.

Loads the quantized base model plus a fine-tuned LoRA adapter once at
startup, then serves a single-image -> text interface.
"""
from transformers import AutoProcessor
# NOTE(review): unsloth's documented API exposes FastVisionModel /
# FastLanguageModel, not UnslothModel — confirm this import works with the
# pinned unsloth version, otherwise switch to FastVisionModel.from_pretrained.
from unsloth import UnslothModel
from PIL import Image
import gradio as gr
import torch

# Load model + adapter once at module import; reloading per request would be
# prohibitively slow.
model, tokenizer = UnslothModel.from_pretrained(
    model_name="unsloth/Llama-3.2-11B-Vision-bnb-4bit",
    adapter_path="ArnavLatiyan/my-lora-leaf-model",
    load_in_4bit=True,
)
processor = AutoProcessor.from_pretrained("unsloth/llama-3.2-vision-11b")

# Resolve the device once; inputs must live on the same device as the model.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
PROMPT = "What disease is this?"


def predict(image):
    """Return the model's disease description for one uploaded image.

    Args:
        image: PIL.Image supplied by Gradio, or None when nothing was uploaded.

    Returns:
        str: the generated answer (or a hint when no image was given).
    """
    if image is None:
        return "Please upload an image."
    # Encode text and image in ONE processor call so the <|image|> token is
    # present and cross-attention actually sees the pixels. The original code
    # tokenized the question separately (no image token) and dict-merged it,
    # which makes generation ignore the image.
    text = f"<|image|><|begin_of_text|>{PROMPT}"
    inputs = processor(images=image, text=text, return_tensors="pt").to(DEVICE)
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=50)
    # Decode only the newly generated tokens, not the echoed prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)


if __name__ == "__main__":
    # Guarded launch so importing this module (e.g. for tests) does not
    # start a web server as a side effect.
    gr.Interface(fn=predict, inputs="image", outputs="text").launch()