--- license: apache-2.0 --- ``` pip install git+https://github.com/huggingface/transformers ``` ``` from transformers import AutoProcessor, PaliGemmaForConditionalGeneration from PIL import Image import requests import torch model_id = "gokaygokay/imageinwords-paligemma-transformers" url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true" image = Image.open(requests.get(url, stream=True).raw) model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).eval() processor = AutoProcessor.from_pretrained(model_id) ## prefix prompt = "caption en" model_inputs = processor(text=prompt, images=image, return_tensors="pt") input_len = model_inputs["input_ids"].shape[-1] with torch.inference_mode(): generation = model.generate(**model_inputs, max_new_tokens=512, do_sample=False) generation = generation[0][input_len:] decoded = processor.decode(generation, skip_special_tokens=True) print(decoded) ```