|
---
library_name: transformers
license: apache-2.0
pipeline_tag: image-to-text
---
|
|
|
# BLIP-Image-to-recipe
|
|
|
|
|
|
|
# Inference code
|
|
|
# Generate text for a single image with the BLIP-Image-to-recipe checkpoint.
from io import BytesIO

import requests
import torch
from PIL import Image
from transformers import AutoProcessor, BlipForConditionalGeneration

# Use the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

img_url = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcSQuFg4LTHUattLGPU0kLzYpBGHRtuqgJY8Gho3uZe_cg&s'

# Fail fast on a stalled connection or an HTTP error instead of handing an
# error page to PIL; decode from the fully downloaded bytes.
response = requests.get(img_url, timeout=30)
response.raise_for_status()
image = Image.open(BytesIO(response.content)).convert('RGB')

# Model weights come from the recipe checkpoint; the processor is loaded from
# the Salesforce base captioning repository.
model = BlipForConditionalGeneration.from_pretrained("Fatehmujtaba/BLIP-Image-to-recipe").to(device)
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")

# Preprocess the image and move the resulting tensors to the model's device.
inputs = processor(images=image, return_tensors="pt").to(device)
pixel_values = inputs.pixel_values

generated_ids = model.generate(pixel_values=pixel_values, max_length=50)

# batch_decode returns one string per input image; a single image was passed.
generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(generated_caption)
|
|
|
|