"""FastAPI service that captions an image fetched from a URL with Pix2Struct."""

from io import BytesIO

from fastapi import FastAPI, HTTPException
from fastapi.responses import RedirectResponse
from PIL import Image, UnidentifiedImageError
import requests
from transformers import AutoProcessor, Pix2StructForConditionalGeneration

# Checkpoint shared by the processor and the model so the two cannot drift.
MODEL_NAME = "google/pix2struct-textcaps-base"
# Upper bound on generated tokens for each caption.
MAX_NEW_TOKENS = 50
# Seconds to wait for the remote image host; without a timeout `requests`
# waits indefinitely.
REQUEST_TIMEOUT_SECONDS = 10
# Text prefix used for the "conditional" caption.
CONDITIONAL_PROMPT = "A picture of"

# Loaded once at import time and reused by every request.
processor = AutoProcessor.from_pretrained(MODEL_NAME)
model = Pix2StructForConditionalGeneration.from_pretrained(MODEL_NAME)

app = FastAPI()


@app.get("/")
async def docs_redirect():
    """Redirect the root path to the interactive API docs at ``/docs``."""
    return RedirectResponse(url='/docs')


def _fetch_image(url):
    """Download *url* and return it as a PIL image.

    Raises:
        HTTPException: 400 when the URL cannot be fetched (bad URL, timeout,
            connection failure, non-2xx status) or when the downloaded bytes
            are not an image PIL can identify.
    """
    # NOTE(security): `url` comes straight from the caller, so this endpoint
    # will fetch any address the server can reach (SSRF). Restrict allowed
    # hosts/schemes before exposing this service to untrusted clients.
    try:
        # The context manager releases the connection even on failure.
        with requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS) as response:
            response.raise_for_status()
            # `.content` has any Content-Encoding (e.g. gzip) already undone.
            payload = response.content
    except requests.RequestException as err:
        raise HTTPException(
            status_code=400,
            detail=f"Could not fetch image from URL: {err}",
        ) from err

    try:
        return Image.open(BytesIO(payload))
    except UnidentifiedImageError as err:
        raise HTTPException(
            status_code=400,
            detail="URL did not return a readable image.",
        ) from err


def _caption(image, prompt=None):
    """Return one decoded caption for *image*, optionally seeded by *prompt*."""
    if prompt is None:
        inputs = processor(images=image, return_tensors="pt")
    else:
        # The prompt is passed with add_special_tokens=False, as in the
        # conditional-generation example from the model documentation.
        inputs = processor(
            text=prompt,
            images=image,
            return_tensors="pt",
            add_special_tokens=False,
        )
    generated_ids = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


@app.get("/generate")
def generate(url: str):
    """Caption the image found at ``url``.

    Two captions are produced: an unprompted ("autoregressive") one and one
    continued from the prompt "A picture of" ("conditional").

    Example ``url``: https://www.ilankelman.org/stopsigns/australia.jpg

    References:
    https://huggingface.co/docs/transformers/main/en/model_doc/pix2struct#transformers.Pix2StructForConditionalGeneration
    https://huggingface.co/google/pix2struct-widget-captioning-large/blob/main/README.md

    Returns:
        ``{"output": {"autoregressive": <str>, "conditional": <str>}}``

    Raises:
        HTTPException: 400 when the URL cannot be fetched or does not point
            to a readable image.
    """
    image = _fetch_image(url)

    # autoregressive generation
    generated_text = _caption(image)
    print(generated_text)

    # conditional generation
    generated_text1 = _caption(image, prompt=CONDITIONAL_PROMPT)

    return {
        "output": {
            "autoregressive": generated_text,
            "conditional": generated_text1,
        },
    }