DocVQA-Sanctum / pix2struct.py
krishnapal2308
adding description and pix2struct output fix
0f51c16
raw
history blame
No virus
621 Bytes
from PIL import Image
from transformers import Pix2StructForConditionalGeneration as psg
from transformers import Pix2StructProcessor as psp
# Checkpoint used for both the model weights and the matching processor.
_PIX2STRUCT_CHECKPOINT = "google/pix2struct-docvqa-large"

# Holds at most one (model, processor) tuple so the expensive
# ``from_pretrained`` calls run once per process rather than once per question.
_pix2struct_cache = []


def _load_pix2struct():
    """Return the cached ``(model, processor)`` pair, loading it on first use."""
    if not _pix2struct_cache:
        model = psg.from_pretrained(_PIX2STRUCT_CHECKPOINT)
        processor = psp.from_pretrained(_PIX2STRUCT_CHECKPOINT)
        # Store both together only after both loads succeeded, so a failed
        # load never leaves a half-populated cache behind.
        _pix2struct_cache.append((model, processor))
    return _pix2struct_cache[0]


def get_result(image_path, question):
    """Answer ``question`` about the image at ``image_path`` with Pix2Struct.

    The image is opened with PIL, converted to RGB, and passed together with
    the question text through the ``google/pix2struct-docvqa-large`` processor
    and model. Generation is limited to 256 new tokens.

    Args:
        image_path: Anything ``PIL.Image.open`` accepts (presumably a path to
            a document image; verify against callers).
        question: The question text handed to the processor.

    Returns:
        The first decoded prediction, with special tokens stripped.

    Note:
        The model and processor are loaded on the first call and kept in
        memory for the lifetime of the process.
    """
    model, processor = _load_pix2struct()

    # ``Image.open`` is lazy and keeps the file open; ``convert`` yields an
    # independent in-memory copy, so the source handle can be closed right away.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")

    inputs = processor(images=image, text=question, return_tensors="pt")
    predictions = model.generate(**inputs, max_new_tokens=256)
    predicted_answer = processor.batch_decode(predictions, skip_special_tokens=True)
    return predicted_answer[0]