DocVQA-Sanctum / pix2struct.py
krishnapal2308
Initial Commit
0bd5bed
raw
history blame
618 Bytes
from functools import lru_cache

from PIL import Image
from transformers import Pix2StructForConditionalGeneration as psg
from transformers import Pix2StructProcessor as psp
# Checkpoint used for both the model weights and the matching processor.
_PIX2STRUCT_CHECKPOINT = "google/pix2struct-docvqa-large"


@lru_cache(maxsize=1)
def _load_model_and_processor():
    """Load the Pix2Struct DocVQA model and processor once per process.

    Instantiating the checkpoint is by far the most expensive step of
    answering a question, so the pair is cached and reused by every
    subsequent ``get_result`` call instead of being rebuilt each time.
    """
    model = psg.from_pretrained(_PIX2STRUCT_CHECKPOINT)
    processor = psp.from_pretrained(_PIX2STRUCT_CHECKPOINT)
    return model, processor


def get_result(image_path, question):
    """Answer *question* about the document image at *image_path*.

    Args:
        image_path: Location of the image; passed straight to
            ``PIL.Image.open``.
        question: Question text rendered alongside the image by the
            processor.

    Returns:
        The value of ``processor.batch_decode`` for the generated tokens,
        i.e. a list of decoded answer strings with special tokens removed
        (expected to hold a single entry for a single image).
    """
    model, processor = _load_model_and_processor()

    # convert() yields an independent in-memory image, so the underlying
    # file handle can be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    inputs = processor(images=image, text=question, return_tensors="pt")
    predictions = model.generate(**inputs, max_new_tokens=256)
    return processor.batch_decode(predictions, skip_special_tokens=True)