nbroad
/

p2s-infographic-lg-endpt

Inference Endpoints

Model card Files Files and versions Community

p2s-infographic-lg-endpt / handler.py

nbroad's picture

nbroad HF staff

Upload 3 files

2cdc125 7 months ago

raw history blame

No virus

1.75 kB

	import base64
	from io import BytesIO
	from typing import Dict, List, Any
	from transformers import Pix2StructForConditionalGeneration, AutoProcessor
	from PIL import Image
	import torch

	class EndpointHandler():
	    """Custom inference handler answering questions about infographic images.

	    Wraps the ``google/pix2struct-infographics-vqa-large`` checkpoint: the
	    constructor loads the model and its processor once, and each call
	    decodes base64 image(s), runs generation and returns the decoded text.
	    """

	    def __init__(self, path: str = ""):
	        """Load the model and processor and move the model to the device.

	        Args:
	            path: Accepted (and ignored) so the handler can be constructed
	                with a repository path, as the Inference Endpoints custom
	                handler contract is documented to do. The weights are
	                always loaded from the hard-coded checkpoint name below.
	        """
	        model_name = "google/pix2struct-infographics-vqa-large"

	        self.model = Pix2StructForConditionalGeneration.from_pretrained(model_name)
	        self.processor = AutoProcessor.from_pretrained(model_name)
	        # Kept for compatibility; nothing in this class reads it.
	        self.text_prompt = None

	        # Prefer the GPU when one is visible, otherwise fall back to CPU.
	        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	        self.model.to(self.device)

	    @staticmethod
	    def _decode_image(encoded):
	        """Turn one base64-encoded image payload into a PIL image."""
	        return Image.open(BytesIO(base64.b64decode(encoded)))

	    def __call__(self, data: Any) -> Dict[str, List[str]]:
	        """Run visual question answering on the request payload.

	        Args:
	            data: Request dictionary. ``data["inputs"]`` (or ``data`` itself
	                when that key is absent) must provide ``"image"`` -- a
	                base64-encoded image or a list of them -- and
	                ``"question"``. The optional ``data["parameters"]`` dict is
	                forwarded to ``generate`` as keyword arguments. Both keys
	                are popped, so ``data`` is mutated.

	        Returns:
	            A dictionary whose only key is ``"output"``; its value is the
	            list of decoded answer strings.

	        Raises:
	            KeyError: If ``"image"`` or ``"question"`` is missing.
	        """
	        inputs = data.pop("inputs", data)
	        # A JSON ``null`` arrives as None; normalise so it can be unpacked.
	        parameters = data.pop("parameters", None) or {}

	        encoded_images = inputs["image"]
	        if isinstance(encoded_images, list):
	            img = [self._decode_image(item) for item in encoded_images]
	        else:
	            img = self._decode_image(encoded_images)

	        question = inputs["question"]

	        with torch.inference_mode():
	            model_inputs = self.processor(
	                images=img, text=question, return_tensors="pt"
	            ).to(self.device)

	            # The processor output and the generation options must both be
	            # unpacked: passed positionally they would be taken as the
	            # `inputs` tensor and the `generation_config` object instead.
	            raw_output = self.model.generate(**model_inputs, **parameters)

	            decoded_output = self.processor.batch_decode(
	                raw_output, skip_special_tokens=True
	            )

	        # postprocess the prediction
	        return {"output": decoded_output}