Spaces:

Sarath2002
/

Form_Understanding_using_LayoutLMV3

Runtime error

App Files Files Community

Form_Understanding_using_LayoutLMV3 / support.py

Sarath2002

Update support.py

340cd2c about 1 year ago

raw

history blame

2.33 kB

	from datasets import load_dataset
	import numpy as np
	from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
	from datasets import load_dataset
	from PIL import Image, ImageDraw, ImageFont
	import torch

	# Pretrained LayoutLMv3 processor; turns an input image into model-ready tensors.
	tokenizer = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")

	# Token-classification weights are read from the local "models" path.
	model = LayoutLMv3ForTokenClassification.from_pretrained(r"models")

	# GPU placement is currently disabled, so the model stays on whatever device
	# `from_pretrained` put it on. The disabled code was:
	#     device = torch.device("cuda")
	#     model.cuda()

	# Tag set in class-index order: position i is the tag for predicted class id i.
	labels = [
	    'O',
	    'B-HEADER',
	    'I-HEADER',
	    'B-QUESTION',
	    'I-QUESTION',
	    'B-ANSWER',
	    'I-ANSWER',
	]

	# Class id -> tag string.
	id2label = dict(enumerate(labels))

	# Outline/text colour used for each lower-cased entity type when drawing.
	label2color = {
	    "question": "blue",
	    "answer": "green",
	    "header": "orange",
	    "other": "violet",
	}


	def unnormalize_box(bbox, width, height):
	    """Scale a box from the 0-1000 normalised grid to pixel coordinates.

	    Args:
	        bbox: Indexable with at least four numbers; entries 0 and 2 are
	            scaled by ``width``, entries 1 and 3 by ``height``.
	        width: Horizontal size to scale to.
	        height: Vertical size to scale to.

	    Returns:
	        A new four-element list holding the scaled coordinates.
	    """
	    # Coordinates alternate between the horizontal and the vertical axis.
	    axis_sizes = (width, height, width, height)
	    return [
	        size * (bbox[position] / 1000)
	        for position, size in enumerate(axis_sizes)
	    ]


	def iob_to_label(label):
	    """Strip the two-character IOB prefix (e.g. ``"B-"``) from a tag.

	    Args:
	        label: Tag string such as ``"B-HEADER"`` or ``"O"``.

	    Returns:
	        Everything after the first two characters, or ``"other"`` when
	        nothing remains (as happens for the bare ``"O"`` tag).
	    """
	    return label[2:] or "other"


	def processor(image):
	    """Label the tokens of a form image and draw the predictions onto it.

	    The image is encoded with the module-level ``tokenizer``, classified
	    with the module-level ``model``, and every non-subword token gets a
	    coloured rectangle plus its lower-cased entity name drawn on the image.

	    Args:
	        image: Image object exposing ``convert("RGB")`` and ``size``
	            (presumably a ``PIL.Image.Image`` -- confirm against callers).

	    Returns:
	        The RGB-converted image with the annotations drawn on it.
	    """
	    image = image.convert("RGB")
	    width, height = image.size

	    # Encode the image; offsets are requested so subword pieces can be
	    # told apart from word-initial tokens below.
	    encoding = tokenizer(
	        image, truncation=True, return_offsets_mapping=True, return_tensors="pt"
	    )
	    # The offsets are not a model input, so take them out before the forward pass.
	    offset_mapping = encoding.pop("offset_mapping")

	    # Move the inputs to wherever the model actually lives. Hard-coding
	    # 'cuda' here fails because the module never moves the model to the GPU
	    # (and fails outright on hosts without CUDA).
	    encoding = encoding.to(model.device)

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        outputs = model(**encoding)

	    # Drop only the batch axis (a single image is encoded per call).
	    predictions = outputs.logits.argmax(-1).squeeze(0).tolist()
	    token_boxes = encoding.bbox.squeeze(0).tolist()

	    # A token whose character offset does not start at 0 continues a word;
	    # only word-initial tokens are kept.
	    is_subword = np.array(offset_mapping.squeeze(0).tolist())[:, 0] != 0
	    true_predictions = [
	        id2label[pred]
	        for pred, subword in zip(predictions, is_subword)
	        if not subword
	    ]
	    true_boxes = [
	        unnormalize_box(box, width, height)
	        for box, subword in zip(token_boxes, is_subword)
	        if not subword
	    ]

	    draw = ImageDraw.Draw(image)
	    font = ImageFont.load_default()
	    for prediction, box in zip(true_predictions, true_boxes):
	        predicted_label = iob_to_label(prediction).lower()
	        color = label2color[predicted_label]
	        draw.rectangle(box, outline=color)
	        # Put the label text just inside the left edge, slightly above the box.
	        draw.text(
	            (box[0] + 10, box[1] - 10),
	            text=predicted_label,
	            fill=color,
	            font=font,
	        )

	    return image