tatr-demo

Running

App Files Files Community

tatr-demo / app.py

nielsr HF staff

Create app.py

f1d8127 7 months ago

raw history blame

No virus

4.72 kB

	import matplotlib.pyplot as plt
	import matplotlib.patches as patches
	from matplotlib.patches import Patch
	import io
	from PIL import Image

	from transformers import TableTransformerImageProcessor, AutoModelForObjectDetection
	import torch

	import gradio as gr

	# Image pre-processor and pretrained Table Transformer detection weights.
	# Both are created once at import time and reused by detect_table().
	processor = TableTransformerImageProcessor(max_size=800)
	model = AutoModelForObjectDetection.from_pretrained(
	    "microsoft/table-transformer-detection",
	    revision="no_timm",
	)


	# for output bounding box post-processing
	def box_cxcywh_to_xyxy(x):
	    """Convert boxes from (center_x, center_y, width, height) to corner form.

	    Args:
	        x: Tensor whose last dimension holds the four values
	            ``(cx, cy, w, h)``. Any number of leading dimensions is accepted.

	    Returns:
	        Tensor of the same shape whose last dimension holds
	        ``(x_min, y_min, x_max, y_max)``.
	    """
	    x_c, y_c, w, h = x.unbind(-1)
	    corners = [x_c - 0.5 * w, y_c - 0.5 * h, x_c + 0.5 * w, y_c + 0.5 * h]
	    # Stack along the last axis (the one that was unbound) so a single box of
	    # shape (4,) and batched inputs of shape (..., 4) work as well; for the
	    # usual (N, 4) input this is identical to stacking on dim=1.
	    return torch.stack(corners, dim=-1)


	def rescale_bboxes(out_bbox, size):
	    """Turn (cx, cy, w, h) boxes into corner boxes scaled by an image size.

	    Args:
	        out_bbox: Tensor of boxes in (cx, cy, w, h) form, as accepted by
	            ``box_cxcywh_to_xyxy``.
	        size: Pair ``(width, height)`` used as the scale factors.

	    Returns:
	        Tensor of (x_min, y_min, x_max, y_max) boxes, with x values multiplied
	        by the width and y values multiplied by the height.
	    """
	    width, height = size
	    corners = box_cxcywh_to_xyxy(out_bbox)
	    scale = torch.tensor([width, height, width, height], dtype=torch.float32)
	    return corners * scale


	def outputs_to_objects(outputs, img_size, id2label):
	    """Collect the detections of the first batch item as plain dictionaries.

	    Args:
	        outputs: Model output exposing ``logits`` as an attribute and
	            ``'pred_boxes'`` by key.
	        img_size: ``(width, height)`` forwarded to ``rescale_bboxes``.
	        id2label: Mapping from integer class id to class name. Predictions
	            mapped to ``'no object'`` are dropped.

	    Returns:
	        List of dicts with the keys ``'label'`` (class name), ``'score'``
	        (float) and ``'bbox'`` (list of four floats).
	    """
	    # Highest softmax probability and its class index for every prediction.
	    best = outputs.logits.softmax(-1).max(-1)
	    label_ids = best.indices.detach().cpu().numpy()[0]
	    confidences = best.values.detach().cpu().numpy()[0]

	    raw_boxes = outputs['pred_boxes'].detach().cpu()[0]
	    scaled_boxes = [row.tolist() for row in rescale_bboxes(raw_boxes, img_size)]

	    detections = []
	    for label_id, confidence, box in zip(label_ids, confidences, scaled_boxes):
	        name = id2label[int(label_id)]
	        if name == 'no object':
	            continue
	        detections.append({
	            'label': name,
	            'score': float(confidence),
	            'bbox': [float(coord) for coord in box],
	        })

	    return detections


	def fig2img(fig):
	    """Render a Matplotlib figure into memory and return it as a PIL image.

	    Args:
	        fig: Object with a ``savefig`` method that accepts a binary buffer.

	    Returns:
	        The image opened by ``Image.open`` from the in-memory buffer.
	    """
	    buffer = io.BytesIO()
	    fig.savefig(buffer)
	    # Rewind so that the reader starts at the first byte that was written.
	    buffer.seek(0)
	    return Image.open(buffer)


	def visualize_detected_tables(img, det_tables):
	    """Draw the detected tables on top of the page image.

	    Args:
	        img: Image to display; passed straight to ``imshow``.
	        det_tables: Iterable of dicts with a ``'label'`` and a ``'bbox'``
	            entry, where ``'bbox'`` is ``[x_min, y_min, x_max, y_max]``.
	            Entries whose label is neither ``'table'`` nor
	            ``'table rotated'`` are skipped.

	    Returns:
	        The Matplotlib figure (10 x 10 inches) holding the rendering.
	    """
	    colors = {
	        'table': (1, 0, 0.45),
	        'table rotated': (0.95, 0.6, 0.1),
	    }
	    hatch = '//////'
	    linewidth = 2
	    alpha = 0.3

	    # pyplot hands back the same "current" figure until it is closed, so it
	    # is cleared first; otherwise the image and boxes drawn for a previous
	    # call would still be present underneath the new ones.
	    fig = plt.gcf()
	    fig.clf()
	    fig.set_size_inches(10, 10)
	    ax = fig.gca()
	    ax.imshow(img, interpolation="lanczos")

	    for det_table in det_tables:
	        bbox = det_table['bbox']
	        color = colors.get(det_table['label'])
	        if color is None:
	            continue

	        origin = bbox[:2]
	        width = bbox[2] - bbox[0]
	        height = bbox[3] - bbox[1]

	        # Three stacked rectangles: faint fill, solid outline, hatch pattern.
	        ax.add_patch(patches.Rectangle(
	            origin, width, height, linewidth=linewidth,
	            edgecolor='none', facecolor=color, alpha=0.1))
	        ax.add_patch(patches.Rectangle(
	            origin, width, height, linewidth=linewidth,
	            edgecolor=color, facecolor='none', linestyle='-', alpha=alpha))
	        ax.add_patch(patches.Rectangle(
	            origin, width, height, linewidth=0,
	            edgecolor=color, facecolor='none', linestyle='-',
	            hatch=hatch, alpha=0.2))

	    ax.set_xticks([])
	    ax.set_yticks([])

	    legend_elements = [
	        Patch(facecolor=colors['table'], edgecolor=colors['table'],
	              label='Table', hatch=hatch, alpha=alpha),
	        Patch(facecolor=colors['table rotated'],
	              edgecolor=colors['table rotated'],
	              label='Table (rotated)', hatch=hatch, alpha=alpha),
	    ]
	    ax.legend(handles=legend_elements, bbox_to_anchor=(0.5, -0.02),
	              loc='upper center', borderaxespad=0, fontsize=10, ncol=2)
	    ax.axis('off')

	    return fig


	def detect_table(image):
	    """Detect tables in an image and return the image with them highlighted.

	    Args:
	        image: Input image. It is passed to the module-level ``processor``
	            and its ``size`` attribute is used as ``(width, height)`` when
	            rescaling the predicted boxes.

	    Returns:
	        Image of the rendered figure showing the input with the detected
	        tables drawn on top.
	    """
	    # prepare image for the model
	    pixel_values = processor(image, return_tensors="pt").pixel_values

	    # forward pass
	    with torch.no_grad():
	        outputs = model(pixel_values)

	    # postprocess to get detected tables; work on a copy so that the model
	    # config does not gain one more "no object" entry on every call
	    id2label = dict(model.config.id2label)
	    id2label[len(id2label)] = "no object"
	    detected_tables = outputs_to_objects(outputs, image.size, id2label)

	    # visualize (the original referenced an undefined name `img` here)
	    fig = visualize_detected_tables(image, detected_tables)
	    result = fig2img(fig)
	    # The rendering already lives in the in-memory buffer behind `result`,
	    # so the figure can be released instead of lingering in pyplot.
	    plt.close(fig)

	    return result


	# Texts and sample input shown on the demo page.
	title = "Demo: table detection with Table Transformer"
	description = "Demo for the Table Transformer (TATR)."
	examples = [['example_pdf.jpg']]

	# Expose detect_table as an image-in / image-out Gradio app and start it.
	interface = gr.Interface(
	    fn=detect_table,
	    inputs=gr.Image(type="pil"),
	    outputs=gr.Image(type="pil", label="Detected table"),
	    title=title,
	    description=description,
	    examples=examples,
	    enable_queue=True,
	)
	interface.launch(debug=True)