Spaces:

napatswift
/

table-det

Runtime error

table-det / main.py

napatswift

Change seq type

ba7e7bf over 1 year ago

3.83 kB

	from mmdet.apis import init_detector, inference_detector
	import gradio as gr
	import cv2
	import sys
	import torch
	import numpy as np
	import logging

	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	print('Loading model...')
	device = 'gpu' if torch.cuda.is_available() else 'cpu'

	table_det = init_detector('model/table-det/config.py',
	'model/table-det/model.pth', device=device)


	def get_corners(points):
	"""
	Returns the top-left, top-right, bottom-right, and bottom-left corners
	of a rectangle defined by a list of four points in the form of tuples.
	"""
	# Sort points by x-coordinate
	sorted_points = sorted(points, key=lambda p: p[0])

	# Split sorted points into left and right halves
	left_points = sorted_points[:2]
	right_points = sorted_points[2:]

	# Sort left and right points by y-coordinate
	left_points = sorted(left_points, key=lambda p: p[1])
	right_points = sorted(right_points, key=lambda p: p[1], reverse=True)

	# Return corners in order: top-left, top-right, bottom-right, bottom-left
	return (left_points[0], right_points[0], right_points[1], left_points[1])


	def get_bbox(mask_array):
	"""
	Gets the bounding boxes of tables in a mask array.

	Args:
	mask_array (numpy.ndarray): The mask array to be processed.

	Returns:
	list[tuple(int, int, int, int)]: A list of bounding boxes, where each bounding box is a tuple of (top left x, top left y, bottom right x, bottom right y).
	"""

	# Find the contours in the mask array.
	contours, hierarchy = cv2.findContours(
	mask_array, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

	# For each contour, get the bounding box.
	table_bboxes = []
	for cnt in contours:

	# Get the minimum area rectangle that encloses the contour.
	rect = cv2.minAreaRect(cnt)

	# Get the corners of the minimum area rectangle.
	box = cv2.boxPoints(rect)

	# Get the epsilon value, which is used to approximate the contour.
	epsilon = cv2.arcLength(cnt, True)

	# Approximate the contour using the epsilon value.
	approx = cv2.approxPolyDP(cnt, 0.02 * epsilon, True)

	# Get the points of the approximated contour.
	points = np.squeeze(approx)

	# If the number of points is not 4, then use the points of the minimum area rectangle.
	if len(points) != 4:
	points = box

	# Get the top left, bottom right, bottom left, and top right corners of the bounding box.
	tl, br, bl, tr = get_corners(points.tolist())

	# Add the bounding box to the list of bounding boxes.
	table_bboxes.append([tl, tr, br, bl])

	# Return the list of bounding boxes.
	return table_bboxes


	def predict(image_input):
	logger.info(f"Image input: {image_input}")

	# Inference the tables in the image.
	result = inference_detector(table_det, image_input)

	# Get the masks of the tables.
	mask_images = result.pred_instances.masks.cpu().numpy()
	scores = result.pred_instances.scores.cpu().numpy()
	bboxes = result.pred_instances.bboxes.cpu().numpy()

	logger.info(f"Result: {result}")

	bbox_list = []

	# Filter out the masks with a score less than 0.5.
	filtered_mask_images = mask_images[scores > 0.5]
	filtered_bboxes = bboxes[scores > 0.5]

	# Get the bounding boxes of the tables.
	for mask_image in filtered_mask_images:
	bbox_list.extend(get_bbox(mask_image.astype(np.uint8)))

	return {'rect-fit': bbox_list, 'bbox': filtered_bboxes.tolist()}


	def run():
	demo = gr.Interface(
	fn=predict,
	inputs=gr.components.Image(),
	outputs=gr.JSON(),
	)

	demo.launch(server_name="0.0.0.0", server_port=7860)


	if __name__ == "__main__":
	run()