|
import layoutparser as lp |
|
from PIL import Image |
|
import tensorflow as tf |
|
import numpy as np |
|
import torch |
|
import torchvision.ops.boxes as box_ops |
|
from typing import List, Tuple |
|
from .split_image import split_image |
|
from .get_unique_values import get_unique_values |
|
|
|
def get_vectors(*,
                predicted_bboxes: List[Tuple[int, int, int, int]],
                predicted_scores: List[float],
                predicted_labels: List[str],
                label_names: List[str],
                sub_images_bboxes: List[Tuple[int, int, int, int]],
                index_start: float = 0.17,
                index_end: float = 1,
                weighted_jaccard_index: bool = False):
    """Yield one scalar feature per sub-image region.

    Each region's value encodes which predicted bounding box overlaps it
    most: the product of the region's nonce, the best Jaccard index (IoU),
    the matched box's label nonce, and the matched box's confidence score.

    Args:
        predicted_bboxes: Detected boxes as (x1, y1, x2, y2) coordinates.
        predicted_scores: Confidence score per detected box.
        predicted_labels: Label name per detected box.
        label_names: Full label vocabulary used to assign label nonces.
        sub_images_bboxes: Grid-region boxes to vectorize.
        index_start: Lower bound of the nonce range (previously annotated
            ``int`` although the default is 0.17 — the true type is float).
        index_end: Upper bound of the nonce range.
        weighted_jaccard_index: When True, select the best box by
            IoU * score instead of IoU alone.

    Yields:
        float: one feature value per entry of ``sub_images_bboxes``.
    """
    # With no detections every IoU is zero, so every region's vector is
    # zero. (Without this guard box_iou crashes on a shape-(0,) tensor.)
    if not predicted_bboxes:
        for _ in sub_images_bboxes:
            yield 0.0
        return

    bboxes_tensor: torch.Tensor = torch.tensor(predicted_bboxes)

    # Map each label name to its nonce; get_unique_values presumably yields
    # len(label_names) distinct values in [index_start, index_end] — confirm.
    labels_nonce = {
        value: key
        for key, value in zip(
            get_unique_values(start = index_start, end = index_end, count = len(label_names)),
            list(label_names))
    }

    def get_vector(bbox: Tuple[int, int, int, int], region_nonce: float) -> float:
        bbox_tensor: torch.Tensor = torch.tensor([bbox])
        [jaccard_indexes] = box_ops.box_iou(bbox_tensor, bboxes_tensor)
        # Either take the box with the largest Jaccard index (IoU), or —
        # when weighting is enabled — the largest IoU multiplied by the
        # score. This balances accuracy against relative position.
        index_of_jaccard_index = (
            jaccard_indexes.argmax()
            if not weighted_jaccard_index
            else np.multiply(jaccard_indexes, predicted_scores).argmax())
        jaccard_index = jaccard_indexes[index_of_jaccard_index]
        jaccard_index_bbox_label__nonce = labels_nonce[predicted_labels[index_of_jaccard_index]]
        jaccard_index_bbox_score = predicted_scores[index_of_jaccard_index]
        vector = region_nonce * jaccard_index * jaccard_index_bbox_label__nonce * jaccard_index_bbox_score
        return vector.item()

    sub_images_nonces = get_unique_values(start = index_start, end = index_end, count = len(sub_images_bboxes))
    for sub_image_bbox, region_nonce in zip(sub_images_bboxes, sub_images_nonces):
        yield get_vector(sub_image_bbox, region_nonce)
|
|
|
def get_predictions(
        image: Image.Image,
        model: lp.Detectron2LayoutModel,
        predictions_reducer = lambda *args: args):
    """Detect layout blocks in *image* and return their boxes, scores and labels.

    The optional ``predictions_reducer`` receives (bboxes, scores, labels)
    and must return them — possibly filtered — in the same order; the
    default is a pass-through.

    Returns:
        Dict with 'predicted_bboxes', 'predicted_scores' and
        'predicted_labels' (all empty lists when nothing is detected).
    """
    layout_predicted = model.detect(image)

    # Nothing detected: hand back empty parallel lists.
    if len(layout_predicted) == 0:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }

    # Collect the three parallel lists in a single pass over the layout.
    bboxes, scores, labels = [], [], []
    for block in layout_predicted:
        bboxes.append(block.coordinates)
        scores.append(block.score)
        labels.append(block.type)

    bboxes, scores, labels = predictions_reducer(bboxes, scores, labels)

    return {
        'predicted_bboxes': bboxes,
        'predicted_scores': scores,
        'predicted_labels': labels,
    }
|
|
|
def predictions_reducer(
        predicted_bboxes: List[Tuple[int, int, int, int]],
        predicted_scores: List[float],
        predicted_labels: List[str],
        *,
        iou_threshold: float = 0.01):
    """Filter overlapping detections with TensorFlow non-max suppression.

    Boxes are kept greedily by descending score; a box is dropped when its
    IoU with an already-kept box exceeds ``iou_threshold``.

    Args:
        predicted_bboxes: Detected boxes as (x1, y1, x2, y2) coordinates.
        predicted_scores: Confidence score per box.
        predicted_labels: Label name per box.
        iou_threshold: Overlap above which the lower-scored box is
            suppressed. Default keeps the historical hard-coded 0.01.

    Returns:
        Dict with the surviving 'predicted_bboxes', 'predicted_scores'
        and 'predicted_labels', in the order selected by NMS.
    """
    # NMS over an empty box list is not well-defined in TF; short-circuit
    # so callers (e.g. get_features) can pass through empty predictions.
    if not predicted_bboxes:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }
    selected_indices = tf.image.non_max_suppression(
        boxes = predicted_bboxes,
        scores = predicted_scores,
        # Allow every box to survive; only the IoU test prunes.
        max_output_size = len(predicted_bboxes),
        iou_threshold = iou_threshold)
    # NOTE(review): tf.gather on Python str labels yields a byte-string
    # tensor, and .astype(str) on an object array of bytes can produce
    # "b'...'" strings — confirm labels round-trip cleanly on this TF/NumPy
    # version (a mismatch would break the labels_nonce lookup downstream).
    return {
        'predicted_bboxes': tf.gather(predicted_bboxes, selected_indices).numpy().tolist(),
        'predicted_scores': tf.gather(predicted_scores, selected_indices).numpy().astype(float).tolist(),
        'predicted_labels': tf.gather(predicted_labels, selected_indices).numpy().astype(str).tolist()
    }
|
|
|
def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_names: List[str], width_parts = 100, height_parts = 100):
    """Compute layout-based feature vectors for *image*.

    Runs the layout model, derives an additional NMS-reduced prediction
    set, splits the image into a ``width_parts`` x ``height_parts`` grid,
    and produces plain and score-weighted vectors for both sets.

    Returns:
        Dict holding the raw and reduced predictions plus four vector
        lists: 'vectors', 'weighted_vectors', 'reduced_vectors' and
        'reduced_weighted_vectors'.
    """
    predictions = get_predictions(image, model)
    reduced_predictions = predictions_reducer(**predictions)
    sub_images_bboxes = list(split_image(np.array(image), width_parts, height_parts, result = 'bboxes'))

    def vectorize(prediction_set, weighted):
        # Materialize the generator so callers receive plain lists.
        return list(get_vectors(
            sub_images_bboxes = sub_images_bboxes,
            label_names = label_names,
            weighted_jaccard_index = weighted,
            **prediction_set))

    return {
        'predicted_bboxes': predictions['predicted_bboxes'],
        'predicted_scores': predictions['predicted_scores'],
        'predicted_labels': predictions['predicted_labels'],
        'vectors': vectorize(predictions, False),
        'weighted_vectors': vectorize(predictions, True),

        'reduced_predicted_bboxes': reduced_predictions['predicted_bboxes'],
        'reduced_predicted_scores': reduced_predictions['predicted_scores'],
        'reduced_predicted_labels': reduced_predictions['predicted_labels'],
        'reduced_vectors': vectorize(reduced_predictions, False),
        'reduced_weighted_vectors': vectorize(reduced_predictions, True),
    }
|
|