|
import layoutparser as lp |
|
from PIL import Image |
|
import tensorflow as tf |
|
import numpy as np |
|
import torch |
|
import torchvision.ops.boxes as box_ops |
|
from typing import List, Tuple |
|
from .split_image import split_image |
|
from .get_unique_values import get_unique_values |
|
|
|
def get_vectors(*,
                predicted_bboxes: List[Tuple[int, int, int, int]],
                predicted_scores: List[float],
                predicted_labels: List[str],
                label_names: List[str],
                sub_images_bboxes: List[Tuple[int, int, int, int]],
                index_start: float = 0.17,
                index_end: float = 1,
                weighted_jaccard_index: bool = False):
    """Yield one scalar feature per sub-image region.

    Each region's value encodes which predicted bounding box overlaps it
    most: the product of the region's nonce, the best Jaccard index (IoU),
    the matched box's label nonce, and the matched box's confidence score.

    Args:
        predicted_bboxes: Detected boxes as (x1, y1, x2, y2) coordinates.
        predicted_scores: Confidence score per detected box.
        predicted_labels: Label name per detected box.
        label_names: Full label vocabulary used to assign label nonces.
        sub_images_bboxes: Grid-region boxes to vectorize.
        index_start: Lower bound of the nonce range (previously annotated
            ``int`` although the default is 0.17 — the true type is float).
        index_end: Upper bound of the nonce range.
        weighted_jaccard_index: When True, select the best box by
            IoU * score instead of IoU alone.

    Yields:
        float: one feature value per entry of ``sub_images_bboxes``.
    """
    # With no detections every IoU is zero, so every region's vector is
    # zero. (Without this guard box_iou crashes on a shape-(0,) tensor.)
    if not predicted_bboxes:
        for _ in sub_images_bboxes:
            yield 0.0
        return

    bboxes_tensor: torch.Tensor = torch.tensor(predicted_bboxes)

    # Map each label name to its nonce; get_unique_values presumably yields
    # len(label_names) distinct values in [index_start, index_end] — confirm.
    labels_nonce = {
        value: key
        for key, value in zip(
            get_unique_values(start = index_start, end = index_end, count = len(label_names)),
            list(label_names))
    }

    def get_vector(bbox: Tuple[int, int, int, int], region_nonce: float) -> float:
        bbox_tensor: torch.Tensor = torch.tensor([bbox])
        [jaccard_indexes] = box_ops.box_iou(bbox_tensor, bboxes_tensor)
        # Either take the box with the largest Jaccard index (IoU), or —
        # when weighting is enabled — the largest IoU multiplied by the
        # score. This balances accuracy against relative position.
        index_of_jaccard_index = (
            jaccard_indexes.argmax()
            if not weighted_jaccard_index
            else np.multiply(jaccard_indexes, predicted_scores).argmax())
        jaccard_index = jaccard_indexes[index_of_jaccard_index]
        jaccard_index_bbox_label__nonce = labels_nonce[predicted_labels[index_of_jaccard_index]]
        jaccard_index_bbox_score = predicted_scores[index_of_jaccard_index]
        vector = region_nonce * jaccard_index * jaccard_index_bbox_label__nonce * jaccard_index_bbox_score
        return vector.item()

    sub_images_nonces = get_unique_values(start = index_start, end = index_end, count = len(sub_images_bboxes))
    for sub_image_bbox, region_nonce in zip(sub_images_bboxes, sub_images_nonces):
        yield get_vector(sub_image_bbox, region_nonce)
|
|
|
def get_predictions(
        image: Image.Image,
        model: lp.Detectron2LayoutModel,
        predictions_reducer = lambda *args: args):
    """Detect layout blocks in *image* and return their boxes, scores and labels.

    The optional ``predictions_reducer`` receives (bboxes, scores, labels)
    and must return them — possibly filtered — in the same order; the
    default is a pass-through.

    Returns:
        Dict with 'predicted_bboxes', 'predicted_scores' and
        'predicted_labels' (all empty lists when nothing is detected).
    """
    layout_predicted = model.detect(image)

    # Nothing detected: hand back empty parallel lists.
    if len(layout_predicted) == 0:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }

    # Collect the three parallel lists in a single pass over the layout.
    bboxes, scores, labels = [], [], []
    for block in layout_predicted:
        bboxes.append(block.coordinates)
        scores.append(block.score)
        labels.append(block.type)

    bboxes, scores, labels = predictions_reducer(bboxes, scores, labels)

    return {
        'predicted_bboxes': bboxes,
        'predicted_scores': scores,
        'predicted_labels': labels,
    }
|
|
|
def predictions_reducer(
        predicted_bboxes: List[Tuple[int, int, int, int]],
        predicted_scores: List[float],
        predicted_labels: List[str],
        *,
        iou_threshold: float = 0.01):
    """Filter overlapping detections with TensorFlow non-max suppression.

    Boxes are kept greedily by descending score; a box is dropped when its
    IoU with an already-kept box exceeds ``iou_threshold``.

    Args:
        predicted_bboxes: Detected boxes as (x1, y1, x2, y2) coordinates.
        predicted_scores: Confidence score per box.
        predicted_labels: Label name per box.
        iou_threshold: Overlap above which the lower-scored box is
            suppressed. Default keeps the historical hard-coded 0.01.

    Returns:
        Dict with the surviving 'predicted_bboxes', 'predicted_scores'
        and 'predicted_labels', in the order selected by NMS.
    """
    # NMS over an empty box list is not well-defined in TF; short-circuit
    # so callers (e.g. get_features) can pass through empty predictions.
    if not predicted_bboxes:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }
    selected_indices = tf.image.non_max_suppression(
        boxes = predicted_bboxes,
        scores = predicted_scores,
        # Allow every box to survive; only the IoU test prunes.
        max_output_size = len(predicted_bboxes),
        iou_threshold = iou_threshold)
    # NOTE(review): tf.gather on Python str labels yields a byte-string
    # tensor, and .astype(str) on an object array of bytes can produce
    # "b'...'" strings — confirm labels round-trip cleanly on this TF/NumPy
    # version (a mismatch would break the labels_nonce lookup downstream).
    return {
        'predicted_bboxes': tf.gather(predicted_bboxes, selected_indices).numpy().tolist(),
        'predicted_scores': tf.gather(predicted_scores, selected_indices).numpy().astype(float).tolist(),
        'predicted_labels': tf.gather(predicted_labels, selected_indices).numpy().astype(str).tolist()
    }
|
|
|
def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_names: List[str], width_parts = 100, height_parts = 100):
    """Compute layout-based feature vectors for *image*.

    Runs the layout model, derives an additional NMS-reduced prediction
    set, splits the image into a ``width_parts`` x ``height_parts`` grid,
    and produces plain and score-weighted vectors for both sets.

    Returns:
        Dict holding the raw and reduced predictions plus four vector
        lists: 'vectors', 'weighted_vectors', 'reduced_vectors' and
        'reduced_weighted_vectors'.
    """
    predictions = get_predictions(image, model)
    reduced_predictions = predictions_reducer(**predictions)
    sub_images_bboxes = list(split_image(np.array(image), width_parts, height_parts, result = 'bboxes'))

    def vectorize(prediction_set, weighted):
        # Materialize the generator so callers receive plain lists.
        return list(get_vectors(
            sub_images_bboxes = sub_images_bboxes,
            label_names = label_names,
            weighted_jaccard_index = weighted,
            **prediction_set))

    return {
        'predicted_bboxes': predictions['predicted_bboxes'],
        'predicted_scores': predictions['predicted_scores'],
        'predicted_labels': predictions['predicted_labels'],
        'vectors': vectorize(predictions, False),
        'weighted_vectors': vectorize(predictions, True),

        'reduced_predicted_bboxes': reduced_predictions['predicted_bboxes'],
        'reduced_predicted_scores': reduced_predictions['predicted_scores'],
        'reduced_predicted_labels': reduced_predictions['predicted_labels'],
        'reduced_vectors': vectorize(reduced_predictions, False),
        'reduced_weighted_vectors': vectorize(reduced_predictions, True),
    }
|
|