File size: 5,303 Bytes
93264c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import layoutparser as lp
from PIL import Image
import tensorflow as tf
import numpy as np
import torch
import torchvision.ops.boxes as box_ops
from typing import List, Tuple
from .split_image import split_image
from .get_unique_values import get_unique_values

def get_vectors(*,
    predicted_bboxes: List[Tuple[int, int, int, int]],
    predicted_scores: List[float],
    predicted_labels: List[str],
    label_names: List[str],
    sub_images_bboxes: List[Tuple[int, int, int, int]],
    index_start: float = 0.17,
    index_end: float = 1,
    weighted_jaccard_index = False):
  """Yield one scalar feature per sub-image region.

  For every box in ``sub_images_bboxes`` the best matching predicted box is
  picked by Jaccard index (Intersection over Union), optionally weighted by
  the prediction score, and the yielded value is
  ``region_nonce * jaccard_index * label_nonce * score`` of that match.

  This is a generator function: nothing is computed until it is iterated.

  Args:
    predicted_bboxes: Predicted boxes in ``(x1, y1, x2, y2)`` format.
    predicted_scores: Score of each predicted box (parallel to ``predicted_bboxes``).
    predicted_labels: Label of each predicted box (parallel to ``predicted_bboxes``).
    label_names: Every label that can occur in ``predicted_labels``.
    sub_images_bboxes: Region boxes in ``(x1, y1, x2, y2)`` format.
    index_start: Passed to ``get_unique_values`` as ``start``.
    index_end: Passed to ``get_unique_values`` as ``end``.
    weighted_jaccard_index: When true, the best match is the box maximising
      ``jaccard_index * score`` instead of the plain Jaccard index.

  Yields:
    One float per entry of ``sub_images_bboxes``, in the same order. When
    there are no predicted boxes every region yields ``0.0``.

  Raises:
    KeyError: If a selected predicted label is not in ``label_names``.
  """
  # No predictions means no overlap with any region. box_iou cannot handle the
  # shape-(0,) tensor an empty list produces, so short-circuit with the value
  # the formula gives for a zero Jaccard index.
  if len(predicted_bboxes) == 0:
    for _ in sub_images_bboxes:
      yield 0.0
    return

  bboxes_tensor: torch.Tensor = torch.tensor(predicted_bboxes)
  # Label name -> nonce; presumably get_unique_values returns `count` distinct
  # values between `start` and `end` (confirm against its implementation).
  labels_nonce = {
      label: nonce
      for nonce, label in zip(
          get_unique_values(start = index_start, end = index_end, count = len(label_names)),
          list(label_names))
  }

  def get_vector(bbox: Tuple[int, int, int, int], region_nonce: int):
    # bbox: Expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
    bbox_tensor: torch.Tensor = torch.tensor([bbox])
    # box_iou returns a [1, N] matrix here; unpack its single row.
    [jaccard_indexes] = box_ops.box_iou(bbox_tensor, bboxes_tensor)
    # Either take the predicted box with the largest Jaccard index, or the one
    # with the largest Jaccard index multiplied by its score. The weighted
    # variant strikes a balance between accuracy and relative position.
    if weighted_jaccard_index:
      index_of_jaccard_index = np.multiply(jaccard_indexes, predicted_scores).argmax()
    else:
      index_of_jaccard_index = jaccard_indexes.argmax()
    jaccard_index = jaccard_indexes[index_of_jaccard_index]
    jaccard_index_bbox_label__nonce = labels_nonce[predicted_labels[index_of_jaccard_index]]
    jaccard_index_bbox_score = predicted_scores[index_of_jaccard_index]
    vector = region_nonce * jaccard_index * jaccard_index_bbox_label__nonce * jaccard_index_bbox_score
    return vector.item()

  sub_images_nonces = get_unique_values(start = index_start, end = index_end, count = len(sub_images_bboxes))
  for sub_image_bbox, region_nonce in zip(sub_images_bboxes, sub_images_nonces):
    yield get_vector(sub_image_bbox, region_nonce)

def get_predictions(
    image: Image.Image,
    model: lp.Detectron2LayoutModel,
    predictions_reducer = lambda *args: args):
  """Run layout detection on ``image`` and return the result as parallel lists.

  Args:
    image: Image handed to ``model.detect``.
    model: Layout model whose ``detect`` result supports ``len`` and iteration
      over blocks exposing ``coordinates``, ``score`` and ``type``.
    predictions_reducer: Callable receiving ``(bboxes, scores, labels)`` and
      returning an iterable of exactly three items, which replace them. The
      default returns its arguments unchanged. It is not called when nothing
      was detected.

  Returns:
    A dict with the keys ``predicted_bboxes``, ``predicted_scores`` and
    ``predicted_labels``; all three are empty lists when nothing was detected.
  """
  detected_blocks = model.detect(image)

  # Nothing detected: return empty parallel lists without invoking the reducer.
  if len(detected_blocks) == 0:
    return {
        'predicted_bboxes': [],
        'predicted_scores': [],
        'predicted_labels': [],
    }

  bboxes, scores, labels = [], [], []
  for detected_block in detected_blocks:
    bboxes.append(detected_block.coordinates)
    scores.append(detected_block.score)
    labels.append(detected_block.type)

  bboxes, scores, labels = predictions_reducer(bboxes, scores, labels)
  return {
      'predicted_bboxes': bboxes,
      'predicted_scores': scores,
      'predicted_labels': labels,
  }

def predictions_reducer(
    predicted_bboxes: List[Tuple[int, int, int, int]],
    predicted_scores: List[float],
    predicted_labels: List[str],
    iou_threshold: float = 0.01):
  """Reduce overlapping predictions with non-max suppression.

  Args:
    predicted_bboxes: Predicted boxes, four coordinates each.
    predicted_scores: Score of each predicted box (parallel to ``predicted_bboxes``).
    predicted_labels: Label of each predicted box (parallel to ``predicted_bboxes``).
    iou_threshold: Overlap threshold passed to
      ``tf.image.non_max_suppression``. The default of ``0.01`` matches the
      value that used to be hard-coded here.

  Returns:
    A dict with the keys ``predicted_bboxes``, ``predicted_scores`` and
    ``predicted_labels`` holding only the selected entries as plain Python
    lists (boxes as lists of numbers, scores as floats, labels as strings).
    All three are empty when there is nothing to reduce.
  """
  # tf.image.non_max_suppression needs a 2-D [num_boxes, 4] tensor, which an
  # empty list cannot provide; nothing to suppress means nothing to return.
  if len(predicted_bboxes) == 0:
    return {
        'predicted_bboxes': [],
        'predicted_scores': [],
        'predicted_labels': [],
    }

  selected_indices = tf.image.non_max_suppression(
    boxes = predicted_bboxes,
    scores = predicted_scores,
    # Allow every box to survive; only the overlap threshold removes boxes.
    max_output_size = len(predicted_bboxes),
    iou_threshold = iou_threshold)
  return {
      'predicted_bboxes': tf.gather(predicted_bboxes, selected_indices).numpy().tolist(), # List[List[int, int, int, int]]
      'predicted_scores': tf.gather(predicted_scores, selected_indices).numpy().astype(float).tolist(),
      'predicted_labels': tf.gather(predicted_labels, selected_indices).numpy().astype(str).tolist()
    }

def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_names: List[str], width_parts = 100, height_parts = 100):
  """Build the raw and reduced prediction features for one image.

  Predictions come from ``get_predictions`` (with its default reducer) and a
  second, reduced set from ``predictions_reducer``. The image is split into
  region boxes via ``split_image(..., result = 'bboxes')`` and ``get_vectors``
  is evaluated for both prediction sets, with and without score weighting.

  Args:
    image: Image to analyse.
    model: Layout model passed to ``get_predictions``.
    label_names: Labels passed to ``get_vectors``.
    width_parts: Passed to ``split_image`` as its second argument.
    height_parts: Passed to ``split_image`` as its third argument.

  Returns:
    A dict holding the raw predictions (``predicted_*``), their ``vectors``
    and ``weighted_vectors``, and the same five entries for the reduced
    predictions under ``reduced_*`` keys.
  """
  raw = get_predictions(image, model)
  reduced = predictions_reducer(**raw)
  regions = list(split_image(np.array(image), width_parts, height_parts, result = 'bboxes'))

  def vectors_for(prediction_set, weighted):
    # Same regions and labels every time; only the prediction set and the
    # weighting flag vary between the four feature vectors.
    return get_vectors(
      sub_images_bboxes = regions,
      label_names = label_names,
      weighted_jaccard_index = weighted,
      **prediction_set)

  # Create all generators up front, then consume them in result order.
  plain = vectors_for(raw, False)
  weighted = vectors_for(raw, True)
  reduced_plain = vectors_for(reduced, False)
  reduced_weighted = vectors_for(reduced, True)

  features = {}
  for key in ('predicted_bboxes', 'predicted_scores', 'predicted_labels'):
    features[key] = raw[key]
  features['vectors'] = list(plain)
  features['weighted_vectors'] = list(weighted)

  features['reduced_predicted_bboxes'] = reduced['predicted_bboxes']
  features['reduced_predicted_scores'] = reduced['predicted_scores']
  features['reduced_predicted_labels'] = reduced['predicted_labels']
  features['reduced_vectors'] = list(reduced_plain)
  features['reduced_weighted_vectors'] = list(reduced_weighted)
  return features