Theo Viel committed on
Commit
9b6b8b8
·
1 Parent(s): ac85b57

rerun demo

Browse files
Files changed (4) hide show
  1. .gitattributes +2 -0
  2. Demo.ipynb +2 -2
  3. README.md +9 -5
  4. post_processing/table_struct_pp.py +1 -230
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.ipynb filter=lfs diff=lfs merge=lfs -text
37
+ *.png filter=lfs diff=lfs merge=lfs -text
Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce53661e0fab522f4b08059ad1e0eb08a0a45ae66737706bbd8d0e70f2a224a2
3
- size 784744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e656cf3a473450457a118dcee7f0c65db9167b9aab09554cb3247f6ee1ebf3ec
3
+ size 779791
README.md CHANGED
@@ -90,7 +90,7 @@ Ideal for:
90
  **Architecture Type**: YOLOX <br>
91
  **Network Architecture**: DarkNet53 Backbone \+ FPN Decoupled head (one 1x1 convolution \+ 2 parallel 3x3 convolutions (one for the classification and one for the bounding box prediction). YOLOX is a single-stage object detector that improves on Yolo-v3. <br>
92
  **This model was developed based on the Yolo architecture** <br>
93
- **Number of model parameters**: $5.4*10^7$ <br>
94
 
95
  ### Input
96
 
@@ -159,16 +159,20 @@ with torch.inference_mode():
159
  x = model.preprocess(img)
160
  preds = model(x, img.shape)[0]
161
 
162
- print(preds)
163
-
164
  # Post-processing
165
  boxes, labels, scores = postprocess_preds_table_structure(preds, model.threshold, model.labels)
166
 
167
  # Plot
168
  boxes_plot, confs = reformat_for_plotting(boxes, labels, scores, img.shape, model.num_classes)
169
 
170
- plt.figure(figsize=(15, 10))
171
- plot_sample(img, boxes_plot, confs, labels=model.labels)
 
 
 
 
 
 
172
  plt.show()
173
  ```
174
 
 
90
  **Architecture Type**: YOLOX <br>
91
  **Network Architecture**: DarkNet53 Backbone \+ FPN Decoupled head (one 1x1 convolution \+ 2 parallel 3x3 convolutions (one for the classification and one for the bounding box prediction). YOLOX is a single-stage object detector that improves on Yolo-v3. <br>
92
  **This model was developed based on the Yolo architecture** <br>
93
+ **Number of model parameters**: 5.4e7 <br>
94
 
95
  ### Input
96
 
 
159
  x = model.preprocess(img)
160
  preds = model(x, img.shape)[0]
161
 
 
 
162
  # Post-processing
163
  boxes, labels, scores = postprocess_preds_table_structure(preds, model.threshold, model.labels)
164
 
165
  # Plot
166
  boxes_plot, confs = reformat_for_plotting(boxes, labels, scores, img.shape, model.num_classes)
167
 
168
+ plt.figure(figsize=(30, 15))
169
+ for i in range(1, 4):
170
+ boxes_plot_c = [b if j == i else [] for j, b in enumerate(boxes_plot)]
171
+ confs_c = [c if j == i else [] for j, c in enumerate(confs)]
172
+
173
+ plt.subplot(1, 3, i)
174
+ plt.title(model.labels[i])
175
+ plot_sample(img, boxes_plot_c, confs_c, labels=model.labels, show_text=False)
176
  plt.show()
177
  ```
178
 
post_processing/table_struct_pp.py CHANGED
@@ -1,230 +1 @@
1
- import numpy as np
2
- import numpy.typing as npt
3
- from typing import List, Tuple, Optional
4
-
5
-
6
def expand_boxes(
    boxes: npt.NDArray[np.float64],
    r_x: Tuple[float, float] = (1, 1),
    r_y: Tuple[float, float] = (1, 1),
    size_agnostic: bool = True,
) -> npt.NDArray[np.float64]:
    """
    Expands bounding boxes by a specified ratio.
    Expected box format is normalized [x_min, y_min, x_max, y_max].

    Fix: works on a copy so the caller's array is never mutated — the previous
    implementation expanded ``boxes`` in place as a side effect.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
        size_agnostic (bool, optional): Expand independently of the box shape. Defaults to True.

    Returns:
        numpy.ndarray: Adjusted bounding boxes clipped to the [0, 1] range.
    """
    old_boxes = boxes.copy()
    boxes = boxes.copy()  # do not mutate the caller's array

    if not size_agnostic:
        # Expansion proportional to each box's own width / height
        h = boxes[:, 3] - boxes[:, 1]
        w = boxes[:, 2] - boxes[:, 0]
    else:
        h, w = 1, 1

    boxes[:, 0] -= w * (r_x[0] - 1)  # left
    boxes[:, 2] += w * (r_x[1] - 1)  # right
    boxes[:, 1] -= h * (r_y[0] - 1)  # up
    boxes[:, 3] += h * (r_y[1] - 1)  # down

    boxes = np.clip(boxes, 0, 1)

    # Enforce non-overlapping boxes: if expansion created an overlap
    # (iou > 0.05) that was mostly absent before (old_iou < 0.1), pull the
    # expanded edges back so vertically adjacent boxes do not collide.
    for i in range(len(boxes)):
        for j in range(i + 1, len(boxes)):
            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]
            if iou > 0.05 and old_iou < 0.1:
                if boxes[i, 1] < boxes[j, 1]:  # i above j
                    boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
                    if old_iou > 0:
                        boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
                else:  # j above i
                    boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
                    if old_iou > 0:
                        boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])

    return boxes
57
-
58
-
59
def merge_boxes(
    b1: npt.NDArray[np.float64], b2: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Returns the smallest bounding box enclosing both input boxes.

    Args:
        b1 (numpy.ndarray): First bounding box [x_min, y_min, x_max, y_max].
        b2 (numpy.ndarray): Second bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: A single bounding box that covers both input boxes.
    """
    merged = b1.copy()
    for k in (0, 1):  # top-left corner: smallest coordinate wins
        merged[k] = min(b1[k], b2[k])
    for k in (2, 3):  # bottom-right corner: largest coordinate wins
        merged[k] = max(b1[k], b2[k])
    return merged
78
-
79
-
80
def bb_iou_array(
    boxes: npt.NDArray[np.float64], new_box: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculates the Intersection over Union (IoU) between a box and an array of boxes.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: Array of IoU values between new_box and each box in the array.
    """
    # Intersection rectangle, clamped to zero width/height when disjoint
    x_left = np.maximum(boxes[:, 0], new_box[0])
    y_top = np.maximum(boxes[:, 1], new_box[1])
    x_right = np.minimum(boxes[:, 2], new_box[2])
    y_bottom = np.minimum(boxes[:, 3], new_box[3])
    inter_area = np.maximum(x_right - x_left, 0) * np.maximum(y_bottom - y_top, 0)

    # Union = sum of individual areas minus the shared intersection
    box_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    new_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])

    return inter_area / (box_areas + new_area - inter_area)
108
-
109
-
110
def match_with_title(
    box: npt.NDArray[np.float64],
    title_boxes: npt.NDArray[np.float64],
    match_dist: float = 0.1,
    delta: float = 1.,
    already_matched: Optional[List[int]] = None,
) -> Tuple[Optional[npt.NDArray[np.float64]], Optional[List[int]]]:
    """
    Matches a bounding box with title bounding boxes based on IoU or proximity.

    Fix: ``already_matched`` previously used a mutable default ([]), which is
    shared across calls; replaced with a ``None`` sentinel (same behavior).

    Args:
        box (numpy.ndarray): Bounding box to match with titles [x_min, y_min, x_max, y_max].
        title_boxes (numpy.ndarray): Array of title bounding boxes with shape (N, 4).
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.
        delta (float, optional): Multiplier for matching several titles. Defaults to 1..
        already_matched (list, optional): Indices of already matched titles. Defaults to None.

    Returns:
        tuple: (merged_bbox, matched_title_indices) if a match is found,
        otherwise (None, None).
    """
    if already_matched is None:  # avoid the shared mutable-default pitfall
        already_matched = []

    if not len(title_boxes):
        return None, None

    # Vertical distance to the closest title edge (title above or below the box)
    dist_above = np.abs(title_boxes[:, 3] - box[1])
    dist_below = np.abs(box[3] - title_boxes[:, 1])

    # Horizontal alignment: compare left edges and centers
    dist_left = np.abs(title_boxes[:, 0] - box[0])
    dist_center = np.abs(title_boxes[:, 0] + title_boxes[:, 2] - box[0] - box[2]) / 2

    dists = np.min([dist_above, dist_below], 0)
    dists += np.min([dist_left, dist_center], 0) / 2

    # Overlapping titles are always considered close enough to match
    ious = bb_iou_array(title_boxes, box)
    dists = np.where(ious > 0, min(match_dist - 0.01, np.min(dists)) / delta, dists)

    if len(already_matched):
        dists[already_matched] = match_dist * 10  # Remove already matched titles

    matches = None
    if np.min(dists) <= match_dist:
        matches = np.where(
            dists <= min(match_dist, np.min(dists) * delta)
        )[0]

    if matches is None:
        return None, None

    new_bbox = box
    for match in matches:
        new_bbox = merge_boxes(new_bbox, title_boxes[match])
    return new_bbox, list(matches)
162
-
163
-
164
def match_boxes_with_title(
    boxes: npt.NDArray[np.float64],
    confs: npt.NDArray[np.float64],
    labels: npt.NDArray[np.int_],
    classes: List[str],
    to_match_labels: Optional[List[str]] = None,
    remove_matched_titles: bool = False,
    match_dist: float = 0.1,
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.int_],
    List[int],
]:
    """
    Matches boxes of the given classes with title boxes.

    Fixes: ``to_match_labels`` previously used a mutable default list
    (["chart"]), now a ``None`` sentinel with identical behavior; the
    docstring documented a ``no_found_title`` value that is never returned
    and listed ``match_dist`` under Returns.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (N,).
        labels (numpy.ndarray): Array of labels with shape (N,).
        classes (list): List of class names.
        to_match_labels (list, optional): Class names to match with titles. Defaults to ["chart"].
        remove_matched_titles (bool, optional): Remove matched titles from the output. Defaults to False.
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.

    Returns:
        boxes (numpy.ndarray): Array of bounding boxes with shape (M, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (M,).
        labels (numpy.ndarray): Array of labels with shape (M,).
        found_title (list): Indices of boxes that were merged with a title.
    """
    if to_match_labels is None:  # avoid a mutable default argument
        to_match_labels = ["chart"]

    # Reorder predictions so that titles come last
    title_ids = np.where(labels == classes.index("title"))[0]
    order = np.concatenate([np.delete(np.arange(len(boxes)), title_ids), title_ids])
    boxes = boxes[order]
    confs = confs[order]
    labels = labels[order]

    # Recompute ids after reordering
    title_ids = np.where(labels == classes.index("title"))[0]
    to_match = np.where(np.isin(labels, [classes.index(c) for c in to_match_labels]))[0]

    # Greedily merge each candidate box with its closest unused title(s)
    found_title, already_matched = [], []
    for i in range(len(boxes)):
        if i not in to_match:
            continue
        merged_box, matched_title_ids = match_with_title(
            boxes[i],
            boxes[title_ids],
            already_matched=already_matched,
            match_dist=match_dist,
        )
        if matched_title_ids is not None:
            boxes[i] = merged_box
            already_matched += matched_title_ids
            found_title.append(i)

    if remove_matched_titles and len(already_matched):
        boxes = np.delete(boxes, title_ids[already_matched], axis=0)
        confs = np.delete(confs, title_ids[already_matched], axis=0)
        labels = np.delete(labels, title_ids[already_matched], axis=0)

    return boxes, confs, labels, found_title
 
1
+ # TODO