Theo Viel committed on
Commit
9b6b8b8
·
1 Parent(s): ac85b57

rerun demo

Browse files
Files changed (4) hide show
  1. .gitattributes +2 -0
  2. Demo.ipynb +2 -2
  3. README.md +9 -5
  4. post_processing/table_struct_pp.py +1 -230
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.ipynb filter=lfs diff=lfs merge=lfs -text
37
+ *.png filter=lfs diff=lfs merge=lfs -text
Demo.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce53661e0fab522f4b08059ad1e0eb08a0a45ae66737706bbd8d0e70f2a224a2
3
- size 784744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e656cf3a473450457a118dcee7f0c65db9167b9aab09554cb3247f6ee1ebf3ec
3
+ size 779791
README.md CHANGED
@@ -90,7 +90,7 @@ Ideal for:
90
  **Architecture Type**: YOLOX <br>
91
  **Network Architecture**: DarkNet53 Backbone \+ FPN Decoupled head (one 1x1 convolution \+ 2 parallel 3x3 convolutions (one for the classification and one for the bounding box prediction). YOLOX is a single-stage object detector that improves on Yolo-v3. <br>
92
  **This model was developed based on the Yolo architecture** <br>
93
- **Number of model parameters**: $5.4*10^7$ <br>
94
 
95
  ### Input
96
 
@@ -159,16 +159,20 @@ with torch.inference_mode():
159
  x = model.preprocess(img)
160
  preds = model(x, img.shape)[0]
161
 
162
- print(preds)
163
-
164
  # Post-processing
165
  boxes, labels, scores = postprocess_preds_table_structure(preds, model.threshold, model.labels)
166
 
167
  # Plot
168
  boxes_plot, confs = reformat_for_plotting(boxes, labels, scores, img.shape, model.num_classes)
169
 
170
- plt.figure(figsize=(15, 10))
171
- plot_sample(img, boxes_plot, confs, labels=model.labels)
 
 
 
 
 
 
172
  plt.show()
173
  ```
174
 
 
90
  **Architecture Type**: YOLOX <br>
91
  **Network Architecture**: DarkNet53 Backbone \+ FPN Decoupled head (one 1x1 convolution \+ 2 parallel 3x3 convolutions (one for the classification and one for the bounding box prediction). YOLOX is a single-stage object detector that improves on Yolo-v3. <br>
92
  **This model was developed based on the Yolo architecture** <br>
93
+ **Number of model parameters**: 5.4e7 <br>
94
 
95
  ### Input
96
 
 
159
  x = model.preprocess(img)
160
  preds = model(x, img.shape)[0]
161
 
 
 
162
  # Post-processing
163
  boxes, labels, scores = postprocess_preds_table_structure(preds, model.threshold, model.labels)
164
 
165
  # Plot
166
  boxes_plot, confs = reformat_for_plotting(boxes, labels, scores, img.shape, model.num_classes)
167
 
168
+ plt.figure(figsize=(30, 15))
169
+ for i in range(1, 4):
170
+ boxes_plot_c = [b if j == i else [] for j, b in enumerate(boxes_plot)]
171
+ confs_c = [c if j == i else [] for j, c in enumerate(confs)]
172
+
173
+ plt.subplot(1, 3, i)
174
+ plt.title(model.labels[i])
175
+ plot_sample(img, boxes_plot_c, confs_c, labels=model.labels, show_text=False)
176
  plt.show()
177
  ```
178
 
post_processing/table_struct_pp.py CHANGED
@@ -1,230 +1 @@
1
- import numpy as np
2
- import numpy.typing as npt
3
- from typing import List, Tuple, Optional
4
-
5
-
6
def expand_boxes(
    boxes: npt.NDArray[np.float64],
    r_x: Tuple[float, float] = (1, 1),
    r_y: Tuple[float, float] = (1, 1),
    size_agnostic: bool = True,
) -> npt.NDArray[np.float64]:
    """
    Expands bounding boxes by a specified ratio.
    Expected box format is normalized [x_min, y_min, x_max, y_max].

    Fix: works on a copy so the caller's array is never mutated — the previous
    implementation expanded ``boxes`` in place as a side effect.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        r_x (tuple, optional): Left, right expansion ratios. Defaults to (1, 1) (no expansion).
        r_y (tuple, optional): Up, down expansion ratios. Defaults to (1, 1) (no expansion).
        size_agnostic (bool, optional): Expand independently of the box shape. Defaults to True.

    Returns:
        numpy.ndarray: Adjusted bounding boxes clipped to the [0, 1] range.
    """
    old_boxes = boxes.copy()
    boxes = boxes.copy()  # do not mutate the caller's array

    if not size_agnostic:
        # Expansion proportional to each box's own width / height
        h = boxes[:, 3] - boxes[:, 1]
        w = boxes[:, 2] - boxes[:, 0]
    else:
        h, w = 1, 1

    boxes[:, 0] -= w * (r_x[0] - 1)  # left
    boxes[:, 2] += w * (r_x[1] - 1)  # right
    boxes[:, 1] -= h * (r_y[0] - 1)  # up
    boxes[:, 3] += h * (r_y[1] - 1)  # down

    boxes = np.clip(boxes, 0, 1)

    # Enforce non-overlapping boxes: if expansion created an overlap
    # (iou > 0.05) that was mostly absent before (old_iou < 0.1), pull the
    # expanded edges back so vertically adjacent boxes do not collide.
    for i in range(len(boxes)):
        for j in range(i + 1, len(boxes)):
            iou = bb_iou_array(boxes[i][None], boxes[j])[0]
            old_iou = bb_iou_array(old_boxes[i][None], old_boxes[j])[0]
            if iou > 0.05 and old_iou < 0.1:
                if boxes[i, 1] < boxes[j, 1]:  # i above j
                    boxes[j, 1] = min(old_boxes[j, 1], boxes[i, 3])
                    if old_iou > 0:
                        boxes[i, 3] = max(old_boxes[i, 3], boxes[j, 1])
                else:  # j above i
                    boxes[i, 1] = min(old_boxes[i, 1], boxes[j, 3])
                    if old_iou > 0:
                        boxes[j, 3] = max(old_boxes[j, 3], boxes[i, 1])

    return boxes
57
-
58
-
59
def merge_boxes(
    b1: npt.NDArray[np.float64], b2: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Returns the smallest bounding box enclosing both input boxes.

    Args:
        b1 (numpy.ndarray): First bounding box [x_min, y_min, x_max, y_max].
        b2 (numpy.ndarray): Second bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: A single bounding box that covers both input boxes.
    """
    merged = b1.copy()
    for k in (0, 1):  # top-left corner: smallest coordinate wins
        merged[k] = min(b1[k], b2[k])
    for k in (2, 3):  # bottom-right corner: largest coordinate wins
        merged[k] = max(b1[k], b2[k])
    return merged
78
-
79
-
80
def bb_iou_array(
    boxes: npt.NDArray[np.float64], new_box: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    """
    Calculates the Intersection over Union (IoU) between a box and an array of boxes.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        new_box (numpy.ndarray): A single bounding box [x_min, y_min, x_max, y_max].

    Returns:
        numpy.ndarray: Array of IoU values between new_box and each box in the array.
    """
    # Intersection rectangle, clamped to zero width/height when disjoint
    x_left = np.maximum(boxes[:, 0], new_box[0])
    y_top = np.maximum(boxes[:, 1], new_box[1])
    x_right = np.minimum(boxes[:, 2], new_box[2])
    y_bottom = np.minimum(boxes[:, 3], new_box[3])
    inter_area = np.maximum(x_right - x_left, 0) * np.maximum(y_bottom - y_top, 0)

    # Union = sum of individual areas minus the shared intersection
    box_areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    new_area = (new_box[2] - new_box[0]) * (new_box[3] - new_box[1])

    return inter_area / (box_areas + new_area - inter_area)
108
-
109
-
110
def match_with_title(
    box: npt.NDArray[np.float64],
    title_boxes: npt.NDArray[np.float64],
    match_dist: float = 0.1,
    delta: float = 1.,
    already_matched: Optional[List[int]] = None,
) -> Tuple[Optional[npt.NDArray[np.float64]], Optional[List[int]]]:
    """
    Matches a bounding box with title bounding boxes based on IoU or proximity.

    Fix: ``already_matched`` previously used a mutable default ([]), which is
    shared across calls; replaced with a ``None`` sentinel (same behavior).

    Args:
        box (numpy.ndarray): Bounding box to match with titles [x_min, y_min, x_max, y_max].
        title_boxes (numpy.ndarray): Array of title bounding boxes with shape (N, 4).
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.
        delta (float, optional): Multiplier for matching several titles. Defaults to 1..
        already_matched (list, optional): Indices of already matched titles. Defaults to None.

    Returns:
        tuple: (merged_bbox, matched_title_indices) if a match is found,
        otherwise (None, None).
    """
    if already_matched is None:  # avoid the shared mutable-default pitfall
        already_matched = []

    if not len(title_boxes):
        return None, None

    # Vertical distance to the closest title edge (title above or below the box)
    dist_above = np.abs(title_boxes[:, 3] - box[1])
    dist_below = np.abs(box[3] - title_boxes[:, 1])

    # Horizontal alignment: compare left edges and centers
    dist_left = np.abs(title_boxes[:, 0] - box[0])
    dist_center = np.abs(title_boxes[:, 0] + title_boxes[:, 2] - box[0] - box[2]) / 2

    dists = np.min([dist_above, dist_below], 0)
    dists += np.min([dist_left, dist_center], 0) / 2

    # Overlapping titles are always considered close enough to match
    ious = bb_iou_array(title_boxes, box)
    dists = np.where(ious > 0, min(match_dist - 0.01, np.min(dists)) / delta, dists)

    if len(already_matched):
        dists[already_matched] = match_dist * 10  # Remove already matched titles

    matches = None
    if np.min(dists) <= match_dist:
        matches = np.where(
            dists <= min(match_dist, np.min(dists) * delta)
        )[0]

    if matches is None:
        return None, None

    new_bbox = box
    for match in matches:
        new_bbox = merge_boxes(new_bbox, title_boxes[match])
    return new_bbox, list(matches)
162
-
163
-
164
def match_boxes_with_title(
    boxes: npt.NDArray[np.float64],
    confs: npt.NDArray[np.float64],
    labels: npt.NDArray[np.int_],
    classes: List[str],
    to_match_labels: Optional[List[str]] = None,
    remove_matched_titles: bool = False,
    match_dist: float = 0.1,
) -> Tuple[
    npt.NDArray[np.float64],
    npt.NDArray[np.float64],
    npt.NDArray[np.int_],
    List[int],
]:
    """
    Matches boxes of the given classes with title boxes.

    Fixes: ``to_match_labels`` previously used a mutable default list
    (["chart"]), now a ``None`` sentinel with identical behavior; the
    docstring documented a ``no_found_title`` value that is never returned
    and listed ``match_dist`` under Returns.

    Args:
        boxes (numpy.ndarray): Array of bounding boxes with shape (N, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (N,).
        labels (numpy.ndarray): Array of labels with shape (N,).
        classes (list): List of class names.
        to_match_labels (list, optional): Class names to match with titles. Defaults to ["chart"].
        remove_matched_titles (bool, optional): Remove matched titles from the output. Defaults to False.
        match_dist (float, optional): Maximum distance for matching. Defaults to 0.1.

    Returns:
        boxes (numpy.ndarray): Array of bounding boxes with shape (M, 4).
        confs (numpy.ndarray): Array of confidence scores with shape (M,).
        labels (numpy.ndarray): Array of labels with shape (M,).
        found_title (list): Indices of boxes that were merged with a title.
    """
    if to_match_labels is None:  # avoid a mutable default argument
        to_match_labels = ["chart"]

    # Reorder predictions so that titles come last
    title_ids = np.where(labels == classes.index("title"))[0]
    order = np.concatenate([np.delete(np.arange(len(boxes)), title_ids), title_ids])
    boxes = boxes[order]
    confs = confs[order]
    labels = labels[order]

    # Recompute ids after reordering
    title_ids = np.where(labels == classes.index("title"))[0]
    to_match = np.where(np.isin(labels, [classes.index(c) for c in to_match_labels]))[0]

    # Greedily merge each candidate box with its closest unused title(s)
    found_title, already_matched = [], []
    for i in range(len(boxes)):
        if i not in to_match:
            continue
        merged_box, matched_title_ids = match_with_title(
            boxes[i],
            boxes[title_ids],
            already_matched=already_matched,
            match_dist=match_dist,
        )
        if matched_title_ids is not None:
            boxes[i] = merged_box
            already_matched += matched_title_ids
            found_title.append(i)

    if remove_matched_titles and len(already_matched):
        boxes = np.delete(boxes, title_ids[already_matched], axis=0)
        confs = np.delete(confs, title_ids[already_matched], axis=0)
        labels = np.delete(labels, title_ids[already_matched], axis=0)

    return boxes, confs, labels, found_title
 
1
+ # TODO