RT-Detr-ArabicLayoutAnalysisR

Sleeping

App Files Files Community

omarelsayeed committed on Nov 26, 2024

Commit

14db543

•

1 Parent(s): b3157e9

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -2

app.py CHANGED Viewed

@@ -54,7 +54,7 @@ def parse_logits(logits: torch.Tensor, length: int) -> List[int]:
     return ret
 def get_orders(image_path, boxes):
-    print(boxes)
     inputs = boxes2inputs(boxes)
     inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
     logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
@@ -111,7 +111,7 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
     }
     # Open the image using PIL
-    image = image_path
     # Prepare to draw on the image
     draw = ImageDraw.Draw(image)
@@ -157,6 +157,50 @@ def draw_bboxes_on_image(image_path, bboxes, classes, reading_order):
     # Return the modified image as a PIL image object
     return image
 from PIL import Image, ImageDraw
 def is_inside(box1, box2):
@@ -199,6 +243,8 @@ def remove_overlapping_and_inside_boxes(boxes, classes):
         del classes[idx]
     return boxes, classes
 def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
     bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
     bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
@@ -207,6 +253,7 @@ def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
     return final_image
 iface = gr.Interface(
     fn=full_predictions,
     inputs=[

     return ret
 def get_orders(image_path, boxes):
+    boxes = scale_and_normalize_boxes(boxes)
     inputs = boxes2inputs(boxes)
     inputs = {k: v.to(layout_model.device) for k, v in inputs.items()}  # Move inputs to model device
     logits = layout_model(**inputs).logits.cpu().squeeze(0)  # Perform inference and get logits
     }
     # Open the image using PIL
+    image = Image.open(image_path)
     # Prepare to draw on the image
     draw = ImageDraw.Draw(image)
     # Return the modified image as a PIL image object
     return image
+def scale_and_normalize_boxes(bboxes, old_width = 1024, old_height= 1024, new_width=640, new_height=640, normalize_width=1000, normalize_height=1000):
+    """
+    Rescale bounding boxes and map them onto an integer normalization grid.
+
+    Each coordinate is first multiplied by new/old and then divided by new and
+    multiplied by normalize. The intermediate size cancels out algebraically, so
+    (floating-point rounding aside) the result is int(normalize * coord / old);
+    new_width and new_height do not influence the output.
+
+    Args:
+        bboxes (list of lists): Bounding boxes in [x_min, y_min, x_max, y_max] format.
+        old_width (int or float): Width the input coordinates are expressed in (default 1024).
+        old_height (int or float): Height the input coordinates are expressed in (default 1024).
+        new_width (int or float): Width of the intermediate scaled space (default 640).
+        new_height (int or float): Height of the intermediate scaled space (default 640).
+        normalize_width (int or float): Upper bound of the horizontal normalization range (default 1000).
+        normalize_height (int or float): Upper bound of the vertical normalization range (default 1000).
+
+    Returns:
+        list of lists: One [x_min, y_min, x_max, y_max] list of ints per input box.
+        Values are truncated by int(), not rounded, and are not clamped: a coordinate
+        outside [0, old_width] / [0, old_height] yields a value outside the
+        normalization range.
+
+    Raises:
+        ZeroDivisionError: If old_width, old_height, new_width, or new_height is 0
+            (the latter two only when bboxes is non-empty).
+
+    NOTE(review): the result is passed to boxes2inputs by get_orders; presumably that
+    helper expects coordinates within 0..1000 - confirm, and confirm that 1024 really
+    is the coordinate space of the detector's boxes.
+    """
+    scale_x = new_width / old_width
+    scale_y = new_height / old_height
+    def scale_and_normalize_single(bbox):
+        # Unpack the four corner coordinates of one box
+        x_min, y_min, x_max, y_max = bbox
+        # Scale from the old dimensions to the intermediate (new) dimensions
+        x_min *= scale_x
+        y_min *= scale_y
+        x_max *= scale_x
+        y_max *= scale_y
+        # Map onto the normalization range; dividing by new_* undoes the scaling above, and int() truncates
+        x_min = int(normalize_width * (x_min / new_width))
+        y_min = int(normalize_height * (y_min / new_height))
+        x_max = int(normalize_width * (x_max / new_width))
+        y_max = int(normalize_height * (y_max / new_height))
+        return [x_min, y_min, x_max, y_max]
+    # Apply the per-box conversion to every box, preserving input order
+    return [scale_and_normalize_single(bbox) for bbox in bboxes]
 from PIL import Image, ImageDraw
 def is_inside(box1, box2):
         del classes[idx]
     return boxes, classes
 def full_predictions(IMAGE_PATH, conf_threshold, iou_threshold):
     bboxes, classes = detect_layout(IMAGE_PATH, conf_threshold, iou_threshold)
     bboxes, classes = remove_overlapping_and_inside_boxes(bboxes, classes)
     return final_image
 iface = gr.Interface(
     fn=full_predictions,
     inputs=[