Spaces:

zliang
/

fastpaperlayout

Sleeping

zliang committed on May 31, 2024

Commit

b1e4794

verified ·

1 Parent(s): cff5fa2

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,13 +5,16 @@ import spaces
 from ultralytics import YOLOv10
 # Load the trained model
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
-# Function to perform inference on a single image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     results = model.predict(image)
     boxes = [
@@ -22,6 +25,7 @@ def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     cropped_images = [
         image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
@@ -44,7 +48,7 @@ def process_pdf(pdf_file):
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
     # Loop through each page
     for page_num, low_res_pix in enumerate(low_res_pixmaps):
         low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
@@ -74,5 +78,3 @@ iface = gr.Interface(
 # Launch the app
 iface.launch()

 from ultralytics import YOLOv10
 # Load the trained model
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
+# Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     results = model.predict(image)
     boxes = [
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     cropped_images = [
         image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
     # Loop through each page
     for page_num, low_res_pix in enumerate(low_res_pixmaps):
         low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
 # Launch the app
 iface.launch()