document-layout-comparison

Sleeping

App Files Files Community

atlury committed on Jun 12, 2024

Commit

34e259e

verified ·

1 Parent(s): d2cecea

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -35

app.py CHANGED Viewed

@@ -1,47 +1,56 @@
 import gradio as gr
 from ultralytics import YOLO
-import spaces
 import torch
-# Pre-trained YOLOv8 weights file used for inference
-weights_file = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
-model = YOLO(weights_file)
-# Class names as reported by the loaded model (indexed by class id below)
-class_names = model.names
-@spaces.GPU(duration=60)
 def process_image(image):
-    """Run the layout model on an image and summarise the detections.
-
-    Args:
-        image: Image from the Gradio input component.
-
-    Returns:
-        tuple: ``(annotated_image, detected_areas_labels)`` on success, where
-        the text has one ``"<CLASS NAME>: <confidence>"`` line per box;
-        ``(None, error_message)`` if anything in the pipeline raises.
-    """
-    try:
-        # Process the image
-        results = model(source=image, save=False, show_labels=True, show_conf=True, show_boxes=True)
-        result = results[0]  # Get the first result
-        # plot() returns a BGR array; reverse the channel axis so it displays as RGB
-        annotated_image = result.plot()[..., ::-1]
-        # box.cls / box.conf are one-element tensors: convert them to plain
-        # Python numbers before indexing the class names and applying `.2f`
-        detected_areas_labels = "\n".join([
-            f"{class_names[int(box.cls.item())].upper()}: {float(box.conf):.2f}" for box in result.boxes
-        ])
-        return annotated_image, detected_areas_labels
-    except Exception as e:
-        # UI boundary: report the failure in the text output instead of crashing
-        return None, f"Error processing image: {e}"
-# Build the Gradio UI
-with gr.Blocks() as demo:
-    gr.Markdown("# Document Segmentation Demo (ZeroGPU)")
-    # Image supplied by the user
-    input_image = gr.Image(label="Upload Image", type="pil")
-    # Result widgets: annotated picture plus a text summary
     output_image = gr.Image(type="pil", label="Annotated Image")
     output_text = gr.Textbox(label="Detected Areas and Labels")
-    # Clicking the button calls process_image on the uploaded image
-    btn = gr.Button("Run Document Segmentation")
-    btn.click(
-        fn=process_image,
-        inputs=input_image,
-        outputs=[output_image, output_text],
-    )
-# Enable the request queue (max_size=1), then start the app
-demo.queue(max_size=1)
-demo.launch()

 import gradio as gr
 from ultralytics import YOLO
+import cv2
+import numpy as np
+import os
+import requests
 import torch
+import spaces  # Import spaces to use ZeroGPU functionality
+# Ensure the model file is in the correct location
+model_path = "yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
+if not os.path.exists(model_path):
+    # Download the model file if it doesn't exist
+    model_url = "https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/yolov8x-doclaynet-epoch64-imgsz640-initiallr1e-4-finallr1e-5.pt"
+    partial_path = model_path + ".part"
+    # Stream to a temporary name so a failed or interrupted download never
+    # leaves a truncated file that the existence check above would accept.
+    with requests.get(model_url, stream=True, timeout=60) as response:
+        # Fail loudly on HTTP errors instead of saving an error page as weights
+        response.raise_for_status()
+        with open(partial_path, "wb") as f:
+            for chunk in response.iter_content(chunk_size=1 << 20):
+                f.write(chunk)
+    # Publish the file under its final name only once it is complete
+    os.replace(partial_path, model_path)
+# Load the document segmentation model on CUDA when available, otherwise CPU
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+docseg_model = YOLO(model_path).to(device)
+@spaces.GPU
 def process_image(image):
+    """Detect document-layout regions in an image and describe them.
+
+    Args:
+        image: RGB image supplied by the Gradio input component (declared
+            with ``type="pil"``), or ``None`` when nothing was uploaded.
+
+    Returns:
+        tuple: ``(annotated_img, detected_areas_labels)`` where
+        ``annotated_img`` is the RGB array with detections drawn on it and
+        ``detected_areas_labels`` has one ``"<class name>: <confidence>"``
+        line per detected box. ``(None, message)`` if no image was given.
+    """
+    if image is None:
+        return None, "No image provided."
+    # Convert image to the format YOLO model expects
+    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    result = docseg_model(image)[0]
+    # Extract annotated image from results and convert it back to RGB
+    annotated_img = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)
+    # Boxes expose class ids (`cls`) and scores (`conf`) as tensors rather
+    # than a `label` attribute: map ids to names via the result's name table
+    # and format plain Python floats.
+    boxes = result.boxes
+    detected_areas_labels = "\n".join(
+        f"{result.names[int(class_id)]}: {score:.2f}"
+        for class_id, score in zip(boxes.cls.tolist(), boxes.conf.tolist())
+    )
+    return annotated_img, detected_areas_labels
+# Define the Gradio interface
+with gr.Blocks() as interface:
+    gr.Markdown("### Document Segmentation using YOLOv8")
+    input_image = gr.Image(type="pil", label="Input Image")
     output_image = gr.Image(type="pil", label="Annotated Image")
     output_text = gr.Textbox(label="Detected Areas and Labels")
+    # Clicking "Run" sends the uploaded image through process_image and
+    # fills both output components from its return value.
+    gr.Button("Run").click(
+        fn=process_image,
+        inputs=input_image,
+        outputs=[output_image, output_text]
+    )
+# Launch exactly once, and only when executed as a script; the previous
+# unconditional launch() followed by this guarded one started the app twice.
+if __name__ == "__main__":
+    interface.launch()