Alex Hortua committed on
Commit b87aa54 · 1 Parent(s): 9a6ea32

Creating a faster version with a different approach (training with a frozen, COCO-pretrained backbone)

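In short, the new trainer starts from the COCO-pretrained Faster R-CNN weights, freezes the backbone, and retrains only the detection heads on the LEGO annotations. The following is a condensed sketch of the key steps from the `src/new_trainer.py` added below, not a standalone script:

```python
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Start from the COCO-pretrained Faster R-CNN
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")

# Freeze the backbone; only the RPN and ROI heads keep requires_grad=True
for param in model.backbone.parameters():
    param.requires_grad = False

# Swap in a 2-class box predictor (background + LEGO)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)

# Same optimizer settings as the committed script
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
```

Because backpropagation stops at the frozen ResNet-50 FPN backbone, each step only updates the comparatively small head parameters, which is the source of the speedup mentioned above.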
.gitignore CHANGED
@@ -2,7 +2,11 @@
  /Annotations
  .qodo
  venv/
+ src/__pycache__/
+ models/lego
+ models/records/*
+ models/records
+
+ # Datasets
  /datasets/annotations/*
  /datasets/images/*
- src/__pycache__/
- models/lego
datasets/examples.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "examples": [
+     [
+       "datasets/test_images/0abd88dc-e306-11eb-b5b0-b0c090bd3910.jpg",
+       "[[373, 523, 438, 599], [78, 444, 278, 563], [124, 0, 221, 63], [471, 156, 535, 213]]"
+     ],
+     [
+       "datasets/test_images/0abe1e54-e691-11eb-8391-b0c090bd3910.jpg",
+       "[[29, 82, 270, 299], [23, 0, 78, 80]]"
+     ],
+     [
+       "datasets/test_images/0abd3f80-daff-11eb-8755-3497f683a169.jpg",
+       "[[0, 77, 172, 317], [523, 202, 582, 277], [410, 112, 600, 544], [136, 39, 187, 110]]"
+     ],
+     [
+       "datasets/test_images/0abf3f3e-e4c8-11eb-8f0d-b0c090bd3910.jpg",
+       "[[207, 289, 238, 333], [338, 94, 599, 496], [0, 84, 136, 407], [73, 383, 141, 419]]"
+     ],
+     [
+       "datasets/test_images/0abdc2ee-d9cc-11eb-8cf3-3497f683a169.jpg",
+       "[[225, 437, 386, 600], [305, 0, 369, 100], [353, 346, 453, 445], [113, 28, 234, 134]]"
+     ],
+     [
+       "datasets/test_images/000abf76-e67a-11eb-b56d-b0c090bd3910.jpg",
+       "[[44, 167, 185, 300], [97, 0, 262, 167]]"
+     ],
+     [
+       "datasets/test_images/0abccbc6-e661-11eb-9915-b0c090bd3910.jpg",
+       "[[173, 124, 242, 194], [0, 3, 300, 296]]"
+     ],
+     [
+       "datasets/test_images/0abf4764-e480-11eb-b391-b0c090bd3910.jpg",
+       "[[418, 87, 599, 306], [0, 339, 154, 490], [230, 114, 353, 234], [173, 118, 275, 227]]"
+     ],
+     [
+       "datasets/test_images/0a82869a-e3c3-11eb-9d75-b0c090bd3910.jpg",
+       "[[387, 0, 536, 119], [378, 509, 463, 600], [94, 193, 288, 368], [74, 301, 237, 486]]"
+     ],
+     [
+       "datasets/test_images/0abd5fec-e5c6-11eb-9ac6-b0c090bd3910.jpg",
+       "[[32, 50, 299, 266], [0, 22, 78, 82]]"
+     ],
+     [
+       "datasets/test_images/0abfa74a-e2f3-11eb-abe7-b0c090bd3910.jpg",
+       "[[229, 104, 311, 169], [380, 0, 503, 118], [235, 236, 284, 304], [109, 434, 350, 600]]"
+     ],
+     [
+       "datasets/test_images/0abfa012-e660-11eb-8710-b0c090bd3910.jpg",
+       "[[0, 43, 299, 254], [50, 58, 215, 238]]"
+     ]
+   ]
+ }
datasets/test_images/0aa7d4a4-e675-11eb-98bd-b0c090bd3910.jpg DELETED
Binary file (21.7 kB)
 
datasets/test_images/0abfb048-e3a1-11eb-9018-b0c090bd3910.jpg DELETED
Binary file (53 kB)
 
logs/training_log.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "loss": [
+     5521765.083993731,
+     813867.9704230821
+   ],
+   "mAP": [
+     0.7336118575736044,
+     0.00042658527250095333
+   ]
+ }
logs/training_log.txt ADDED
@@ -0,0 +1,31 @@
+ Starting Epoch 1/10
+ Starting Epoch 1/10
+ Iteration 1, Loss: 1.19198739528656
+ Iteration 101, Loss: 27.87972640991211
+ Iteration 201, Loss: 7.156171798706055
+ Iteration 301, Loss: 8.546396255493164
+ Iteration 401, Loss: 1.7727022171020508
+ Iteration 501, Loss: 5.378680229187012
+ Iteration 601, Loss: 15.277275085449219
+ Iteration 701, Loss: 4.097675800323486
+ Iteration 801, Loss: 4.272053241729736
+ Iteration 901, Loss: 1.443131446838379
+ Starting Epoch 2/10
+ Iteration 1, Loss: 2.3286213874816895
+ Iteration 101, Loss: 1.8097801208496094
+ Iteration 201, Loss: 1.6668422222137451
+ Iteration 301, Loss: 2.1733906269073486
+ Iteration 401, Loss: 1.8349155187606812
+ Iteration 501, Loss: 0.9883778095245361
+ Iteration 601, Loss: 1.3832241296768188
+ Iteration 701, Loss: 1.6653320789337158
+ Iteration 801, Loss: 1.2079124450683594
+ Iteration 901, Loss: 28428.40625
+ Starting Epoch 3/10
+ Iteration 1, Loss: 1006.0529174804688
+ Iteration 101, Loss: 0.4594302177429199
+ Iteration 201, Loss: 0.4397536516189575
+ Iteration 301, Loss: 0.31954655051231384
+ Iteration 401, Loss: 0.4685922861099243
+ Iteration 501, Loss: 0.31720075011253357
+ Iteration 601, Loss: 0.2652203440666199
src/Attempt1/app.py ADDED
@@ -0,0 +1,59 @@
+ import os
+ import torch
+ import torchvision
+ import gradio as gr
+ import numpy as np
+ from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
+ from torchvision.transforms import functional as F
+ from PIL import Image, ImageDraw
+
+ # Load Trained Model
+ def load_model(model_path):
+     model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
+     in_features = model.roi_heads.box_predictor.cls_score.in_features
+     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2)  # Background + 1 LEGO class
+     model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
+     model.eval()
+     return model
+
+ model = load_model("models/lego_fasterrcnn.pth")
+
+ def predict(image):
+     image = Image.fromarray(image).convert("RGB")
+     image_tensor = F.to_tensor(image).unsqueeze(0)  # Add batch dimension
+
+     with torch.no_grad():
+         predictions = model(image_tensor)[0]
+
+     boxes = predictions['boxes'].cpu().numpy()
+     labels = predictions['labels'].cpu().numpy()
+     scores = predictions['scores'].cpu().numpy()
+
+     results = []
+     draw = ImageDraw.Draw(image)
+     for box, label, score in zip(boxes, labels, scores):
+         if score > 0.7:  # Confidence threshold
+             results.append({
+                 "box": box.tolist(),
+                 "label": str(label),
+                 "score": float(score)
+             })
+             draw.rectangle(box.tolist(), outline="red", width=3)
+             draw.text((box[0], box[1]), f"{label} ({score:.2f})", fill="red")
+
+     return image, results
+
+ def get_examples():
+     return [os.path.join("datasets/test_images", f) for f in os.listdir("datasets/test_images")]
+
+ # Gradio Interface
+ demo = gr.Interface(
+     fn=predict,
+     inputs=gr.Image(type="numpy"),
+     outputs=[gr.Image(type="pil"), gr.JSON()],
+     title="LEGO Detection with Faster R-CNN",
+     description="Upload an image and the model will detect LEGO bricks with bounding boxes.",
+     examples=get_examples()
+ )
+
+ demo.launch()
src/{dataset.py → Attempt1/dataset.py} RENAMED
File without changes
src/{evaluate.py → Attempt1/evaluate.py} RENAMED
File without changes
src/{train.py → Attempt1/train.py} RENAMED
File without changes
src/{utils.py → Attempt1/utils.py} RENAMED
File without changes
src/app.py CHANGED
@@ -1,59 +1,101 @@
- import os
  import torch
  import torchvision
  import gradio as gr
- import numpy as np
- from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
- from torchvision.transforms import functional as F
  from PIL import Image, ImageDraw

- # Load Trained Model
- def load_model(model_path):
-     model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
-     in_features = model.roi_heads.box_predictor.cls_score.in_features
-     model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes=2) # Background + 4 LEGO classes
-     model.load_state_dict(torch.load(model_path, map_location=torch.device("cpu")))
-     model.eval()
-     return model

- model = load_model("models/lego_fasterrcnn.pth")

- def predict(image):
-     image = Image.fromarray(image).convert("RGB")
-     image_tensor = F.to_tensor(image).unsqueeze(0) # Add batch dimension

      with torch.no_grad():
-         predictions = model(image_tensor)[0]

-     boxes = predictions['boxes'].cpu().numpy()
-     labels = predictions['labels'].cpu().numpy()
-     scores = predictions['scores'].cpu().numpy()

-     results = []
      draw = ImageDraw.Draw(image)
-     for box, label, score in zip(boxes, labels, scores):
          if score > 0.5: # Confidence threshold
-             results.append({
-                 "box": box.tolist(),
-                 "label": str(label),
-                 "score": float(score)
-             })
-             draw.rectangle(box.tolist(), outline="red", width=3)
-             draw.text((box[0], box[1]), f"{label} ({score:.2f})", fill="red")

-     return image, results

  def get_examples():
-     return [os.path.join("datasets/test_images", f) for f in os.listdir("datasets/test_images")]

- # Gradio Interface
  demo = gr.Interface(
      fn=predict,
-     inputs=gr.Image(type="numpy"),
-     outputs=[gr.Image(type="pil"), gr.JSON()],
-     title="LEGO Detection with Faster R-CNN",
-     description="Upload an image and the model will detect LEGO bricks with bounding boxes.",
-     examples=get_examples()
  )

- demo.launch()

  import torch
  import torchvision
+ import torchvision.transforms as T
  import gradio as gr
  from PIL import Image, ImageDraw
+ import torchvision.ops as ops
+ import numpy as np
+ import json
+ import os
+
+
+ # LOAD_MODEL_PATH = "models/lego_fasterrcnn.pth"
+ LOAD_MODEL_PATH = "models/faster_rcnn_custom.pth"

+ # Load trained Faster R-CNN model
+ model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights=None)
+ in_features = model.roi_heads.box_predictor.cls_score.in_features
+ model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes=2)
+ model.load_state_dict(torch.load(LOAD_MODEL_PATH, map_location=torch.device("cpu")))
+ model.eval()

+ def compute_iou(box1, box2):
+     x1 = max(box1[0], box2[0])
+     y1 = max(box1[1], box2[1])
+     x2 = min(box1[2], box2[2])
+     y2 = min(box1[3], box2[3])
+
+     intersection = max(0, x2 - x1) * max(0, y2 - y1)
+     area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
+     area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
+
+     union = area_box1 + area_box2 - intersection
+     return intersection / union if union > 0 else 0
+
+ def mean_average_precision(predictions, ground_truths, iou_threshold=0.5):
+     iou_scores = []
+     for pred_box in predictions:
+         best_iou = 0
+         for gt_box in ground_truths:
+             iou = compute_iou(pred_box, gt_box)
+             best_iou = max(best_iou, iou)
+         if best_iou >= iou_threshold:
+             iou_scores.append(best_iou)
+     return np.mean(iou_scores) if iou_scores else None

+ def predict(image, ground_truths_json=""):
+     transform = T.Compose([T.ToTensor()])
+     image_tensor = transform(image).unsqueeze(0)

      with torch.no_grad():
+         predictions = model(image_tensor)

+     boxes = predictions[0]['boxes'].tolist()
+     scores = predictions[0]['scores'].tolist()

+     # Draw boxes on image
      draw = ImageDraw.Draw(image)
+     for box, score in zip(boxes, scores):
          if score > 0.5: # Confidence threshold
+             draw.rectangle(box, outline="red", width=3)
+             draw.text((box[0], box[1]), f"{score:.2f}", fill="red")

+     # Compute mAP if ground truths are provided
+     mAP = None
+     if ground_truths_json:
+         try:
+             ground_truths = json.loads(ground_truths_json)
+             mAP = mean_average_precision(boxes, ground_truths, iou_threshold=0.5)
+             # Draw ground truth boxes in a different color
+             for gt_box in ground_truths:
+                 draw.rectangle(gt_box, outline="green", width=3)
+                 draw.text((gt_box[0], gt_box[1]), "GT", fill="green")
+         except json.JSONDecodeError:
+             print("⚠️ Invalid ground truth format. Expecting JSON array of bounding boxes.")
+
+     # Filter boxes and scores based on confidence threshold
+     filtered_boxes = [box for box, score in zip(boxes, scores) if score > 0.5]
+     return image, filtered_boxes, mAP
+

  def get_examples():
+     # Load examples from JSON file
+     with open("datasets/examples.json", "r") as f:
+         examples_json = json.load(f)
+     examples_with_annotations = examples_json["examples"]
+
+     return examples_with_annotations

+ # Create Gradio interface
  demo = gr.Interface(
      fn=predict,
+     inputs=[gr.Image(type="pil"), gr.Textbox(placeholder="Enter ground truth bounding boxes as JSON (optional)")],
+     outputs=[gr.Image(type="pil", label="Detected LEGO pieces (Red predictions, green ground truth)"), gr.JSON(label="Predicted bounding boxes"), gr.Textbox(label="Mean Average Precision (mAP @ IoU 0.5)")],
+     title="LEGO Piece Detector",
+     examples=get_examples(),
+     description="Upload an image to detect LEGO pieces using Faster R-CNN. Optionally, enter ground truth bounding boxes to compute mAP. If left empty, mAP will be null."
  )

+ # Launch Gradio app
+ if __name__ == "__main__":
+     demo.launch()
src/new_trainer.py ADDED
@@ -0,0 +1,136 @@
+ import torch
+ import torchvision
+ import torchvision.transforms as T
+ import torch.optim as optim
+ from torch.utils.data import DataLoader, Dataset, Subset
+ import os
+ import json
+ from PIL import Image
+ from tqdm import tqdm  # Import tqdm for loading bar
+
+ # Paths (Modify These)
+ DATASET_DIR = "datasets/images"  # Folder containing images
+ ANNOTATIONS_FILE = "datasets/annotations.json"  # Path to COCO JSON
+
+ # Define Custom COCO Dataset Class (Without pycocotools)
+ class CocoDataset(Dataset):
+     def __init__(self, root, annotation_file, transforms=None):
+         self.root = root
+         with open(annotation_file, 'r') as f:
+             self.coco_data = json.load(f)
+         self.image_data = {img["id"]: img for img in self.coco_data["images"]}
+         self.annotations = self.coco_data["annotations"]
+         self.transforms = transforms
+
+     def __len__(self):
+         return len(self.image_data)
+
+     def __getitem__(self, idx):
+         try:
+             image_info = self.image_data[idx]
+             image_path = os.path.join(self.root, image_info["file_name"])
+             image = Image.open(image_path).convert("RGB")
+             img_width, img_height = image.size  # Get image dimensions
+
+             # Get Annotations
+             annotations = [ann for ann in self.annotations if ann["image_id"] == image_info["id"]]
+             boxes = []
+             labels = []
+
+             for ann in annotations:
+                 xmin, ymin, xmax, ymax = ann["bbox"]  # Now using [xmin, ymin, xmax, ymax]
+                 xmin = max(0, xmin)
+                 ymin = max(0, ymin)
+                 xmax = min(img_width, xmax)
+                 ymax = min(img_height, ymax)
+
+                 if xmax > xmin and ymax > ymin:
+                     boxes.append([xmin, ymin, xmax, ymax])
+                     labels.append(ann["category_id"])
+                 else:
+                     print(f"⚠️ Skipping invalid bbox {ann['bbox']} in image {image_info['file_name']} (image_id: {image_info['id']})")
+
+             if len(boxes) == 0:
+                 print(f"⚠️ Skipping entire image {image_info['file_name']} because no valid bounding boxes remain.")
+                 return None, None
+
+             # Convert to tensors
+             boxes = torch.as_tensor(boxes, dtype=torch.float32)
+             labels = torch.as_tensor(labels, dtype=torch.int64)
+             target = {"boxes": boxes, "labels": labels}
+
+             if self.transforms:
+                 image = self.transforms(image)
+
+             return image, target
+         except Exception as e:
+             print(f"⚠️ Skipping image {image_info['file_name']} due to error: {e}")
+             return None, None
+
+ # Define Image Transformations
+ transform = T.Compose([T.ToTensor()])
+
+ # Load Dataset
+ full_dataset = CocoDataset(root=DATASET_DIR, annotation_file=ANNOTATIONS_FILE, transforms=transform)
+ subset_size = min(10000, len(full_dataset))  # Limit dataset to 10,000 samples or less
+ subset_indices = list(range(subset_size))
+ dataset = Subset(full_dataset, subset_indices)
+
+ data_loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=lambda x: tuple(zip(*[item for item in x if item[0] is not None])))
+
+ # Load Faster R-CNN Model
+ model = torchvision.models.detection.fasterrcnn_resnet50_fpn(weights="DEFAULT")
+
+ # Freeze Backbone Layers
+ for param in model.backbone.parameters():
+     param.requires_grad = False
+
+ # Modify Classifier Head for Custom Classes
+ num_classes = 2  # One object class + background
+ in_features = model.roi_heads.box_predictor.cls_score.in_features
+ model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
+
+ device = torch.device("cpu")
+
+ # # Check for MPS Availability
+ # if torch.backends.mps.is_available():
+ #     print("✅ Using MPS (Apple Metal GPU)")
+ #     device = torch.device("mps")
+ # else:
+ #     print("⚠️ MPS not available, using CPU")
+ #     device = torch.device("cpu")
+
+ model.to(device)
+
+ # Training Setup
+ optimizer = optim.Adam(model.parameters(), lr=0.0001)
+ num_epochs = 5
+
+ # Training Loop
+ for epoch in range(num_epochs):
+     model.train()
+     epoch_loss = 0
+
+     print(f"Epoch {epoch+1}/{num_epochs}...")
+
+     for images, targets in tqdm(data_loader, desc=f"Training Epoch {epoch+1}"):
+         images = list(img.to(device) for img in images)
+         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
+
+         if any(len(t["boxes"]) == 0 for t in targets):
+             print("⚠️ Skipping batch with no valid bounding boxes")
+             continue
+
+         optimizer.zero_grad()
+         loss_dict = model(images, targets)
+         loss = sum(loss for loss in loss_dict.values())
+         loss.backward()
+         optimizer.step()
+
+         epoch_loss += loss.item()
+
+     print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}")
+
+ # Save Trained Model
+ torch.save(model.state_dict(), "faster_rcnn_custom.pth")
+ print("Training Complete! Model saved as 'faster_rcnn_custom.pth'")
src/transformdata.py ADDED
@@ -0,0 +1,70 @@
+ import os
+ import json
+ import xml.etree.ElementTree as ET
+
+ # Paths (Modify These)
+ ANNOTATIONS_DIR = "datasets/annotations"  # Change to your XML annotations folder
+ OUTPUT_JSON = "datasets/annotations.json"  # Where to save the COCO JSON
+
+ # COCO JSON Format
+ coco_data = {
+     "images": [],
+     "annotations": [],
+     "categories": [{"id": 1, "name": "object"}]  # Only one class
+ }
+
+ annotation_id = 0
+
+ # Process Each XML File
+ for xml_file in os.listdir(ANNOTATIONS_DIR):
+     if not xml_file.endswith(".xml"):
+         continue
+
+
+     try:
+         tree = ET.parse(os.path.join(ANNOTATIONS_DIR, xml_file))
+         root = tree.getroot()
+     except ET.ParseError:
+         print(f"Skipping file due to parsing error: {xml_file}")
+         continue
+
+     # Extract Image Info
+     filename = root.find("filename").text
+     width = int(root.find("size/width").text)
+     height = int(root.find("size/height").text)
+     image_id = len(coco_data["images"])
+
+     coco_data["images"].append({
+         "id": image_id,
+         "file_name": filename,
+         "width": width,
+         "height": height
+     })
+
+     # Extract Objects
+     for obj in root.findall("object"):
+         bbox = obj.find("bndbox")
+         xmin = int(bbox.find("xmin").text)
+         ymin = int(bbox.find("ymin").text)
+         xmax = int(bbox.find("xmax").text)
+         ymax = int(bbox.find("ymax").text)
+
+         # Keep the VOC corner format [xmin, ymin, xmax, ymax]; the trainer reads corners directly rather than COCO's [x, y, width, height]
+         bbox_coco = [xmin, ymin, xmax, ymax]
+
+         # Add Annotation
+         coco_data["annotations"].append({
+             "id": annotation_id,
+             "image_id": image_id,
+             "category_id": 1,  # Only one class
+             "bbox": bbox_coco,
+             "area": (bbox_coco[2] - bbox_coco[0]) * (bbox_coco[3] - bbox_coco[1]),
+             "iscrowd": 0
+         })
+         annotation_id += 1
+
+ # Save to JSON File
+ with open(OUTPUT_JSON, "w") as f:
+     json.dump(coco_data, f, indent=4)
+
+ print(f"COCO annotations saved to {OUTPUT_JSON}")