Spaces:

JohnJoelMota
/

AI-FINAL-PROJECT

Sleeping

JohnJoelMota committed on May 7

Commit

d650227

verified ·

1 Parent(s): aa53b45

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -43,7 +43,6 @@ def recommend_model(image):
         img_array = np.array(image)
         height, width = img_array.shape[:2]
         pixel_variance = np.var(img_array)
-        # Basic heuristic: DETR is better for high-resolution, complex images; Faster R-CNN for smaller, simpler ones
         if height * width > 1000 * 1000 or pixel_variance > 1000:
             return "DETR is recommended for high-resolution or complex images."
         else:
@@ -67,8 +66,11 @@ def detect_objects_frcnn(image, threshold=0.5):
     try:
         threshold = float(threshold) if threshold is not None else 0.5
         transform = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.transforms()
-        image_tensor = transform(image).unsqueeze(0)
         with torch.no_grad():
             prediction = frcnn_model(image_tensor)[0]
@@ -123,7 +125,9 @@ def detect_objects_detr(image, threshold=0.9):
         return Image.open(buf)
     try:
-        inputs = detr_processor(images=image, return_tensors="pt")
         outputs = detr_model(**inputs)
         target_sizes = torch.tensor([image.size[::-1]])
         results = detr_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[0]

         img_array = np.array(image)
         height, width = img_array.shape[:2]
         pixel_variance = np.var(img_array)
         if height * width > 1000 * 1000 or pixel_variance > 1000:
             return "DETR is recommended for high-resolution or complex images."
         else:
     try:
         threshold = float(threshold) if threshold is not None else 0.5
+        # Convert image to RGB and ensure float32 for transform
+        image = image.convert('RGB')
+        img_array = np.array(image).astype(np.float32) / 255.0
         transform = FasterRCNN_ResNet50_FPN_Weights.DEFAULT.transforms()
+        image_tensor = transform(Image.fromarray((img_array * 255).astype(np.uint8))).unsqueeze(0)
         with torch.no_grad():
             prediction = frcnn_model(image_tensor)[0]
         return Image.open(buf)
     try:
+        # Convert image to RGB and process with padding
+        image = image.convert('RGB')
+        inputs = detr_processor(images=image, return_tensors="pt", padding=True)
         outputs = detr_model(**inputs)
         target_sizes = torch.tensor([image.size[::-1]])
         results = detr_processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=threshold)[0]