Upload inference.py

inference.py +42 -38
@@ -133,28 +133,39 @@ class ModelInference:
        self, image: Image.Image, bbox: tuple[float, float, float, float]
    ) -> Image.Image:
        """
-
-
-
-        after pil_to_tensor + convert_image_dtype, matching the official
-        preprocessing exactly.
-
-
+        Crop image using normalized bounding box coordinates.
+
+        Matches SpeciesNet's preprocessing: crop using int() truncation
+        (not rounding) to match torchvision.transforms.functional.crop().
+
+        Args:
+            image: PIL Image (full resolution)
+            bbox: Normalized bounding box (x, y, width, height) in range [0.0, 1.0]
+
+        Returns:
+            Cropped PIL Image
        """
-
-
-
+        W, H = image.size
+        x, y, w, h = bbox
+
+        left = int(x * W)
+        top = int(y * H)
+        crop_w = int(w * W)
+        crop_h = int(h * H)
+
+        if crop_w <= 0 or crop_h <= 0:
+            return image
+
+        return image.crop((left, top, left + crop_w, top + crop_h))

    def get_classification(
        self, crop: Image.Image
    ) -> list[list[str | float]]:
        """
-        Run SpeciesNet classification on a
+        Run SpeciesNet classification on a cropped image.

        Args:
-            crop:
+            crop: Cropped and preprocessed PIL Image

        Returns:
            List of [class_name, confidence] lists for ALL classes.
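The PIL-side crop with int() truncation lands on the exact pixels that torchvision's tensor-side TF.crop would select, which is what makes replacing the old tensor path safe. A minimal self-contained check (the image size and bbox are arbitrary test values, not from the repo):

import torch
import torchvision.transforms.functional as TF
from PIL import Image

# Arbitrary test image and normalized bbox (x, y, w, h).
img = Image.fromarray((torch.rand(480, 640, 3) * 255).to(torch.uint8).numpy())
x, y, w, h = 0.1, 0.2, 0.5, 0.5
W, H = img.size

# PIL-side crop, truncating with int() as in the crop method above.
left, top, cw, ch = int(x * W), int(y * H), int(w * W), int(h * H)
pil_crop = TF.pil_to_tensor(img.crop((left, top, left + cw, top + ch)))

# Tensor-side crop, as the removed code path did it.
tensor_crop = TF.crop(TF.pil_to_tensor(img), top, left, ch, cw)

assert torch.equal(pil_crop, tensor_crop)  # identical pixels either way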
@@ -166,7 +177,19 @@ class ModelInference:
        if self.model is None:
            raise RuntimeError("Model not loaded, call load_model() first")

-
+        if crop.mode != "RGB":
+            crop = crop.convert("RGB")
+
+        # Match SpeciesNet's exact preprocessing pipeline:
+        # PIL -> CHW float32 [0,1] -> resize -> uint8 -> /255 -> HWC
+        img_tensor = TF.pil_to_tensor(crop)
+        img_tensor = TF.convert_image_dtype(img_tensor, torch.float32)
+        img_tensor = TF.resize(
+            img_tensor, [IMG_SIZE, IMG_SIZE], antialias=False
+        )
+        img_tensor = TF.convert_image_dtype(img_tensor, torch.uint8)
+        # HWC float32 [0, 1] (matching speciesnet's img.arr / 255)
+        img_arr = img_tensor.permute(1, 2, 0).numpy().astype("float32") / 255.0
        input_batch = torch.from_numpy(img_arr).unsqueeze(0).to(self.device)

        with torch.no_grad():
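Read as a standalone function, the inlined pipeline looks like the sketch below. preprocess_for_classifier is a hypothetical name, and IMG_SIZE = 480 is an assumption (the constant is defined elsewhere in inference.py). The float32 -> resize -> uint8 round-trip is the step that makes the final /255.0 array match SpeciesNet's uint8-based img.arr / 255 rather than the un-quantized float resize output:

import torch
import torchvision.transforms.functional as TF
from PIL import Image

IMG_SIZE = 480  # assumption; defined elsewhere in inference.py

def preprocess_for_classifier(crop: Image.Image) -> torch.Tensor:
    """Sketch of the pipeline inlined in get_classification above."""
    if crop.mode != "RGB":
        crop = crop.convert("RGB")
    t = TF.pil_to_tensor(crop)                    # CHW, uint8
    t = TF.convert_image_dtype(t, torch.float32)  # CHW, float32 in [0, 1]
    t = TF.resize(t, [IMG_SIZE, IMG_SIZE], antialias=False)
    t = TF.convert_image_dtype(t, torch.uint8)    # quantize back to uint8
    arr = t.permute(1, 2, 0).numpy().astype("float32") / 255.0  # HWC
    return torch.from_numpy(arr).unsqueeze(0)     # 1xHxWxC batch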
@@ -193,32 +216,13 @@ class ModelInference:
            str(i + 1): name for i, name in enumerate(self.class_names)
        }

-    def get_tensor(self,
-        """Preprocess
-
-        PIL -> CHW float32 [0,1] -> crop on tensor -> resize -> uint8 -> HWC /255
-        """
-        if image.mode != "RGB":
-            image = image.convert("RGB")
-
-        img_tensor = TF.pil_to_tensor(image)
+    def get_tensor(self, crop: Image.Image):
+        """Preprocess a crop into a numpy array for batch inference."""
+        if crop.mode != "RGB":
+            crop = crop.convert("RGB")
+
+        img_tensor = TF.pil_to_tensor(crop)
        img_tensor = TF.convert_image_dtype(img_tensor, torch.float32)
-
-        # Crop on the float32 tensor (matching official API)
-        bbox = image.info.get("_bbox")
-        if bbox:
-            W, H = image.size
-            x, y, w, h = bbox
-            crop_top = int(y * H)
-            crop_left = int(x * W)
-            crop_h = int(h * H)
-            crop_w = int(w * W)
-            if crop_w > 0 and crop_h > 0:
-                img_tensor = TF.crop(
-                    img_tensor, crop_top, crop_left, crop_h, crop_w
-                )
-
        img_tensor = TF.resize(
            img_tensor, [IMG_SIZE, IMG_SIZE], antialias=False
        )
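With cropping moved out of get_tensor, each call now depends only on its own crop, so the outputs can be stacked straight into a batch. A hypothetical usage sketch, assuming get_tensor returns an HWC float32 numpy array as its docstring says; make_batch and the commented call are illustrative, not part of this file:

import numpy as np
import torch

def make_batch(arrs: list[np.ndarray], device: torch.device) -> torch.Tensor:
    # Stack N HWC float32 arrays into an NHWC batch -- the multi-crop
    # analogue of the unsqueeze(0) used for a single image above.
    return torch.from_numpy(np.stack(arrs)).to(device)

# e.g. (hypothetical caller):
#   arrs = [inference.get_tensor(c) for c in crops]
#   with torch.no_grad():
#       logits = inference.model(make_batch(arrs, inference.device))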
@@ -239,4 +243,4 @@ class ModelInference:
                for i in range(len(self.class_names))
            ]
            results.append(classifications)
-        return results
+        return results
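Since get_classification returns [class_name, confidence] pairs for every class, callers typically sort and truncate. A small illustrative helper, not part of this file:

def top_k(classifications: list[list], k: int = 5) -> list[list]:
    # classifications: [[class_name, confidence], ...] for ALL classes,
    # as returned by get_classification or accumulated in results above.
    return sorted(classifications, key=lambda c: c[1], reverse=True)[:k]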