# Provenance: uploaded by Abs6187 ("Upload 11 files", commit 806bdda, verified)
"""
Perspective Transformation Module
==================================
Implements perspective transformation for converting camera view coordinates
to real-world top-down coordinates, essential for accurate speed calculation.
Authors:
- Abhay Gupta (0205CC221005)
- Aditi Lakhera (0205CC221011)
- Balraj Patel (0205CC221049)
- Bhumika Patel (0205CC221050)
Mathematical Background:
Perspective transformation uses a 3x3 homography matrix to map points
from one plane to another. This is crucial for converting pixel coordinates
to real-world measurements.
"""
import logging
from typing import Optional, Tuple

import cv2
import numpy as np
logger = logging.getLogger(__name__)
class PerspectiveTransformer:
    """
    Handles perspective transformation between camera view and real-world coordinates.

    This class computes and applies homography transformations to convert
    image coordinates to a top-down view with real-world measurements.
    The mapping is defined by 4 source points (camera view) and the 4
    corresponding target points (real-world top-down plane).
    """

    def __init__(
        self,
        source_points: np.ndarray,
        target_points: np.ndarray
    ):
        """
        Initialize the perspective transformer.

        Args:
            source_points: 4 points in the source image (camera view).
                Shape: (4, 2) with [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
            target_points: 4 corresponding points in target space (real-world).
                Shape: (4, 2) with the same format.

        Raises:
            ValueError: If points are invalid or the transformation cannot
                be computed.
        """
        self._validate_points(source_points, "source")
        self._validate_points(target_points, "target")

        # OpenCV's getPerspectiveTransform requires float32 arrays.
        self.source_pts = source_points.astype(np.float32)
        self.target_pts = target_points.astype(np.float32)

        # 3x3 homography mapping source -> target.
        self.matrix = self._compute_transformation_matrix()
        # Reverse mapping (target -> source); None if it could not be computed.
        self.inverse_matrix = self._compute_inverse_matrix()

        logger.info("Perspective transformer initialized successfully")
        # Lazy %-formatting: the arrays are only stringified if DEBUG is enabled.
        logger.debug("Source points:\n%s", self.source_pts)
        logger.debug("Target points:\n%s", self.target_pts)

    def _validate_points(self, points: np.ndarray, name: str) -> None:
        """
        Validate point array format and values.

        Args:
            points: Points array to validate.
            name: Name used in error messages ("source" or "target").

        Raises:
            ValueError: If points are not a (4, 2) numpy array of finite values.
        """
        if not isinstance(points, np.ndarray):
            raise ValueError(f"{name} points must be a numpy array")
        if points.shape != (4, 2):
            raise ValueError(
                f"{name} points must have shape (4, 2), got {points.shape}"
            )
        if not np.isfinite(points).all():
            raise ValueError(f"{name} points contain invalid values (NaN or Inf)")

    def _compute_transformation_matrix(self) -> np.ndarray:
        """
        Compute the perspective transformation matrix.

        Returns:
            3x3 homography matrix mapping source points onto target points.

        Raises:
            ValueError: If the transformation cannot be computed (e.g. the
                four points are degenerate/collinear).
        """
        try:
            matrix = cv2.getPerspectiveTransform(
                self.source_pts,
                self.target_pts
            )
            # Degenerate point sets can yield NaN/Inf entries.
            if matrix is None or not np.isfinite(matrix).all():
                raise ValueError("Invalid transformation matrix computed")
            logger.debug("Transformation matrix:\n%s", matrix)
            return matrix
        except cv2.error as e:
            # Chain the OpenCV error so the original cause is preserved.
            raise ValueError(f"Failed to compute perspective transform: {e}") from e

    def _compute_inverse_matrix(self) -> Optional[np.ndarray]:
        """
        Compute the inverse transformation matrix.

        Returns:
            3x3 inverse homography matrix, or None if it could not be
            computed (callers must handle the None case).
        """
        try:
            return cv2.getPerspectiveTransform(
                self.target_pts,
                self.source_pts
            )
        except Exception as e:
            # Best-effort: the forward transform still works without the inverse.
            logger.warning("Could not compute inverse matrix: %s", e)
            return None

    def apply_transformation(self, points: np.ndarray) -> np.ndarray:
        """
        Transform points from source to target coordinate system.

        Args:
            points: Array of points to transform. Shape: (N, 2).

        Returns:
            Transformed points in the target coordinate system, shape (N, 2).
            An empty input array is returned unchanged.

        Raises:
            ValueError: If points have an invalid shape or the transform fails.
        """
        if points.size == 0:
            return points
        if points.ndim != 2 or points.shape[1] != 2:
            raise ValueError(
                f"Points must have shape (N, 2), got {points.shape}"
            )
        try:
            # cv2.perspectiveTransform expects shape (N, 1, 2) float32.
            reshaped = points.reshape(-1, 1, 2).astype(np.float32)
            transformed = cv2.perspectiveTransform(reshaped, self.matrix)
            return transformed.reshape(-1, 2)
        except Exception as e:
            logger.error("Error applying transformation: %s", e)
            raise ValueError(f"Transformation failed: {e}") from e

    def apply_inverse_transformation(self, points: np.ndarray) -> np.ndarray:
        """
        Transform points from target back to source coordinate system.

        Args:
            points: Array of points in target coordinates. Shape: (N, 2).

        Returns:
            Points in the source coordinate system, shape (N, 2).
            An empty input array is returned unchanged.

        Raises:
            ValueError: If the inverse matrix is not available, the points
                have an invalid shape, or the transformation fails.
        """
        if self.inverse_matrix is None:
            raise ValueError("Inverse transformation matrix not available")
        if points.size == 0:
            return points
        # Mirror apply_transformation's validation so both directions fail
        # with the same clear error on malformed input.
        if points.ndim != 2 or points.shape[1] != 2:
            raise ValueError(
                f"Points must have shape (N, 2), got {points.shape}"
            )
        try:
            reshaped = points.reshape(-1, 1, 2).astype(np.float32)
            transformed = cv2.perspectiveTransform(reshaped, self.inverse_matrix)
            return transformed.reshape(-1, 2)
        except Exception as e:
            logger.error("Error applying inverse transformation: %s", e)
            raise ValueError(f"Inverse transformation failed: {e}") from e

    def transform_single_point(self, x: float, y: float) -> Tuple[float, float]:
        """
        Transform a single point (convenience method).

        Args:
            x: X coordinate in source system.
            y: Y coordinate in source system.

        Returns:
            Tuple of (x, y) in the target system as native Python floats.
        """
        point = np.array([[x, y]], dtype=np.float32)
        transformed = self.apply_transformation(point)
        # Convert np.float32 scalars to Python floats to honor the signature.
        return float(transformed[0][0]), float(transformed[0][1])

    def get_transformation_matrix(self) -> np.ndarray:
        """
        Get the transformation matrix.

        Returns:
            A copy of the 3x3 homography matrix (safe for callers to mutate).
        """
        return self.matrix.copy()

    def get_scale_factors(self) -> Tuple[float, float]:
        """
        Estimate scale factors in x and y directions.

        Uses the edge from point 0 to point 1 (width) and from point 0 to
        point 3 (height) of the two quadrilaterals.

        Returns:
            Tuple of (scale_x, scale_y): source pixels per target-space
            unit (e.g. pixels per meter when target points are in meters).
            Falls back to 1.0 for an axis whose target edge has zero length.
        """
        source_width = np.linalg.norm(self.source_pts[1] - self.source_pts[0])
        source_height = np.linalg.norm(self.source_pts[3] - self.source_pts[0])
        target_width = np.linalg.norm(self.target_pts[1] - self.target_pts[0])
        target_height = np.linalg.norm(self.target_pts[3] - self.target_pts[0])
        scale_x = source_width / target_width if target_width > 0 else 1.0
        scale_y = source_height / target_height if target_height > 0 else 1.0
        # Native floats, consistent with the declared Tuple[float, float].
        return float(scale_x), float(scale_y)