Deploy gesture detection & validation API
- Add Docker container with FastAPI application
- Include gesture detection and identity validation endpoints
- Add ONNX models for hand detection and classification
- Provide comprehensive API documentation
- Support for multiple gesture types: thumbs_up, peace, ok_sign, open_palm, call_me, grabbing
- Facial validation in placeholder mode (always returns success)
- Gesture validation fully functional with configurable parameters (see the client sketch below)
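
For reference, a minimal Python client sketch for the two endpoints this commit deploys. Field names follow the README below; `requests` is an assumed client-side library, not one of this Space's pinned dependencies:

```python
import json
import requests  # assumed client library; not part of this repo's requirements

API = "http://localhost:7860"  # HF Spaces default port, per the Dockerfile below

# Gesture detection: multipart video upload, processing every 3rd frame
with open("my_video.mp4", "rb") as f:
    r = requests.post(f"{API}/gestures", files={"video": f}, data={"frame_skip": 3})
r.raise_for_status()
print(r.json())  # e.g. {"gestures": [{"gesture": "thumbs_up", "duration": 45, ...}]}

# Identity validation: ID photo + video; required gestures ride along as a JSON array
with open("id_photo.jpg", "rb") as p, open("user_video.mp4", "rb") as v:
    r = requests.post(
        f"{API}/validate",
        files={"photo": p, "video": v},
        data={"gestures": json.dumps(["thumbs_up", "peace"]), "include_details": "true"},
    )
print(r.json())  # {"face": true, "gestures": true, "overall": true, ...}
```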
This view is limited to 50 files because it contains too many changes.
- Dockerfile +57 -0
- README.md +121 -6
- main.py +337 -0
- models/crops_classifier.onnx +3 -0
- models/hand_detector.onnx +3 -0
- requirements.txt +13 -0
- src/.DS_Store +0 -0
- src/facialembeddingsmatch/__init__.py +15 -0
- src/facialembeddingsmatch/__pycache__/__init__.cpython-312.pyc +0 -0
- src/facialembeddingsmatch/__pycache__/facial_matcher.cpython-312.pyc +0 -0
- src/facialembeddingsmatch/facial_matcher.py +433 -0
- src/gesturedetection/.DS_Store +0 -0
- src/gesturedetection/__init__.py +23 -0
- src/gesturedetection/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gesturedetection/__pycache__/api.cpython-312.pyc +0 -0
- src/gesturedetection/__pycache__/config.cpython-312.pyc +0 -0
- src/gesturedetection/__pycache__/main_controller.cpython-312.pyc +0 -0
- src/gesturedetection/__pycache__/models.cpython-312.pyc +0 -0
- src/gesturedetection/__pycache__/onnx_models.cpython-312.pyc +0 -0
- src/gesturedetection/api.py +318 -0
- src/gesturedetection/config.py +55 -0
- src/gesturedetection/main_controller.py +271 -0
- src/gesturedetection/models.py +89 -0
- src/gesturedetection/ocsort/__init__.py +2 -0
- src/gesturedetection/ocsort/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/__init__.cpython-39.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/association.cpython-312.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/association.cpython-39.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/kalmanboxtracker.cpython-312.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/kalmanboxtracker.cpython-39.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/kalmanfilter.cpython-312.pyc +0 -0
- src/gesturedetection/ocsort/__pycache__/kalmanfilter.cpython-39.pyc +0 -0
- src/gesturedetection/ocsort/association.py +511 -0
- src/gesturedetection/ocsort/kalmanboxtracker.py +157 -0
- src/gesturedetection/ocsort/kalmanfilter.py +1557 -0
- src/gesturedetection/onnx_models.py +194 -0
- src/gesturedetection/utils/__init__.py +16 -0
- src/gesturedetection/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- src/gesturedetection/utils/__pycache__/action_controller.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/action_controller.cpython-39.pyc +0 -0
- src/gesturedetection/utils/__pycache__/box_utils_numpy.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/box_utils_numpy.cpython-39.pyc +0 -0
- src/gesturedetection/utils/__pycache__/drawer.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/drawer.cpython-39.pyc +0 -0
- src/gesturedetection/utils/__pycache__/enums.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/enums.cpython-39.pyc +0 -0
- src/gesturedetection/utils/__pycache__/hand.cpython-312.pyc +0 -0
- src/gesturedetection/utils/__pycache__/hand.cpython-39.pyc +0 -0
- src/gesturedetection/utils/action_controller.py +598 -0
Dockerfile
ADDED
@@ -0,0 +1,57 @@
# Use Python 3.12 as base image
FROM python:3.12-slim

# Install system dependencies including OpenCV requirements
RUN apt-get update && apt-get install -y \
    curl \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Set up a new user named "user" with user ID 1000 (HF Spaces requirement)
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Upgrade pip and install dependencies
RUN pip install --no-cache-dir --upgrade pip

# Copy requirements first for better Docker layer caching
COPY --chown=user docker/requirements.txt $HOME/app/

# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt

# Copy the source code (COPY paths are relative to the build context, i.e. the
# repo root when building with -f docker/Dockerfile; "../" would escape the context)
COPY --chown=user src/ $HOME/app/src/
COPY --chown=user models/ $HOME/app/models/

# Copy the main entry point and README
COPY --chown=user main.py $HOME/app/
COPY --chown=user README.md $HOME/app/

# Expose the port that the app runs on (HF Spaces default is 7860)
EXPOSE 7860

# Set environment variables
ENV PYTHONPATH=$HOME/app
ENV PORT=7860

# Health check to ensure the API is running
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# Start the application directly
CMD ["python", "main.py"]
README.md
CHANGED
@@ -1,11 +1,126 @@
 ---
-title: Validation
-emoji:
-colorFrom:
-colorTo:
+title: Gesture Detection & Identity Validation API
+emoji: 👋
+colorFrom: blue
+colorTo: purple
 sdk: docker
 pinned: false
-license:
+license: mit
+app_port: 7860
 ---
 
-
+# 👋 Gesture Detection & Identity Validation API
+
+A unified API for gesture detection in videos and identity validation using facial recognition and gesture verification.
+
+## 🚀 Features
+
+- **Gesture Detection**: Detect and track hand gestures in video files
+- **Identity Validation**: Validate user identity using facial recognition and required gestures
+- **Real-time Processing**: Efficient video processing with configurable frame skip
+- **RESTful API**: Clean, documented API endpoints
+
+## 📋 API Endpoints
+
+### `GET /`
+Get API information and available endpoints
+
+### `GET /health`
+Health check endpoint showing service status
+
+### `POST /gestures`
+Detect gestures in an uploaded video file
+
+**Parameters:**
+- `video` (file): Video file to process
+- `frame_skip` (int, optional): Number of frames to skip (default: 1)
+
+**Response:**
+```json
+{
+  "gestures": [
+    {
+      "gesture": "thumbs_up",
+      "duration": 45,
+      "confidence": 0.92
+    }
+  ]
+}
+```
+
+### `POST /validate`
+Validate user identity using facial recognition and gesture verification
+
+**Parameters:**
+- `photo` (file): ID document photo
+- `video` (file): User video containing face and gestures
+- `gestures` (JSON array): Required gestures (e.g., `["thumbs_up","peace"]`)
+- `error_margin` (float, optional): Error margin for validation (default: 0.33)
+- `require_all_gestures` (bool, optional): Whether all gestures must be present
+- `similarity_threshold` (float, optional): Facial similarity threshold
+- `include_details` (bool, optional): Include detailed validation results
+
+**Response:**
+```json
+{
+  "face": true,
+  "gestures": true,
+  "overall": true,
+  "status": "success",
+  "processing_time_ms": 6925,
+  "timestamp": "2025-09-30T08:30:22Z"
+}
+```
+
+## 🎯 Supported Gestures
+
+- `thumbs_up` (👍)
+- `peace` (✌️)
+- `ok_sign` (👌)
+- `open_palm` (👋)
+- `call_me` (🤙)
+- `grabbing` (✊)
+
+## 📖 Documentation
+
+Interactive API documentation is available at:
+- **Swagger UI**: `/docs`
+- **ReDoc**: `/redoc`
+
+## 🔧 Usage Example
+
+```bash
+# Detect gestures in a video
+curl -X POST http://localhost:7860/gestures \
+  -F "video=@my_video.mp4" \
+  -F "frame_skip=3"
+
+# Validate identity
+curl -X POST http://localhost:7860/validate \
+  -F "photo=@id_photo.jpg" \
+  -F "video=@user_video.mp4" \
+  -F 'gestures=["thumbs_up","peace"]' \
+  -F "include_details=true"
+```
+
+## 🏗️ Technology Stack
+
+- **Framework**: FastAPI
+- **ML Models**: ONNX Runtime
+- **Computer Vision**: OpenCV
+- **Tracking**: OCSort with Kalman filters
+- **Facial Recognition**: Custom embeddings module
+
+## 📝 Note
+
+Facial validation is currently in placeholder mode and always returns success. Gesture validation is fully functional.
+
+## 📄 License
+
+MIT License - See LICENSE file for details
+
+## 🔗 Links
+
+- [GitHub Repository](https://github.com/kybtech/gesture-detection)
+- [API Documentation](/docs)
+- [Hugging Face Space](https://huggingface.co/spaces/algoryn/validation)
main.py
ADDED
@@ -0,0 +1,337 @@
#!/usr/bin/env python3
"""
Main entry point for the unified gesture detection and identity validation API.
Provides a flat API structure with all endpoints at the root level.
"""
import uvicorn
import os
import sys
import tempfile
import time
import json
import logging
from typing import Optional
from datetime import datetime, timezone

# Add the project root to Python path
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, project_root)

from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Depends
from fastapi.responses import ORJSONResponse

# Import gesture detection functionality
from src.gesturedetection.api import process_video_for_gestures
from src.gesturedetection.models import GestureResponse

# Import validation functionality
from src.validate.models import ValidationRequest, ValidationResponse, ValidationStatus
from src.validate.facial_validator import FacialValidator
from src.validate.gesture_validator import GestureValidator
from src.validate.api import get_validation_request
from src.validate.config import config

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Create main FastAPI application
app = FastAPI(
    title="Gesture Detection & Identity Validation API",
    description="Unified API for gesture detection and identity validation services",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    default_response_class=ORJSONResponse
)

# Initialize validators for validation endpoint
facial_validator = FacialValidator()
gesture_validator = GestureValidator()


@app.get("/")
async def root():
    """
    Root endpoint providing API information.

    Returns
    -------
    dict
        API information and available endpoints
    """
    return {
        "name": "Gesture Detection & Identity Validation API",
        "version": "1.0.0",
        "description": "Unified API providing gesture detection and identity validation services",
        "endpoints": {
            "GET /": "API information",
            "GET /health": "Health check",
            "POST /validate": "Validate identity using facial recognition and gestures",
            "POST /gestures": "Detect gestures in video",
            "GET /docs": "Interactive API documentation"
        }
    }


@app.get("/health")
async def health():
    """
    Health check endpoint for the unified API.

    Returns
    -------
    dict
        Health status of all service components
    """
    return {
        "status": "healthy",
        "service": "unified-api",
        "version": "1.0.0",
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "components": {
            "gesture_detection": "available",
            "identity_validation": "available",
            "facial_validator": "initialized",
            "gesture_validator": "initialized"
        }
    }


@app.post("/gestures", response_model=GestureResponse)
async def detect_gestures(video: UploadFile = File(...), frame_skip: int = Form(1)):
    """
    Detect gestures in an uploaded video file.

    Parameters
    ----------
    video : UploadFile
        The video file to process
    frame_skip : int
        Number of frames to skip between processing (1 = process every frame, 3 = process every 3rd frame)

    Returns
    -------
    GestureResponse
        Response containing detected gestures with duration and confidence
    """
    logger.info(f"Gesture detection request received: {video.filename}")

    # Validate file type
    if not video.content_type or not video.content_type.startswith('video/'):
        raise HTTPException(status_code=400, detail="File must be a video")

    # Create temporary file to save uploaded video
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
        try:
            # Write uploaded content to temporary file
            content = await video.read()
            temp_file.write(content)
            temp_file.flush()

            logger.info(f"Processing video: {temp_file.name} ({len(content)} bytes)")

            # Process the video with frame skip parameter
            gestures = process_video_for_gestures(temp_file.name, frame_skip=frame_skip)

            logger.info(f"Gesture detection completed: {len(gestures)} gestures detected")

            return GestureResponse(gestures=gestures)

        except Exception as e:
            logger.error(f"Error processing video: {str(e)}", exc_info=True)
            raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")

        finally:
            # Clean up temporary file
            if os.path.exists(temp_file.name):
                os.unlink(temp_file.name)
                logger.debug(f"Cleaned up temporary file: {temp_file.name}")


@app.post("/validate", response_model=ValidationResponse)
async def validate_identity(
    photo: UploadFile = File(...),
    video: UploadFile = File(...),
    request: ValidationRequest = Depends(get_validation_request)
):
    """
    Validate user identity using facial recognition and gesture validation.

    This endpoint accepts an ID document photo, a user video containing
    the person's face and required gestures, and a list of gestures that
    must be performed. It returns validation results for both facial
    recognition and gesture compliance.

    Parameters
    ----------
    photo : UploadFile
        ID document photo file (image format)
    video : UploadFile
        User video file containing face and gestures (video format)
    request : ValidationRequest
        Validation configuration and gesture requirements

    Returns
    -------
    ValidationResponse
        Validation results with success indicators and optional details

    Raises
    ------
    HTTPException
        If validation fails or processing errors occur
    """
    start_time = time.time()
    logger.info(f"Identity validation request received for {request.asked_gestures}")

    # Validate file types
    if not photo.content_type or not photo.content_type.startswith(('image/', 'application/')):
        raise HTTPException(
            status_code=400,
            detail="Photo file must be an image"
        )

    if not video.content_type or not video.content_type.startswith('video/'):
        raise HTTPException(
            status_code=400,
            detail="Video file must be a video"
        )

    # Validate file sizes (basic check)
    MAX_FILE_SIZE = 100 * 1024 * 1024  # 100MB
    if photo.size and photo.size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=413,
            detail="Photo file too large (max 100MB)"
        )

    if video.size and video.size > MAX_FILE_SIZE:
        raise HTTPException(
            status_code=413,
            detail="Video file too large (max 100MB)"
        )

    # Create temporary files for processing
    temp_photo = None
    temp_video = None

    try:
        # Save uploaded files to temporary location
        with tempfile.NamedTemporaryFile(delete=False, suffix=f"_photo.{photo.filename.split('.')[-1] if '.' in photo.filename else 'jpg'}") as temp_photo_file:
            temp_photo = temp_photo_file.name
            photo_content = await photo.read()
            temp_photo_file.write(photo_content)

        with tempfile.NamedTemporaryFile(delete=False, suffix=f"_video.{video.filename.split('.')[-1] if '.' in video.filename else 'mp4'}") as temp_video_file:
            temp_video = temp_video_file.name
            video_content = await video.read()
            temp_video_file.write(video_content)

        logger.info(f"Files saved: photo={temp_photo}, video={temp_video}")

        # Perform facial validation
        logger.info("Starting facial validation")

        # Update facial validator with request-specific parameters if provided
        if request.similarity_threshold is not None:
            facial_validator.similarity_threshold = request.similarity_threshold
        if request.frame_sample_rate is not None:
            facial_validator.frame_sample_rate = request.frame_sample_rate

        face_result = facial_validator.validate_facial_match(temp_photo, temp_video)

        # Perform gesture validation
        logger.info("Starting gesture validation")

        # Update gesture validator with request-specific parameters if provided
        if request.confidence_threshold is not None:
            gesture_validator.confidence_threshold = request.confidence_threshold
        if request.min_gesture_duration is not None:
            gesture_validator.min_gesture_duration = request.min_gesture_duration

        gesture_result = gesture_validator.validate_gestures(
            temp_video,
            request.asked_gestures,
            error_margin=request.error_margin,
            require_all=request.require_all_gestures
        )

        # Determine overall result
        overall_success = face_result.success and gesture_result.success
        overall_status = ValidationStatus.SUCCESS if overall_success else ValidationStatus.PARTIAL

        # Calculate processing time
        processing_time_ms = int((time.time() - start_time) * 1000)

        # Build response
        response = ValidationResponse(
            face=face_result.success,
            gestures=gesture_result.success,
            overall=overall_success,
            status=overall_status,
            face_result=face_result if request.include_details else None,
            gesture_result=gesture_result if request.include_details else None,
            processing_time_ms=processing_time_ms,
            timestamp=datetime.now(timezone.utc).isoformat()
        )

        # Log results
        logger.info(
            "Identity validation completed",
            extra={
                "face_success": face_result.success,
                "gesture_success": gesture_result.success,
                "overall_success": overall_success,
                "processing_time_ms": processing_time_ms,
                "requested_gestures": request.asked_gestures
            }
        )

        return response

    except Exception as e:
        logger.error(f"Error during identity validation: {str(e)}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error during validation: {str(e)}"
        )

    finally:
        # Clean up temporary files
        for temp_file in [temp_photo, temp_video]:
            if temp_file and os.path.exists(temp_file):
                try:
                    os.unlink(temp_file)
                    logger.debug(f"Cleaned up temporary file: {temp_file}")
                except Exception as e:
                    logger.warning(f"Failed to clean up temporary file {temp_file}: {e}")


def main():
    """Start the unified API server."""
    # Get port from environment variable, default to 7860 for HF Spaces compatibility
    port = int(os.getenv("PORT", 7860))

    print("🚀 Starting Unified Gesture Detection & Identity Validation API")
    print(f"📍 API will be available at: http://localhost:{port}")
    print(f"📚 API documentation at: http://localhost:{port}/docs")
    print(f"❤️ Health check at: http://localhost:{port}/health")
    print(f"🔐 Identity validation at: POST http://localhost:{port}/validate")
    print(f"👋 Gesture detection at: POST http://localhost:{port}/gestures")
    print("\nPress Ctrl+C to stop the server")

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=port,
        reload=False,  # Disable reload in production/Docker
        log_level="info"
    )


if __name__ == "__main__":
    main()
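
The `src.validate` modules that main.py imports fall outside this 50-file view. As a rough sketch only, a `get_validation_request` dependency consistent with the attributes main.py reads and the form fields the README documents might look like the following; every name and default here is inferred, not the repo's actual code:

```python
# Hypothetical sketch of src/validate/api.py's get_validation_request dependency.
# Field names are inferred from main.py's attribute accesses and the README's
# /validate parameters; types and defaults are assumptions.
import json
from typing import List, Optional

from fastapi import Form
from pydantic import BaseModel


class ValidationRequest(BaseModel):
    asked_gestures: List[str]
    error_margin: float = 0.33
    require_all_gestures: bool = True
    similarity_threshold: Optional[float] = None
    frame_sample_rate: Optional[int] = None
    confidence_threshold: Optional[float] = None
    min_gesture_duration: Optional[int] = None
    include_details: bool = False


def get_validation_request(
    gestures: str = Form(...),  # JSON-encoded array, e.g. '["thumbs_up","peace"]'
    error_margin: float = Form(0.33),
    require_all_gestures: bool = Form(True),
    similarity_threshold: Optional[float] = Form(None),
    frame_sample_rate: Optional[int] = Form(None),
    confidence_threshold: Optional[float] = Form(None),
    min_gesture_duration: Optional[int] = Form(None),
    include_details: bool = Form(False),
) -> ValidationRequest:
    # Parse the JSON array out of the multipart form so the gesture list can
    # ride alongside the file uploads, then hand main.py a typed request object.
    return ValidationRequest(
        asked_gestures=json.loads(gestures),
        error_margin=error_margin,
        require_all_gestures=require_all_gestures,
        similarity_threshold=similarity_threshold,
        frame_sample_rate=frame_sample_rate,
        confidence_threshold=confidence_threshold,
        min_gesture_duration=min_gesture_duration,
        include_details=include_details,
    )
```

Using `Depends` this way keeps `/validate` a single multipart request (two files plus scalar form fields) instead of requiring a separate JSON body.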
models/crops_classifier.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:12a02344f63a7c4f2a2ca90f8740ca10a08c17b683b5585d73c3e88323056762
size 411683
models/hand_detector.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a8ef73d466b61a8e8677be9c47008b217a11d1b265d95e36bf2521ff93329af6
size 1219959
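
The two `.onnx` entries are Git LFS pointer files; the actual weights are fetched at checkout. A small sketch (illustrative paths, assuming `git lfs pull` has run from the repo root) to confirm a fetched model matches the `oid` recorded in its pointer:

```python
# Verify a Git LFS-fetched file against the sha256 oid from its pointer file.
import hashlib

def sha256_of(path: str) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        # Stream in 1 MiB chunks so large models don't load into memory at once
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "a8ef73d466b61a8e8677be9c47008b217a11d1b265d95e36bf2521ff93329af6"
assert sha256_of("models/hand_detector.onnx") == expected, "LFS object not fetched?"
```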
requirements.txt
ADDED
@@ -0,0 +1,13 @@
# Direct dependencies from pyproject.toml
filterpy>=1.4.5
onnx>=1.19.0
onnxruntime>=1.22.1
opencv-contrib-python>=4.12.0.88
fastapi>=0.104.0
pydantic>=2.0.0
uvicorn>=0.24.0
python-multipart>=0.0.6
orjson>=3.9.0
numpy>=1.24.0
scipy>=1.11.0
logfire[fastapi,sqlite3,httpx]>=0.0.0
src/.DS_Store
ADDED
Binary file (6.15 kB)
src/facialembeddingsmatch/__init__.py
ADDED
@@ -0,0 +1,15 @@
"""
Facial embeddings matching module for identity verification.

This module provides facial recognition functionality using embedding-based
matching algorithms. It handles face detection, feature extraction, and
similarity comparison for identity verification purposes.
"""

__version__ = "1.0.0"
__all__ = [
    "FacialEmbeddingMatcher",
    "FaceDetector",
    "EmbeddingExtractor",
    "SimilarityCalculator"
]
src/facialembeddingsmatch/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (617 Bytes)
src/facialembeddingsmatch/__pycache__/facial_matcher.cpython-312.pyc
ADDED
Binary file (13.5 kB)
src/facialembeddingsmatch/facial_matcher.py
ADDED
@@ -0,0 +1,433 @@
"""
Facial embedding matcher for identity verification.

This module provides comprehensive facial recognition functionality including
face detection, embedding extraction, and similarity comparison. It serves
as the core facial matching component for the identity validation system.
"""

import os
import logging
import tempfile
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime, timezone
import numpy as np

logger = logging.getLogger(__name__)


class FaceDetector:
    """
    Face detection component for identifying faces in images.

    This class handles face detection in both ID photos and video frames.
    Currently implemented as a stub, designed to be replaced with actual
    face detection algorithms (e.g., MTCNN, DLib, or OpenCV cascades).
    """

    def __init__(self, confidence_threshold: float = 0.8):
        """
        Initialize the face detector.

        Parameters
        ----------
        confidence_threshold : float, optional
            Minimum confidence threshold for face detection, by default 0.8
        """
        self.confidence_threshold = confidence_threshold
        logger.info(f"FaceDetector initialized with confidence_threshold={confidence_threshold}")

    def detect_faces(self, image_path: str) -> List[Dict[str, Any]]:
        """
        Detect faces in an image.

        This is currently a stub implementation that simulates face detection.
        In the future, this will be replaced with actual face detection algorithms.

        Parameters
        ----------
        image_path : str
            Path to the image file

        Returns
        -------
        List[Dict[str, Any]]
            List of detected faces with bounding boxes and confidence scores
        """
        logger.debug(f"Detecting faces in {image_path} (stub implementation)")

        # Validate input file
        if not os.path.exists(image_path):
            logger.error(f"Image file not found: {image_path}")
            raise FileNotFoundError(f"Image file not found: {image_path}")

        # Stub implementation: simulate detecting one face
        # In a real implementation, this would use actual face detection
        detected_faces = [
            {
                "bbox": [100, 100, 200, 200],  # x1, y1, x2, y2
                "confidence": 0.95,
                "landmarks": None,  # Facial landmarks if available
                "image_path": image_path
            }
        ]

        logger.debug(f"Detected {len(detected_faces)} faces")
        return detected_faces


class EmbeddingExtractor:
    """
    Facial embedding extraction component.

    This class extracts facial feature embeddings from detected faces.
    Currently implemented as a stub, designed to be replaced with actual
    embedding extraction models (e.g., FaceNet, ArcFace, or VGGFace).
    """

    def __init__(self, model_path: Optional[str] = None):
        """
        Initialize the embedding extractor.

        Parameters
        ----------
        model_path : Optional[str], optional
            Path to the embedding extraction model, by default None
        """
        self.model_path = model_path
        logger.info(f"EmbeddingExtractor initialized with model_path={model_path}")

    def extract_embedding(self, image_path: str, face_bbox: List[int]) -> Optional[np.ndarray]:
        """
        Extract facial embedding from a face region.

        This is currently a stub implementation that returns a random embedding.
        In the future, this will extract actual facial embeddings using deep learning models.

        Parameters
        ----------
        image_path : str
            Path to the image file
        face_bbox : List[int]
            Bounding box coordinates [x1, y1, x2, y2]

        Returns
        -------
        Optional[np.ndarray]
            Facial embedding vector, or None if extraction fails
        """
        logger.debug(f"Extracting embedding from {image_path} with bbox {face_bbox}")

        # Validate input file
        if not os.path.exists(image_path):
            logger.error(f"Image file not found: {image_path}")
            return None

        # Stub implementation: return deterministic 128-dimensional embedding for testing
        # In a real implementation, this would use a trained model
        # Use a seed based on the image path to make it deterministic for testing
        import hashlib
        seed = int(hashlib.md5(image_path.encode()).hexdigest()[:8], 16) % 2**32
        np.random.seed(seed)
        embedding = np.random.randn(128).astype(np.float32)
        # Normalize the embedding
        embedding = embedding / np.linalg.norm(embedding)

        logger.debug(f"Extracted embedding with shape {embedding.shape}")
        return embedding


class SimilarityCalculator:
    """
    Similarity calculation component for comparing facial embeddings.

    This class computes similarity scores between facial embeddings using
    various distance metrics. Currently supports cosine similarity.
    """

    def __init__(self):
        """Initialize the similarity calculator."""
        logger.info("SimilarityCalculator initialized")

    def calculate_similarity(self, embedding1: np.ndarray, embedding2: np.ndarray) -> float:
        """
        Calculate similarity between two facial embeddings.

        Parameters
        ----------
        embedding1 : np.ndarray
            First facial embedding
        embedding2 : np.ndarray
            Second facial embedding

        Returns
        -------
        float
            Similarity score between 0.0 (dissimilar) and 1.0 (identical)
        """
        # Calculate cosine similarity
        dot_product = np.dot(embedding1, embedding2)
        norm1 = np.linalg.norm(embedding1)
        norm2 = np.linalg.norm(embedding2)

        if norm1 == 0 or norm2 == 0:
            return 0.0

        cosine_similarity = dot_product / (norm1 * norm2)

        # Convert to similarity score (0.0 to 1.0)
        similarity = (cosine_similarity + 1.0) / 2.0

        logger.debug(f"Calculated similarity: {similarity}")
        return similarity


class FacialEmbeddingMatcher:
    """
    Main facial embedding matcher for identity verification.

    This class orchestrates the complete facial recognition pipeline:
    face detection, embedding extraction, and similarity comparison.
    It serves as the primary interface for facial matching functionality.
    """

    def __init__(
        self,
        detector_confidence: float = 0.8,
        similarity_threshold: float = 0.7,
        embedding_model_path: Optional[str] = None
    ):
        """
        Initialize the facial embedding matcher.

        Parameters
        ----------
        detector_confidence : float, optional
            Confidence threshold for face detection, by default 0.8
        similarity_threshold : float, optional
            Similarity threshold for facial matching, by default 0.7
        embedding_model_path : Optional[str], optional
            Path to embedding extraction model, by default None
        """
        self.detector_confidence = detector_confidence
        self.similarity_threshold = similarity_threshold
        self.embedding_model_path = embedding_model_path

        # Initialize components
        self.face_detector = FaceDetector(confidence_threshold=detector_confidence)
        self.embedding_extractor = EmbeddingExtractor(model_path=embedding_model_path)
        self.similarity_calculator = SimilarityCalculator()

        logger.info(
            "FacialEmbeddingMatcher initialized",
            extra={
                "detector_confidence": detector_confidence,
                "similarity_threshold": similarity_threshold,
                "embedding_model_path": embedding_model_path
            }
        )

    def match_faces(
        self,
        id_image_path: str,
        video_path: str,
        frame_sample_rate: int = 10
    ) -> Dict[str, Any]:
        """
        Match faces between ID image and video frames.

        This method performs comprehensive facial matching by:
        1. Detecting faces in the ID image
        2. Sampling frames from the video and detecting faces
        3. Extracting embeddings from detected faces
        4. Computing similarity scores
        5. Determining overall match result

        Parameters
        ----------
        id_image_path : str
            Path to the ID document image
        video_path : str
            Path to the user video
        frame_sample_rate : int, optional
            Rate at which to sample video frames, by default 10

        Returns
        -------
        Dict[str, Any]
            Matching results with similarity scores and metadata
        """
        logger.info(f"Starting facial matching between {id_image_path} and {video_path}")

        try:
            # Step 1: Extract reference embedding from ID image
            # (inner try so a missing file is reported distinctly from the
            # generic error handler at the end of this method)
            try:
                id_faces = self.face_detector.detect_faces(id_image_path)

                if not id_faces:
                    return {
                        "success": False,
                        "error": "No faces detected in ID image",
                        "similarity_score": 0.0,
                        "matches": False,
                        "details": {
                            "id_faces_detected": 0,
                            "video_faces_detected": 0,
                            "processing_timestamp": datetime.now(timezone.utc).isoformat()
                        }
                    }

            except FileNotFoundError as e:
                return {
                    "success": False,
                    "error": f"File not found: {str(e)}",
                    "similarity_score": 0.0,
                    "matches": False,
                    "details": {
                        "id_faces_detected": 0,
                        "video_faces_detected": 0,
                        "processing_timestamp": datetime.now(timezone.utc).isoformat()
                    }
                }

            # Extract embedding from the first (best) face in ID image
            id_face = id_faces[0]
            id_embedding = self.embedding_extractor.extract_embedding(
                id_image_path, id_face["bbox"]
            )

            if id_embedding is None:
                return {
                    "success": False,
                    "error": "Failed to extract embedding from ID image",
                    "similarity_score": 0.0,
                    "matches": False,
                    "details": {
                        "id_faces_detected": len(id_faces),
                        "video_faces_detected": 0,
                        "processing_timestamp": datetime.now(timezone.utc).isoformat()
                    }
                }

            # Step 2: Extract faces from video frames
            video_faces = self._extract_faces_from_video(video_path, frame_sample_rate)

            if not video_faces:
                return {
                    "success": False,
                    "error": "No faces detected in video",
                    "similarity_score": 0.0,
                    "matches": False,
                    "details": {
                        "id_faces_detected": len(id_faces),
                        "video_faces_detected": 0,
                        "processing_timestamp": datetime.now(timezone.utc).isoformat()
                    }
                }

            # Step 3: Compare embeddings and find best match
            best_similarity = 0.0
            best_video_face = None

            for video_face in video_faces:
                video_embedding = self.embedding_extractor.extract_embedding(
                    video_path, video_face["bbox"]
                )

                if video_embedding is not None:
                    similarity = self.similarity_calculator.calculate_similarity(
                        id_embedding, video_embedding
                    )

                    if similarity > best_similarity:
                        best_similarity = similarity
                        best_video_face = video_face

            # Step 4: Determine if faces match
            matches = best_similarity >= self.similarity_threshold

            result = {
                "success": True,
                "matches": matches,
                "similarity_score": best_similarity,
                "similarity_threshold": self.similarity_threshold,
                "details": {
                    "id_faces_detected": len(id_faces),
                    "video_faces_detected": len(video_faces),
                    "best_video_face": best_video_face,
                    "processing_timestamp": datetime.now(timezone.utc).isoformat(),
                    "frame_sample_rate": frame_sample_rate,
                    "note": "This is a stub implementation. Real facial recognition will be implemented in the future."
                }
            }

            logger.info(
                "Facial matching completed",
                extra={
                    "matches": matches,
                    "similarity_score": best_similarity,
                    "faces_detected_id": len(id_faces),
                    "faces_detected_video": len(video_faces)
                }
            )

            return result

        except Exception as e:
            logger.error(f"Error during facial matching: {str(e)}", exc_info=True)
            return {
                "success": False,
                "error": f"Processing error: {str(e)}",
                "similarity_score": 0.0,
                "matches": False,
                "details": {
                    "processing_timestamp": datetime.now(timezone.utc).isoformat()
                }
            }

    def _extract_faces_from_video(self, video_path: str, frame_sample_rate: int) -> List[Dict[str, Any]]:
        """
        Extract faces from video frames.

        This method samples frames from the video and detects faces in each frame.
        Currently implemented as a stub that simulates face detection.

        Parameters
        ----------
        video_path : str
            Path to the video file
        frame_sample_rate : int
            Rate at which to sample frames

        Returns
        -------
        List[Dict[str, Any]]
            List of detected faces with frame information
        """
        logger.debug(f"Extracting faces from video: {video_path}")

        # Stub implementation: simulate detecting faces in video
        # In a real implementation, this would:
        # 1. Open the video file
        # 2. Sample frames at the specified rate
        # 3. Detect faces in each sampled frame
        # 4. Return face information with frame metadata

        detected_faces = [
            {
                "bbox": [120, 120, 220, 220],  # x1, y1, x2, y2
                "confidence": 0.92,
                "frame_number": 15,
                "timestamp": 0.5,  # seconds
                "image_path": video_path
            },
            {
                "bbox": [110, 110, 210, 210],
                "confidence": 0.88,
                "frame_number": 30,
                "timestamp": 1.0,
                "image_path": video_path
            }
        ]

        logger.debug(f"Extracted {len(detected_faces)} faces from video")
        return detected_faces
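
`SimilarityCalculator` maps raw cosine similarity from [-1, 1] onto [0, 1] via (cos + 1) / 2, so the matcher's default 0.7 threshold corresponds to a raw cosine of 2(0.7) - 1 = 0.4. A quick standalone check of that rescaling:

```python
# Verify the (cosine + 1) / 2 rescaling used by SimilarityCalculator:
# identical embeddings -> 1.0, orthogonal -> 0.5, opposite -> 0.0.
import numpy as np

def similarity(e1: np.ndarray, e2: np.ndarray) -> float:
    cos = np.dot(e1, e2) / (np.linalg.norm(e1) * np.linalg.norm(e2))
    return (cos + 1.0) / 2.0

a = np.array([1.0, 0.0])
b = np.array([0.0, 1.0])

print(similarity(a, a))   # 1.0
print(similarity(a, b))   # 0.5
print(similarity(a, -a))  # 0.0
```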
src/gesturedetection/.DS_Store
ADDED
Binary file (6.15 kB)
src/gesturedetection/__init__.py
ADDED
@@ -0,0 +1,23 @@
# Gesture detection package
from .api import app
from .models import Gesture, GestureResponse, GESTURE_MAPPING, FULL_GESTURE_MAPPING
from .main_controller import MainController
from .onnx_models import HandDetection, HandClassification
from .utils import Deque, Drawer, Hand, Event, HandPosition, targets

__all__ = [
    "app",
    "Gesture",
    "GestureResponse",
    "GESTURE_MAPPING",
    "FULL_GESTURE_MAPPING",
    "MainController",
    "HandDetection",
    "HandClassification",
    "Deque",
    "Drawer",
    "Hand",
    "Event",
    "HandPosition",
    "targets"
]
src/gesturedetection/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (703 Bytes)
src/gesturedetection/__pycache__/api.cpython-312.pyc
ADDED
Binary file (11.7 kB)
src/gesturedetection/__pycache__/config.cpython-312.pyc
ADDED
Binary file (2.18 kB)
src/gesturedetection/__pycache__/main_controller.cpython-312.pyc
ADDED
Binary file (12.3 kB)
src/gesturedetection/__pycache__/models.cpython-312.pyc
ADDED
Binary file (2.5 kB)
src/gesturedetection/__pycache__/onnx_models.cpython-312.pyc
ADDED
Binary file (10 kB)
src/gesturedetection/api.py
ADDED
@@ -0,0 +1,318 @@
import cv2
import numpy as np
import tempfile
import os
from collections import defaultdict
from typing import Dict, List, Tuple, Optional
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
from fastapi.responses import ORJSONResponse
from fastapi.encoders import jsonable_encoder

from .models import Gesture, GestureResponse, GESTURE_MAPPING, FULL_GESTURE_MAPPING
from .config import get_logfire_token, is_monitoring_enabled

# Import the gesture detection components
from .main_controller import MainController

# Configure logfire monitoring if token is available
logfire = None
if is_monitoring_enabled():
    try:
        import logfire
        logfire.configure(token=get_logfire_token())
        logfire.instrument_fastapi = logfire.instrument_fastapi
    except ImportError:
        logfire = None

app = FastAPI(default_response_class=ORJSONResponse)

# Instrument FastAPI with logfire if monitoring is enabled
if logfire is not None:
    logfire.instrument_fastapi(app, capture_headers=True)


def process_video_for_gestures(video_path: str, detector_path: str = "models/hand_detector.onnx",
                               classifier_path: str = "models/crops_classifier.onnx",
                               frame_skip: int = 1) -> List[Gesture]:
    """
    Process a video file to detect gestures using the MainController.

    Parameters
    ----------
    video_path : str
        Path to the video file to process
    detector_path : str
        Path to the hand detection ONNX model
    classifier_path : str
        Path to the gesture classification ONNX model
    frame_skip : int
        Number of frames to skip between processing (1 = process every frame, 3 = process every 3rd frame)

    Returns
    -------
    List[Gesture]
        List of detected gestures with duration and confidence
    """
    # Create monitoring span for video processing
    span_context = None
    if logfire is not None:
        span_context = logfire.span('process_video_for_gestures',
                                    video_path=video_path,
                                    detector_path=detector_path,
                                    classifier_path=classifier_path)
        span_context.__enter__()

    try:
        # Initialize the main controller
        if logfire is not None:
            with logfire.span('initialize_controller'):
                controller = MainController(detector_path, classifier_path)
        else:
            controller = MainController(detector_path, classifier_path)

        # Open video file
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            raise ValueError(f"Could not open video file: {video_path}")

        # Get video properties for monitoring
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)

        if logfire is not None:
            logfire.info('Video properties',
                         total_frames=total_frames,
                         fps=fps,
                         duration_seconds=total_frames/fps if fps > 0 else 0)

        # Track gestures per hand ID
        gesture_tracks: Dict[int, List[Tuple[int, float]]] = defaultdict(list)  # {hand_id: [(gesture_id, confidence), ...]}
        frame_count = 0
        processed_frames = 0
        detection_stats = {
            'frames_with_detections': 0,
            'total_detections': 0,
            'gesture_counts': defaultdict(int)
        }

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Skip frames based on frame_skip parameter
                if frame_count % frame_skip == 0:
                    # Process frame through the controller
                    bboxes, ids, labels = controller(frame)
                    processed_frames += 1

                    if bboxes is not None and ids is not None and labels is not None:
                        detection_stats['frames_with_detections'] += 1
                        detection_stats['total_detections'] += len(bboxes)

                        # Track gestures for each detected hand
                        for i in range(len(bboxes)):
                            hand_id = int(ids[i])
                            gesture_id = labels[i]

                            if gesture_id is not None:
                                # Get confidence from bbox (assuming it's the last element)
                                confidence = 0.8  # Default confidence, could be extracted from bbox if available
                                gesture_tracks[hand_id].append((gesture_id, confidence))
                                detection_stats['gesture_counts'][gesture_id] += 1

                                # Log individual detections for debugging
                                if logfire is not None:
                                    gesture_name = FULL_GESTURE_MAPPING.get(gesture_id, f"unknown_{gesture_id}")
                                    logfire.debug('Hand detection',
                                                  frame=frame_count,
                                                  hand_id=hand_id,
                                                  gesture_id=gesture_id,
                                                  gesture_name=gesture_name,
                                                  confidence=confidence,
                                                  bbox=bboxes[i].tolist() if len(bboxes[i]) >= 4 else None)
                else:
                    # Advance tracker on skipped frames to keep state consistent
                    controller.update(np.empty((0, 5)), None)

                frame_count += 1

                # Log progress every 100 frames
                if frame_count % 100 == 0 and logfire is not None:
                    progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
                    logfire.info('Processing progress',
                                 frame=frame_count,
                                 total_frames=total_frames,
                                 progress_percent=round(progress, 2))

        finally:
            cap.release()

        # Log final detection statistics
        if logfire is not None:
            logfire.info('Detection statistics',
                         total_frames=frame_count,
                         processed_frames=processed_frames,
                         frame_skip=frame_skip,
                         frames_with_detections=detection_stats['frames_with_detections'],
                         total_detections=detection_stats['total_detections'],
                         detection_rate=detection_stats['frames_with_detections']/processed_frames if processed_frames > 0 else 0,
                         gesture_counts=dict(detection_stats['gesture_counts']))

        # Process gesture tracks to find continuous gestures
        detected_gestures = []

        for hand_id, gesture_sequence in gesture_tracks.items():
            if not gesture_sequence:
                continue

            # Group consecutive identical gestures
            current_gesture = None
            current_duration = 0
            current_confidence = 0.0

            for gesture_id, confidence in gesture_sequence:
                if current_gesture is None or current_gesture != gesture_id:
                    # Save previous gesture if it was significant
                    # Adjust minimum duration based on frame skip
                    min_duration = max(5, frame_skip * 2)  # At least 2 processed frames
                    if current_gesture is not None and current_duration >= min_duration:
                        gesture_name = FULL_GESTURE_MAPPING.get(current_gesture, f"unknown_{current_gesture}")
                        avg_confidence = current_confidence / current_duration if current_duration > 0 else 0.0
                        # Scale duration back to original frame count
                        scaled_duration = current_duration * frame_skip
                        detected_gestures.append(Gesture(
                            gesture=gesture_name,
                            duration=scaled_duration,
                            confidence=avg_confidence
                        ))

                        # Log significant gesture detection
                        if logfire is not None:
                            logfire.info('Significant gesture detected',
                                         hand_id=hand_id,
                                         gesture=gesture_name,
                                         duration_frames=current_duration,
                                         confidence=avg_confidence)

                    # Start new gesture
                    current_gesture = gesture_id
                    current_duration = 1
                    current_confidence = confidence
                else:
+
Process a video file to detect gestures using the MainController.
|
| 39 |
+
|
| 40 |
+
Parameters
|
| 41 |
+
----------
|
| 42 |
+
video_path : str
|
| 43 |
+
Path to the video file to process
|
| 44 |
+
detector_path : str
|
| 45 |
+
Path to the hand detection ONNX model
|
| 46 |
+
classifier_path : str
|
| 47 |
+
Path to the gesture classification ONNX model
|
| 48 |
+
frame_skip : int
|
| 49 |
+
Number of frames to skip between processing (1 = process every frame, 3 = process every 3rd frame)
|
| 50 |
+
|
| 51 |
+
Returns
|
| 52 |
+
-------
|
| 53 |
+
List[Gesture]
|
| 54 |
+
List of detected gestures with duration and confidence
|
| 55 |
+
"""
|
| 56 |
+
# Create monitoring span for video processing
|
| 57 |
+
span_context = None
|
| 58 |
+
if logfire is not None:
|
| 59 |
+
span_context = logfire.span('process_video_for_gestures',
|
| 60 |
+
video_path=video_path,
|
| 61 |
+
detector_path=detector_path,
|
| 62 |
+
classifier_path=classifier_path)
|
| 63 |
+
span_context.__enter__()
|
| 64 |
+
|
| 65 |
+
try:
|
| 66 |
+
# Initialize the main controller
|
| 67 |
+
if logfire is not None:
|
| 68 |
+
with logfire.span('initialize_controller'):
|
| 69 |
+
controller = MainController(detector_path, classifier_path)
|
| 70 |
+
else:
|
| 71 |
+
controller = MainController(detector_path, classifier_path)
|
| 72 |
+
|
| 73 |
+
# Open video file
|
| 74 |
+
cap = cv2.VideoCapture(video_path)
|
| 75 |
+
if not cap.isOpened():
|
| 76 |
+
raise ValueError(f"Could not open video file: {video_path}")
|
| 77 |
+
|
| 78 |
+
# Get video properties for monitoring
|
| 79 |
+
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 80 |
+
fps = cap.get(cv2.CAP_PROP_FPS)
|
| 81 |
+
|
| 82 |
+
if logfire is not None:
|
| 83 |
+
logfire.info('Video properties',
|
| 84 |
+
total_frames=total_frames,
|
| 85 |
+
fps=fps,
|
| 86 |
+
duration_seconds=total_frames/fps if fps > 0 else 0)
|
| 87 |
+
|
| 88 |
+
# Track gestures per hand ID
|
| 89 |
+
gesture_tracks: Dict[int, List[Tuple[int, float]]] = defaultdict(list) # {hand_id: [(gesture_id, confidence), ...]}
|
| 90 |
+
frame_count = 0
|
| 91 |
+
processed_frames = 0
|
| 92 |
+
detection_stats = {
|
| 93 |
+
'frames_with_detections': 0,
|
| 94 |
+
'total_detections': 0,
|
| 95 |
+
'gesture_counts': defaultdict(int)
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
try:
|
| 99 |
+
while True:
|
| 100 |
+
ret, frame = cap.read()
|
| 101 |
+
if not ret:
|
| 102 |
+
break
|
| 103 |
+
|
| 104 |
+
# Skip frames based on frame_skip parameter
|
| 105 |
+
if frame_count % frame_skip == 0:
|
| 106 |
+
# Process frame through the controller
|
| 107 |
+
bboxes, ids, labels = controller(frame)
|
| 108 |
+
processed_frames += 1
|
| 109 |
+
|
| 110 |
+
if bboxes is not None and ids is not None and labels is not None:
|
| 111 |
+
detection_stats['frames_with_detections'] += 1
|
| 112 |
+
detection_stats['total_detections'] += len(bboxes)
|
| 113 |
+
|
| 114 |
+
# Track gestures for each detected hand
|
| 115 |
+
for i in range(len(bboxes)):
|
| 116 |
+
hand_id = int(ids[i])
|
| 117 |
+
gesture_id = labels[i]
|
| 118 |
+
|
| 119 |
+
if gesture_id is not None:
|
| 120 |
+
# Get confidence from bbox (assuming it's the last element)
|
| 121 |
+
confidence = 0.8 # Default confidence, could be extracted from bbox if available
|
| 122 |
+
gesture_tracks[hand_id].append((gesture_id, confidence))
|
| 123 |
+
detection_stats['gesture_counts'][gesture_id] += 1
|
| 124 |
+
|
| 125 |
+
# Log individual detections for debugging
|
| 126 |
+
if logfire is not None:
|
| 127 |
+
gesture_name = FULL_GESTURE_MAPPING.get(gesture_id, f"unknown_{gesture_id}")
|
| 128 |
+
logfire.debug('Hand detection',
|
| 129 |
+
frame=frame_count,
|
| 130 |
+
hand_id=hand_id,
|
| 131 |
+
gesture_id=gesture_id,
|
| 132 |
+
gesture_name=gesture_name,
|
| 133 |
+
confidence=confidence,
|
| 134 |
+
bbox=bboxes[i].tolist() if len(bboxes[i]) >= 4 else None)
|
| 135 |
+
else:
|
| 136 |
+
# Advance tracker on skipped frames to keep state consistent
|
| 137 |
+
controller.update(np.empty((0, 5)), None)
|
| 138 |
+
|
| 139 |
+
frame_count += 1
|
| 140 |
+
|
| 141 |
+
# Log progress every 100 frames
|
| 142 |
+
if frame_count % 100 == 0 and logfire is not None:
|
| 143 |
+
progress = (frame_count / total_frames) * 100 if total_frames > 0 else 0
|
| 144 |
+
logfire.info('Processing progress',
|
| 145 |
+
frame=frame_count,
|
| 146 |
+
total_frames=total_frames,
|
| 147 |
+
progress_percent=round(progress, 2))
|
| 148 |
+
|
| 149 |
+
finally:
|
| 150 |
+
cap.release()
|
| 151 |
+
|
| 152 |
+
# Log final detection statistics
|
| 153 |
+
if logfire is not None:
|
| 154 |
+
logfire.info('Detection statistics',
|
| 155 |
+
total_frames=frame_count,
|
| 156 |
+
processed_frames=processed_frames,
|
| 157 |
+
frame_skip=frame_skip,
|
| 158 |
+
frames_with_detections=detection_stats['frames_with_detections'],
|
| 159 |
+
total_detections=detection_stats['total_detections'],
|
| 160 |
+
detection_rate=detection_stats['frames_with_detections']/processed_frames if processed_frames > 0 else 0,
|
| 161 |
+
gesture_counts=dict(detection_stats['gesture_counts']))
|
| 162 |
+
|
| 163 |
+
# Process gesture tracks to find continuous gestures
|
| 164 |
+
detected_gestures = []
|
| 165 |
+
|
| 166 |
+
for hand_id, gesture_sequence in gesture_tracks.items():
|
| 167 |
+
if not gesture_sequence:
|
| 168 |
+
continue
|
| 169 |
+
|
| 170 |
+
# Group consecutive identical gestures
|
| 171 |
+
current_gesture = None
|
| 172 |
+
current_duration = 0
|
| 173 |
+
current_confidence = 0.0
|
| 174 |
+
|
| 175 |
+
for gesture_id, confidence in gesture_sequence:
|
| 176 |
+
if current_gesture is None or current_gesture != gesture_id:
|
| 177 |
+
# Save previous gesture if it was significant
|
| 178 |
+
# Adjust minimum duration based on frame skip
|
| 179 |
+
min_duration = max(5, frame_skip * 2) # At least 2 processed frames
|
| 180 |
+
if current_gesture is not None and current_duration >= min_duration:
|
| 181 |
+
gesture_name = FULL_GESTURE_MAPPING.get(current_gesture, f"unknown_{current_gesture}")
|
| 182 |
+
avg_confidence = current_confidence / current_duration if current_duration > 0 else 0.0
|
| 183 |
+
# Scale duration back to original frame count
|
| 184 |
+
scaled_duration = current_duration * frame_skip
|
| 185 |
+
detected_gestures.append(Gesture(
|
| 186 |
+
gesture=gesture_name,
|
| 187 |
+
duration=scaled_duration,
|
| 188 |
+
confidence=avg_confidence
|
| 189 |
+
))
|
| 190 |
+
|
| 191 |
+
# Log significant gesture detection
|
| 192 |
+
if logfire is not None:
|
| 193 |
+
logfire.info('Significant gesture detected',
|
| 194 |
+
hand_id=hand_id,
|
| 195 |
+
gesture=gesture_name,
|
| 196 |
+
duration_frames=current_duration,
|
| 197 |
+
confidence=avg_confidence)
|
| 198 |
+
|
| 199 |
+
# Start new gesture
|
| 200 |
+
current_gesture = gesture_id
|
| 201 |
+
current_duration = 1
|
| 202 |
+
current_confidence = confidence
|
| 203 |
+
else:
|
| 204 |
+
# Continue current gesture
|
| 205 |
+
current_duration += 1
|
| 206 |
+
current_confidence += confidence
|
| 207 |
+
|
| 208 |
+
# Don't forget the last gesture
|
| 209 |
+
min_duration = max(5, frame_skip * 2) # At least 2 processed frames
|
| 210 |
+
if current_gesture is not None and current_duration >= min_duration:
|
| 211 |
+
gesture_name = FULL_GESTURE_MAPPING.get(current_gesture, f"unknown_{current_gesture}")
|
| 212 |
+
avg_confidence = current_confidence / current_duration if current_duration > 0 else 0.0
|
| 213 |
+
# Scale duration back to original frame count
|
| 214 |
+
scaled_duration = current_duration * frame_skip
|
| 215 |
+
detected_gestures.append(Gesture(
|
| 216 |
+
gesture=gesture_name,
|
| 217 |
+
duration=scaled_duration,
|
| 218 |
+
confidence=avg_confidence
|
| 219 |
+
))
|
| 220 |
+
|
| 221 |
+
# Log final gesture detection
|
| 222 |
+
if logfire is not None:
|
| 223 |
+
logfire.info('Final gesture detected',
|
| 224 |
+
hand_id=hand_id,
|
| 225 |
+
gesture=gesture_name,
|
| 226 |
+
duration_frames=current_duration,
|
| 227 |
+
confidence=avg_confidence)
|
| 228 |
+
|
| 229 |
+
# Log final results
|
| 230 |
+
if logfire is not None:
|
| 231 |
+
logfire.info('Video processing completed',
|
| 232 |
+
total_gestures_detected=len(detected_gestures),
|
| 233 |
+
unique_hands=len(gesture_tracks),
|
| 234 |
+
gestures=[{'gesture': g.gesture, 'duration': g.duration, 'confidence': g.confidence} for g in detected_gestures])
|
| 235 |
+
|
| 236 |
+
return detected_gestures
|
| 237 |
+
|
| 238 |
+
finally:
|
| 239 |
+
if span_context is not None:
|
| 240 |
+
span_context.__exit__(None, None, None)
|
| 241 |
+
|
| 242 |
+
|
| 243 |
+
@app.get("/health")
|
| 244 |
+
async def health():
|
| 245 |
+
"""Health check endpoint."""
|
| 246 |
+
if logfire is not None:
|
| 247 |
+
logfire.info('Health check requested')
|
| 248 |
+
return {"message": "OK"}
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
@app.post("/gestures", response_model=GestureResponse)
|
| 252 |
+
async def detect_gestures(video: UploadFile = File(...), frame_skip: int = Form(1)):
|
| 253 |
+
"""
|
| 254 |
+
Detect gestures in an uploaded video file.
|
| 255 |
+
|
| 256 |
+
Parameters
|
| 257 |
+
----------
|
| 258 |
+
video : UploadFile
|
| 259 |
+
The video file to process
|
| 260 |
+
frame_skip : int
|
| 261 |
+
Number of frames to skip between processing (1 = process every frame, 3 = process every 3rd frame)
|
| 262 |
+
|
| 263 |
+
Returns
|
| 264 |
+
-------
|
| 265 |
+
GestureResponse
|
| 266 |
+
Response containing detected gestures with duration and confidence
|
| 267 |
+
"""
|
| 268 |
+
# Log request details
|
| 269 |
+
if logfire is not None:
|
| 270 |
+
logfire.info('Gesture detection request received',
|
| 271 |
+
filename=video.filename,
|
| 272 |
+
content_type=video.content_type,
|
| 273 |
+
content_length=video.size if hasattr(video, 'size') else 'unknown')
|
| 274 |
+
|
| 275 |
+
# Validate file type
|
| 276 |
+
if not video.content_type.startswith('video/'):
|
| 277 |
+
if logfire is not None:
|
| 278 |
+
logfire.warning('Invalid file type received', content_type=video.content_type)
|
| 279 |
+
raise HTTPException(status_code=400, detail="File must be a video")
|
| 280 |
+
|
| 281 |
+
# Create temporary file to save uploaded video
|
| 282 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as temp_file:
|
| 283 |
+
try:
|
| 284 |
+
# Write uploaded content to temporary file
|
| 285 |
+
content = await video.read()
|
| 286 |
+
temp_file.write(content)
|
| 287 |
+
temp_file.flush()
|
| 288 |
+
|
| 289 |
+
if logfire is not None:
|
| 290 |
+
logfire.info('Video file saved for processing',
|
| 291 |
+
temp_file=temp_file.name,
|
| 292 |
+
file_size_bytes=len(content))
|
| 293 |
+
|
| 294 |
+
# Process the video with frame skip parameter
|
| 295 |
+
gestures = process_video_for_gestures(temp_file.name, frame_skip=frame_skip)
|
| 296 |
+
|
| 297 |
+
if logfire is not None:
|
| 298 |
+
logfire.info('Gesture detection completed successfully',
|
| 299 |
+
total_gestures=len(gestures),
|
| 300 |
+
gestures=[g.gesture for g in gestures])
|
| 301 |
+
|
| 302 |
+
return GestureResponse(gestures=gestures)
|
| 303 |
+
|
| 304 |
+
except Exception as e:
|
| 305 |
+
if logfire is not None:
|
| 306 |
+
logfire.error('Error processing video',
|
| 307 |
+
error=str(e),
|
| 308 |
+
error_type=type(e).__name__,
|
| 309 |
+
temp_file=temp_file.name)
|
| 310 |
+
raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")
|
| 311 |
+
|
| 312 |
+
finally:
|
| 313 |
+
# Clean up temporary file
|
| 314 |
+
if os.path.exists(temp_file.name):
|
| 315 |
+
os.unlink(temp_file.name)
|
| 316 |
+
if logfire is not None:
|
| 317 |
+
logfire.debug('Temporary file cleaned up', temp_file=temp_file.name)
|
| 318 |
+
|
src/gesturedetection/config.py
ADDED
@@ -0,0 +1,55 @@
"""
Configuration module for the gesture detection system.
Handles environment variables and logfire token configuration.
"""

import os
from pathlib import Path
from typing import Optional


def get_logfire_token() -> Optional[str]:
    """
    Get the logfire token from environment variables or local configuration.

    Priority order:
    1. LOGFIRE_TOKEN environment variable (for production/deployment)
    2. .env file in the project root (for local development)
    3. None (monitoring disabled)

    Returns
    -------
    Optional[str]
        The logfire token if found, None otherwise
    """
    # First check the environment variable (for production)
    token = os.getenv("LOGFIRE_TOKEN")
    if token:
        return token

    # Check for a .env file in the project root (for local development)
    env_file = Path(__file__).parent.parent.parent / ".env"
    if env_file.exists():
        try:
            with open(env_file, "r") as f:
                for line in f:
                    line = line.strip()
                    if line.startswith("LOGFIRE_TOKEN="):
                        return line.split("=", 1)[1].strip('"\'')
        except Exception:
            # If we can't read the .env file, continue without a token
            pass

    return None


def is_monitoring_enabled() -> bool:
    """
    Check whether monitoring is enabled, i.e. whether a logfire token is available.

    Returns
    -------
    bool
        True if monitoring is enabled, False otherwise
    """
    return get_logfire_token() is not None
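A short sketch of how this helper is intended to be consumed at startup. The `logfire.configure(token=...)` call is an assumption about the monitoring SDK's entry point, and the import path assumes the repository root is on `sys.path`.

# Hypothetical startup wiring for the config helpers above.
from src.gesturedetection.config import get_logfire_token, is_monitoring_enabled  # path is an assumption

if is_monitoring_enabled():
    import logfire
    logfire.configure(token=get_logfire_token())  # assumed SDK entry point
else:
    print("LOGFIRE_TOKEN not set; monitoring disabled")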
src/gesturedetection/main_controller.py
ADDED
@@ -0,0 +1,271 @@
import numpy as np

from .ocsort import (
    KalmanBoxTracker,
    associate,
    ciou_batch,
    ct_dist,
    diou_batch,
    giou_batch,
    iou_batch,
    linear_assignment,
)
from .onnx_models import HandClassification, HandDetection
from .utils import Deque, Drawer, Hand
from .config import is_monitoring_enabled

# Configure logfire monitoring if available
logfire = None
if is_monitoring_enabled():
    try:
        import logfire
    except ImportError:
        logfire = None

ASSO_FUNCS = {"iou": iou_batch, "giou": giou_batch, "ciou": ciou_batch, "diou": diou_batch, "ct_dist": ct_dist}


def k_previous_obs(observations, cur_age, k):
    if len(observations) == 0:
        return [-1, -1, -1, -1, -1]
    for i in range(k):
        dt = k - i
        if cur_age - dt in observations:
            return observations[cur_age - dt]
    max_age = max(observations.keys())
    return observations[max_age]


class MainController:
    """
    Main tracking controller.
    It maintains a list of tracks; each track holds a KalmanBoxTracker object and a Deque of Hand objects.
    """

    def __init__(
        self, detection_model, classification_model, max_age=30, min_hits=3, iou_threshold=0.3, maxlen=30, min_frames=20
    ):
        """
        Parameters
        ----------
        detection_model : str
            Path to the detection model.
        classification_model : str
            Path to the classification model.
        max_age : int
            Maximum age of a track.
        min_hits : int
            Minimum number of hits to confirm a track.
        iou_threshold : float
            IOU threshold for track association.
        maxlen : int
            Maximum length of the deque in a track.
        min_frames : int
            Minimum number of frames to confirm a track.
        """
        self.maxlen = maxlen
        self.min_frames = min_frames
        self.max_age = max_age
        self.min_hits = min_hits
        self.delta_t = 3
        self.iou_threshold = iou_threshold
        self.inertia = 0.2
        self.asso_func = ASSO_FUNCS["giou"]
        self.tracks = []
        self.frame_count = 0
        self.detection_model = HandDetection(detection_model)
        self.classification_model = HandClassification(classification_model)
        self.drawer = Drawer()

    def update(self, dets=np.empty((0, 5)), labels=None):
        """
        Parameters
        ----------
        dets : np.array
            Bounding boxes with shape [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...].
            Requires: this method must be called once for each frame, even with empty
            detections (use np.empty((0, 5)) for frames without detections).
        labels : np.array
            Labels with shape (N, 1), where N is the number of bounding boxes.

        Returns
        -------
        np.array
            A similar array, where the last column is the object ID.

        Notes
        -----
        The number of objects returned may differ from the number of detections provided.
        """
        # Advance the frame count on every call to keep the tracker state in sync with real time.
        # This method is required to be called once per frame (even if there are no detections),
        # so we must advance the internal Kalman filters and aging logic on empty frames as well.
        self.frame_count += 1

        # Get predicted locations from existing trackers for this frame.
        # This advances age/time_since_update and is required also when there are no detections,
        # ensuring tracks can age out (max_age) and do not persist indefinitely across gaps.
        trks = np.zeros((len(self.tracks), 5))
        to_del = []
        ret = []
        lbs = []
        for t, trk in enumerate(trks):
            pos = self.tracks[t]["tracker"].predict()[0]
            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
            if np.any(np.isnan(pos)):
                to_del.append(t)
        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
        for t in reversed(to_del):
            self.tracks.pop(t)

        velocities = np.array(
            [
                trk["tracker"].velocity if trk["tracker"].velocity is not None else np.array((0, 0))
                for trk in self.tracks
            ]
        )
        last_boxes = np.array([trk["tracker"].last_observation for trk in self.tracks])
        k_observations = np.array(
            [k_previous_obs(trk["tracker"].observations, trk["tracker"].age, self.delta_t) for trk in self.tracks]
        )

        """
        First round of association
        """
        matched, unmatched_dets, unmatched_trks = associate(
            dets, trks, self.iou_threshold, velocities, k_observations, self.inertia
        )

        for m in matched:
            self.tracks[m[1]]["tracker"].update(dets[m[0], :])
            self.tracks[m[1]]["hands"].append(Hand(bbox=dets[m[0], :4], gesture=labels[m[0]]))

        """
        Second round of association by OCR
        """
        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:
            left_dets = dets[unmatched_dets]
            left_trks = last_boxes[unmatched_trks]
            iou_left = self.asso_func(left_dets, left_trks)
            iou_left = np.array(iou_left)
            if iou_left.max() > self.iou_threshold:
                """
                NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may
                get higher performance, especially on the MOT17/MOT20 datasets. But we keep
                it uniform here for simplicity.
                """
                rematched_indices = linear_assignment(-iou_left)
                to_remove_det_indices = []
                to_remove_trk_indices = []
                for m in rematched_indices:
                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]
                    if iou_left[m[0], m[1]] < self.iou_threshold:
                        continue
                    self.tracks[trk_ind]["tracker"].update(dets[det_ind, :])
                    self.tracks[trk_ind]["hands"].append(Hand(bbox=dets[det_ind, :4], gesture=labels[det_ind]))
                    to_remove_det_indices.append(det_ind)
                    to_remove_trk_indices.append(trk_ind)
                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))
                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))

        # For unmatched trackers (including the case with no detections),
        # update with None to keep the filter consistent and append a dummy Hand.
        for m in unmatched_trks:
            self.tracks[m]["tracker"].update(None)
            self.tracks[m]["hands"].append(Hand(bbox=None, gesture=None))

        # Create and initialise new trackers for unmatched detections
        for i in unmatched_dets:
            self.tracks.append(
                {
                    "hands": Deque(self.maxlen, self.min_frames),
                    "tracker": KalmanBoxTracker(dets[i, :], delta_t=self.delta_t),
                }
            )
        i = len(self.tracks)
        for trk in reversed(self.tracks):
            if trk["tracker"].last_observation.sum() < 0:
                d = trk["tracker"].get_state()[0]
            else:
                """
                It is optional to use the recent observation or the Kalman filter prediction
                here; we didn't notice a significant difference.
                """
                d = trk["tracker"].last_observation[:4]
            if (trk["tracker"].time_since_update < 1) and (
                trk["tracker"].hit_streak >= self.min_hits or self.frame_count <= self.min_hits
            ):
                # +1 as the MOT benchmark requires positive IDs
                ret.append(np.concatenate((d, [trk["tracker"].id + 1])).reshape(1, -1))
                if len(trk["hands"]) > 0:
                    lbs.append(trk["hands"][-1].gesture)
                else:
                    lbs.append(None)

            i -= 1
            # Remove dead tracklets
            if trk["tracker"].time_since_update > self.max_age:
                self.tracks.pop(i)
        if len(ret) > 0:
            return np.concatenate(ret), lbs
        return np.empty((0, 5)), np.empty((0, 1))

    def __call__(self, frame):
        """
        Parameters
        ----------
        frame : np.array
            Image frame with shape (H, W, 3).

        Returns
        -------
        tuple
            (bboxes, ids, labels) for confirmed tracks, or (None, None, None)
            when no hands are detected in the frame.
        """
        # Log frame processing if monitoring is enabled
        if logfire is not None:
            with logfire.span('frame_processing', frame_shape=frame.shape):
                bboxes, probs = self.detection_model(frame)

                if len(bboxes):
                    detection_scores = np.asarray(probs).tolist()
                    logfire.debug(
                        'Hand detections found',
                        num_detections=len(bboxes),
                        detection_scores=detection_scores,
                    )

                    labels = self.classification_model(frame, bboxes)
                    bboxes = np.concatenate((bboxes, np.expand_dims(probs, axis=1)), axis=1)
                    new_bboxes, labels = self.update(dets=bboxes, labels=labels)

                    # Log classification results
                    if labels is not None and len(labels) > 0:
                        labels_list = np.asarray(labels).tolist()
                        gesture_names = [
                            f"gesture_{label}" if label is not None else "none"
                            for label in labels_list
                        ]
                        logfire.debug(
                            'Gesture classifications',
                            labels=labels_list,
                            gesture_names=gesture_names,
                        )

                    return new_bboxes[:, :-1], new_bboxes[:, -1], labels
                else:
                    logfire.debug('No hand detections in frame')
                    self.update(np.empty((0, 5)), None)
                    return None, None, None
        else:
            # Original logic without monitoring
            bboxes, probs = self.detection_model(frame)
            if len(bboxes):
                labels = self.classification_model(frame, bboxes)
                bboxes = np.concatenate((bboxes, np.expand_dims(probs, axis=1)), axis=1)
                new_bboxes, labels = self.update(dets=bboxes, labels=labels)
                return new_bboxes[:, :-1], new_bboxes[:, -1], labels
            else:
                self.update(np.empty((0, 5)), None)
                return None, None, None
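A minimal sketch of driving MainController directly on a local video, mirroring the loop in process_video_for_gestures. The video path is illustrative, and the import path assumes the repository root is on `sys.path`; the model paths match the files shipped under models/.

# Hypothetical standalone driver for MainController.
import cv2
from src.gesturedetection.main_controller import MainController  # path is an assumption

controller = MainController("models/hand_detector.onnx", "models/crops_classifier.onnx")
cap = cv2.VideoCapture("sample.mp4")  # hypothetical input video
while True:
    ret, frame = cap.read()
    if not ret:
        break
    bboxes, ids, labels = controller(frame)
    if bboxes is not None:
        # One row per confirmed track: box coordinates plus a stable track ID.
        for box, track_id, label in zip(bboxes, ids, labels):
            print(int(track_id), label, box)
cap.release()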
src/gesturedetection/models.py
ADDED
@@ -0,0 +1,89 @@
from pydantic import BaseModel
from typing import List, Optional


class Gesture(BaseModel):
    """Represents a detected gesture with metadata."""
    gesture: str
    duration: int  # Duration in frames
    confidence: float


class GestureResponse(BaseModel):
    """Response model containing a list of detected gestures."""
    gestures: List[Gesture]


# Primary gesture mappings for the main gestures + additional ones
GESTURE_MAPPING = {
    # Original 5 main gestures
    27: "thumbs_up",      # like
    31: "palm",           # open palm wave (5 fingers)
    32: "peace",          # peace sign (2 fingers)
    29: "ok",             # OK sign
    20: "call",           # call me (little finger)

    # Finger counting (1-5)
    30: "one",            # 1 finger
    39: "two_up",         # 2 fingers (peace sign)
    37: "three",          # 3 fingers
    26: "four",           # 4 fingers
    # Note: 5 fingers is the same as palm (31)

    # Surprise gesture
    23: "middle_finger",  # middle finger (surprise!)

    # Additional useful gestures
    25: "fist",           # closed fist
    19: "point",          # pointing with the index finger
    35: "stop",           # stop gesture
}

# Additional gesture mappings for completeness
FULL_GESTURE_MAPPING = {
    0: "hand_down",
    1: "hand_right",
    2: "hand_left",
    3: "thumb_index",
    4: "thumb_left",
    5: "thumb_right",
    6: "thumb_down",
    7: "half_up",
    8: "half_left",
    9: "half_right",
    10: "half_down",
    11: "part_hand_heart",
    12: "part_hand_heart2",
    13: "fist_inverted",
    14: "two_left",
    15: "two_right",
    16: "two_down",
    17: "grabbing",
    18: "grip",
    19: "point",
    20: "call",
    21: "three3",
    22: "little_finger",
    23: "middle_finger",
    24: "dislike",
    25: "fist",
    26: "four",
    27: "like",
    28: "mute",
    29: "ok",
    30: "one",
    31: "palm",
    32: "peace",
    33: "peace_inverted",
    34: "rock",
    35: "stop",
    36: "stop_inverted",
    37: "three",
    38: "three2",
    39: "two_up",
    40: "two_up_inverted",
    41: "three_gun",
    42: "one_left",
    43: "one_right",
    44: "one_down"
}
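A tiny usage sketch of the lookup pattern the service uses elsewhere: resolve a raw classifier label through FULL_GESTURE_MAPPING, falling back to an `unknown_<id>` name. The import path is an assumption.

# Hypothetical helper mirroring the lookup in process_video_for_gestures.
from src.gesturedetection.models import FULL_GESTURE_MAPPING  # path is an assumption

def gesture_name(label_id: int) -> str:
    # Known IDs resolve to their name; anything else gets a stable fallback.
    return FULL_GESTURE_MAPPING.get(label_id, f"unknown_{label_id}")

assert gesture_name(27) == "like"
assert gesture_name(99) == "unknown_99"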
src/gesturedetection/ocsort/__init__.py
ADDED
@@ -0,0 +1,2 @@
from .association import associate, ciou_batch, ct_dist, diou_batch, giou_batch, iou_batch, linear_assignment
from .kalmanboxtracker import KalmanBoxTracker
src/gesturedetection/ocsort/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (434 Bytes).
src/gesturedetection/ocsort/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (400 Bytes).
src/gesturedetection/ocsort/__pycache__/association.cpython-312.pyc
ADDED
Binary file (22.3 kB).
src/gesturedetection/ocsort/__pycache__/association.cpython-39.pyc
ADDED
Binary file (11.4 kB).
src/gesturedetection/ocsort/__pycache__/kalmanboxtracker.cpython-312.pyc
ADDED
Binary file (7.89 kB).
src/gesturedetection/ocsort/__pycache__/kalmanboxtracker.cpython-39.pyc
ADDED
Binary file (4.63 kB).
src/gesturedetection/ocsort/__pycache__/kalmanfilter.cpython-312.pyc
ADDED
Binary file (69.3 kB).
src/gesturedetection/ocsort/__pycache__/kalmanfilter.cpython-39.pyc
ADDED
Binary file (50.3 kB).
src/gesturedetection/ocsort/association.py
ADDED
@@ -0,0 +1,511 @@
import numpy as np


def iou_batch(bboxes1, bboxes2):
    """
    Calculate the Intersection over Union (IoU) between bounding boxes.

    Parameters
    ----------
    bboxes1: numpy.ndarray
        shape is [N, 4]
    bboxes2: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    ious: numpy.ndarray
        shape is [N, M]
    """
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    o = wh / (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )
    return o


def giou_batch(bboxes1, bboxes2):
    """
    Calculate the Generalized Intersection over Union (GIoU) between bounding boxes.

    Parameters
    ----------
    bboxes1: numpy.ndarray
        shape is [N, 4]
    bboxes2: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    gious: numpy.ndarray
        shape is [N, M]
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    union = (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )
    iou = wh / union

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])
    wc = xxc2 - xxc1
    hc = yyc2 - yyc1
    assert (wc > 0).all() and (hc > 0).all()
    area_enclose = wc * hc
    giou = iou - (area_enclose - union) / area_enclose
    giou = (giou + 1.0) / 2.0  # rescale from (-1,1) to (0,1)
    return giou


def diou_batch(bboxes1, bboxes2):
    """
    Calculate the Distance Intersection over Union (DIoU) between bounding boxes.

    Parameters
    ----------
    bboxes1: numpy.ndarray
        shape is [N, 4]
    bboxes2: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    dious: numpy.ndarray
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    # calculate the intersection box
    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    union = (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )
    iou = wh / union
    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])

    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2
    diou = iou - inner_diag / outer_diag

    return (diou + 1) / 2.0  # rescale from (-1,1) to (0,1)


def ciou_batch(bboxes1, bboxes2):
    """
    Calculate the Complete Intersection over Union (CIoU) between bounding boxes.

    Parameters
    ----------
    bboxes1: numpy.ndarray
        shape is [N, 4]
    bboxes2: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    ciou: numpy.ndarray
    """
    # for details, see https://arxiv.org/pdf/1902.09630.pdf
    # ensure the predicted bbox form
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    # calculate the intersection box
    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])
    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])
    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])
    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])
    w = np.maximum(0.0, xx2 - xx1)
    h = np.maximum(0.0, yy2 - yy1)
    wh = w * h
    union = (
        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])
        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])
        - wh
    )
    iou = wh / union

    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])
    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])
    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])
    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])

    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2

    w1 = bboxes1[..., 2] - bboxes1[..., 0]
    h1 = bboxes1[..., 3] - bboxes1[..., 1]
    w2 = bboxes2[..., 2] - bboxes2[..., 0]
    h2 = bboxes2[..., 3] - bboxes2[..., 1]

    # prevent division by zero: add a one-pixel shift
    h2 = h2 + 1.0
    h1 = h1 + 1.0
    arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1)
    v = (4 / (np.pi**2)) * (arctan**2)
    S = 1 - iou
    alpha = v / (S + v)
    ciou = iou - inner_diag / outer_diag - alpha * v

    return (ciou + 1) / 2.0  # rescale from (-1,1) to (0,1)


def ct_dist(bboxes1, bboxes2):
    """
    Measure the center distance between two sets of bounding boxes. This is a
    coarse implementation; we don't recommend using it alone for association,
    as it can be unstable and sensitive to frame rate and object speed.

    Parameters
    ----------
    bboxes1: numpy.ndarray
        shape is [N, 4]
    bboxes2: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    ct_dist: numpy.ndarray
    """
    bboxes2 = np.expand_dims(bboxes2, 0)
    bboxes1 = np.expand_dims(bboxes1, 1)

    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0
    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0
    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0
    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0

    ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2

    ct_dist = np.sqrt(ct_dist2)

    # The linear rescaling is a naive version and needs more study
    ct_dist = ct_dist / ct_dist.max()
    return ct_dist.max() - ct_dist  # rescale to (0,1)


def speed_direction_batch(dets, tracks):
    """
    Calculate the speed and direction between detections and tracks.

    Parameters
    ----------
    dets: numpy.ndarray
        shape is [N, 4]
    tracks: numpy.ndarray
        shape is [M, 4]

    Returns
    -------
    dy: numpy.ndarray
    dx: numpy.ndarray
    """
    tracks = tracks[..., np.newaxis]
    CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0
    CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0
    dx = CX1 - CX2
    dy = CY1 - CY2
    norm = np.sqrt(dx**2 + dy**2) + 1e-6
    dx = dx / norm
    dy = dy / norm
    return dy, dx  # size: num_track x num_det


def linear_assignment(cost_matrix):
    """
    Solve the linear assignment problem, using lap if available and falling back
    to scipy.optimize.linear_sum_assignment otherwise.

    Parameters
    ----------
    cost_matrix: numpy.ndarray
        shape is [N, M]

    Returns
    -------
    indices: numpy.ndarray
        shape is [N, 2]
    """
    try:
        import lap

        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
        return np.array([[y[i], i] for i in x if i >= 0])
    except ImportError:
        from scipy.optimize import linear_sum_assignment

        x, y = linear_sum_assignment(cost_matrix)
        return np.array(list(zip(x, y)))


def associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):
    """
    Assigns detections to tracked objects (both represented as bounding boxes).
    Returns 3 lists: matches, unmatched_detections and unmatched_trackers.

    Parameters
    ----------
    detections: numpy.ndarray
        shape is [N, 4]
    trackers: numpy.ndarray
        shape is [M, 4]
    iou_threshold: float
        in [0, 1]. Default is 0.3
    """
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)

    iou_matrix = iou_batch(detections, trackers)

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            matched_indices = linear_assignment(-iou_matrix)
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IOU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


def associate(detections, trackers, iou_threshold, velocities, previous_obs, vdc_weight):
    """
    Assigns detections to tracked objects (both represented as bounding boxes).
    Returns 3 lists: matches, unmatched_detections and unmatched_trackers.

    Parameters
    ----------
    detections: numpy.ndarray
        shape is [N, 4]
    trackers: numpy.ndarray
        shape is [M, 4]
    iou_threshold: float
        in [0, 1]. Default is 0.3
    velocities: numpy.ndarray
        shape is [M, 2]
    previous_obs: numpy.ndarray
        shape is [M, 4]
    vdc_weight: float
    """
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)

    Y, X = speed_direction_batch(detections, previous_obs)
    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
    diff_angle_cos = inertia_X * X + inertia_Y * Y
    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
    diff_angle = np.arccos(diff_angle_cos)
    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi

    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0

    iou_matrix = iou_batch(detections, trackers)
    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
    # iou_matrix = iou_matrix * scores  # a trick that sometimes works; we don't encourage it
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
    angle_diff_cost = angle_diff_cost.T
    angle_diff_cost = angle_diff_cost * scores

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            matched_indices = linear_assignment(-(iou_matrix + angle_diff_cost))
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IOU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)


def associate_kitti(detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight):
    if len(trackers) == 0:
        return np.empty((0, 2), dtype=int), np.arange(len(detections)), np.empty((0, 5), dtype=int)

    """
    Cost from the velocity direction consistency

    Parameters
    ----------
    detections: numpy.ndarray
        shape is [N, 4]
    trackers: numpy.ndarray
        shape is [M, 4]
    det_cates: numpy.ndarray
        shape is [N, 1]
    iou_threshold: float
        in [0, 1]. Default is 0.3
    velocities: numpy.ndarray
        shape is [M, 2]
    previous_obs: numpy.ndarray
        shape is [M, 4]
    vdc_weight: float
    """
    Y, X = speed_direction_batch(detections, previous_obs)
    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]
    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)
    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)
    diff_angle_cos = inertia_X * X + inertia_Y * Y
    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)
    diff_angle = np.arccos(diff_angle_cos)
    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi

    valid_mask = np.ones(previous_obs.shape[0])
    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0
    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)

    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)
    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight
    angle_diff_cost = angle_diff_cost.T
    angle_diff_cost = angle_diff_cost * scores

    """
    Cost from IoU
    """
    iou_matrix = iou_batch(detections, trackers)

    """
    With multiple categories, generate the cost for category mismatch
    """
    num_dets = detections.shape[0]
    num_trk = trackers.shape[0]
    cate_matrix = np.zeros((num_dets, num_trk))
    for i in range(num_dets):
        for j in range(num_trk):
            if det_cates[i] != trackers[j, 4]:
                cate_matrix[i][j] = -1e6

    cost_matrix = -iou_matrix - angle_diff_cost - cate_matrix

    if min(iou_matrix.shape) > 0:
        a = (iou_matrix > iou_threshold).astype(np.int32)
        if a.sum(1).max() == 1 and a.sum(0).max() == 1:
            matched_indices = np.stack(np.where(a), axis=1)
        else:
            matched_indices = linear_assignment(cost_matrix)
    else:
        matched_indices = np.empty(shape=(0, 2))

    unmatched_detections = []
    for d, det in enumerate(detections):
        if d not in matched_indices[:, 0]:
            unmatched_detections.append(d)
    unmatched_trackers = []
    for t, trk in enumerate(trackers):
        if t not in matched_indices[:, 1]:
            unmatched_trackers.append(t)

    # filter out matches with low IOU
    matches = []
    for m in matched_indices:
        if iou_matrix[m[0], m[1]] < iou_threshold:
            unmatched_detections.append(m[0])
            unmatched_trackers.append(m[1])
        else:
            matches.append(m.reshape(1, 2))
    if len(matches) == 0:
        matches = np.empty((0, 2), dtype=int)
    else:
        matches = np.concatenate(matches, axis=0)

    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
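A quick sketch of the shape contract of iou_batch above: N detection boxes against M tracker boxes yield an (N, M) IoU matrix. The boxes are made up for illustration, and the import path is an assumption.

# Hypothetical demonstration of the pairwise IoU matrix shape.
import numpy as np
from src.gesturedetection.ocsort.association import iou_batch  # path is an assumption

dets = np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=float)  # 2 detections
trks = np.array([[0, 0, 10, 10]], dtype=float)                    # 1 tracker box
ious = iou_batch(dets, trks)
print(ious.shape)   # (2, 1)
print(ious[0, 0])   # 1.0 for the identical box; ious[1, 0] is 0.0 for the disjoint one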
src/gesturedetection/ocsort/kalmanboxtracker.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import print_function
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def convert_bbox_to_z(bbox):
|
| 7 |
+
"""
|
| 8 |
+
Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
|
| 9 |
+
[x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
|
| 10 |
+
the aspect ratio
|
| 11 |
+
"""
|
| 12 |
+
w = bbox[2] - bbox[0]
|
| 13 |
+
h = bbox[3] - bbox[1]
|
| 14 |
+
x = bbox[0] + w / 2.0
|
| 15 |
+
y = bbox[1] + h / 2.0
|
| 16 |
+
s = w * h # scale is just area
|
| 17 |
+
r = w / float(h + 1e-6)
|
| 18 |
+
return np.array([x, y, s, r]).reshape((4, 1))
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def speed_direction(bbox1, bbox2):
|
| 22 |
+
cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0
|
| 23 |
+
cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0
|
| 24 |
+
    speed = np.array([cy2 - cy1, cx2 - cx1])
    norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6
    return speed / norm


def convert_x_to_bbox(x, score=None):
    """
    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
    [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
    """
    w = np.sqrt(x[2] * x[3])
    h = x[2] / w
    if score is None:
        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0]).reshape((1, 4))
    else:
        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score]).reshape((1, 5))


class KalmanBoxTracker(object):
    """
    This class represents the internal state of individual tracked objects observed as bbox.
    """

    count = 0

    def __init__(self, bbox, delta_t=3, orig=False):
        """
        Initialises a tracker using the initial bounding box.
        """
        # define constant velocity model
        if not orig:
            from .kalmanfilter import KalmanFilterNew as KalmanFilter

            self.kf = KalmanFilter(dim_x=7, dim_z=4)
        else:
            from filterpy.kalman import KalmanFilter

            self.kf = KalmanFilter(dim_x=7, dim_z=4)
        self.kf.F = np.array(
            [
                [1, 0, 0, 0, 1, 0, 0],
                [0, 1, 0, 0, 0, 1, 0],
                [0, 0, 1, 0, 0, 0, 1],
                [0, 0, 0, 1, 0, 0, 0],
                [0, 0, 0, 0, 1, 0, 0],
                [0, 0, 0, 0, 0, 1, 0],
                [0, 0, 0, 0, 0, 0, 1],
            ]
        )
        self.kf.H = np.array(
            [[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]]
        )

        self.kf.R[2:, 2:] *= 10.0
        self.kf.P[4:, 4:] *= 1000.0  # give high uncertainty to the unobservable initial velocities
        self.kf.P *= 10.0
        self.kf.Q[-1, -1] *= 0.01
        self.kf.Q[4:, 4:] *= 0.01

        self.kf.x[:4] = convert_bbox_to_z(bbox)
        self.time_since_update = 0
        self.id = KalmanBoxTracker.count
        KalmanBoxTracker.count += 1
        self.history = []
        self.hits = 0
        self.hit_streak = 0
        self.age = 0
        """
        NOTE: [-1,-1,-1,-1,-1] is a compromise placeholder for the non-observation status, and the same value
        is returned by k_previous_obs. It is ugly, but it lets the observation array be generated in a fast and
        unified way (see k_observations = np.array([k_previous_obs(...)]) below), so let's bear with it for now.
        """
        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder
        self.observations = dict()
        self.history_observations = []
        self.velocity = None
        self.delta_t = delta_t

    def update(self, bbox):
        """
        Updates the state vector with the observed bbox.
        """
        if bbox is not None:
            if self.last_observation.sum() >= 0:  # a previous observation exists
                previous_box = None
                for i in range(self.delta_t):
                    dt = self.delta_t - i
                    if self.age - dt in self.observations:
                        previous_box = self.observations[self.age - dt]
                        break
                if previous_box is None:
                    previous_box = self.last_observation
                """
                Estimate the track speed direction with observations Delta t steps away.
                """
                self.velocity = speed_direction(previous_box, bbox)

            """
            Insert new observations. This is an ugly way to maintain both self.observations
            and self.history_observations. Bear with it for the moment.
            """
            self.last_observation = bbox
            self.observations[self.age] = bbox
            self.history_observations.append(bbox)

            self.time_since_update = 0
            self.history = []
            self.hits += 1
            self.hit_streak += 1
            self.kf.update(convert_bbox_to_z(bbox))
        else:
            self.kf.update(bbox)

    def predict(self):
        """
        Advances the state vector and returns the predicted bounding box estimate.
        """
        if (self.kf.x[6] + self.kf.x[2]) <= 0:
            self.kf.x[6] *= 0.0

        self.kf.predict()
        self.age += 1
        if self.time_since_update > 0:
            self.hit_streak = 0
        self.time_since_update += 1
        self.history.append(convert_x_to_bbox(self.kf.x))
        return self.history[-1]

    def get_state(self):
        """
        Returns the current bounding box estimate.
        """
        return convert_x_to_bbox(self.kf.x)
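For reference, a minimal sketch of the per-frame loop that drives this tracker (the detection values are hypothetical, and the import path assumes this repo's package layout): each frame calls predict() once to advance the constant-velocity model, then update() with either an [x1, y1, x2, y2, score] detection or None for a missed frame.

import numpy as np
from src.gesturedetection.ocsort.kalmanboxtracker import KalmanBoxTracker

tracker = KalmanBoxTracker(np.array([100.0, 100.0, 150.0, 180.0, 0.9]))
detections = [
    np.array([102.0, 101.0, 152.0, 182.0, 0.8]),
    None,  # a missed frame
    np.array([108.0, 104.0, 158.0, 186.0, 0.85]),
]
for det in detections:
    predicted_box = tracker.predict()  # predicted [x1, y1, x2, y2] for this frame
    tracker.update(det)                # None marks a missed detection
print(tracker.get_state())             # current bounding box estimate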
src/gesturedetection/ocsort/kalmanfilter.py
ADDED
@@ -0,0 +1,1557 @@
# -*- coding: utf-8 -*-
# pylint: disable=invalid-name, too-many-arguments, too-many-branches,
# pylint: disable=too-many-locals, too-many-instance-attributes, too-many-lines

"""
This module implements the linear Kalman filter in both an object
oriented and procedural form. The KalmanFilter class implements
the filter by storing the various matrices in instance variables,
minimizing the amount of bookkeeping you have to do.
All Kalman filters operate with a predict->update cycle. The
predict step, implemented with the method or function predict(),
uses the state transition matrix F to predict the state in the next
time period (epoch). The state is stored as a gaussian (x, P), where
x is the state (column) vector, and P is its covariance. Covariance
matrix Q specifies the process covariance. In Bayesian terms, this
prediction is called the *prior*, which you can think of colloquially
as the estimate prior to incorporating the measurement.
The update step, implemented with the method or function `update()`,
incorporates the measurement z with covariance R, into the state
estimate (x, P). The class stores the system uncertainty in S,
the innovation (residual between prediction and measurement in
measurement space) in y, and the Kalman gain in K. The procedural
form returns these variables to you. In Bayesian terms this computes
the *posterior* - the estimate after the information from the
measurement is incorporated.
Whether you use the OO form or procedural form is up to you. If
matrices such as H, R, and F are changing each epoch, you'll probably
opt to use the procedural form. If they are unchanging, the OO
form is perhaps easier to use since you won't need to keep track
of these matrices. This is especially useful if you are implementing
banks of filters or comparing various KF designs for performance;
a trivial coding bug could lead to using the wrong sets of matrices.
This module also offers an implementation of the RTS smoother, and
other helper functions, such as log likelihood computations.
The Saver class allows you to easily save the state of the
KalmanFilter class after every update.
This module expects NumPy arrays for all values that expect
arrays, although in a few cases, particularly method parameters,
it will accept types that convert to NumPy arrays, such as lists
of lists. These exceptions are documented in the method or function.
Examples
--------
The following example constructs a constant velocity kinematic
filter, filters noisy data, and plots the results. It also demonstrates
using the Saver class to save the state of the filter at each epoch.
.. code-block:: Python
    import matplotlib.pyplot as plt
    import numpy as np
    from numpy.random import randn
    from filterpy.kalman import KalmanFilter
    from filterpy.common import Q_discrete_white_noise, Saver
    r_std, q_std = 2., 0.003
    dt = 1.
    cv = KalmanFilter(dim_x=2, dim_z=1)
    cv.x = np.array([0., 1.]) # position, velocity
    cv.F = np.array([[1., dt], [0., 1.]])
    cv.R = np.array([[r_std**2]])
    cv.H = np.array([[1., 0.]])
    cv.P = np.diag([.1**2, .03**2])
    cv.Q = Q_discrete_white_noise(2, dt, q_std**2)
    saver = Saver(cv)
    for z in range(100):
        cv.predict()
        cv.update([z + randn() * r_std])
        saver.save() # save the filter's state
    saver.to_array()
    plt.plot(saver.x[:, 0])
    # plot all of the priors
    plt.plot(saver.x_prior[:, 0])
    # plot mahalanobis distance
    plt.figure()
    plt.plot(saver.mahalanobis)
This code implements the same filter using the procedural form
    x = np.array([0., 1.]) # position, velocity
    F = np.array([[1., dt], [0., 1.]])
    R = np.array([[r_std**2]])
    H = np.array([[1., 0.]])
    P = np.diag([.1**2, .03**2])
    Q = Q_discrete_white_noise(2, dt, q_std**2)
    xs = []
    for z in range(100):
        x, P = predict(x, P, F=F, Q=Q)
        x, P = update(x, P, z=[z + randn() * r_std], R=R, H=H)
        xs.append(x[0])
    plt.plot(xs)
For more examples see the test subdirectory, or refer to the
book cited below. In it I both teach Kalman filtering from basic
principles, and teach the use of this library in great detail.
FilterPy library.
http://github.com/rlabbe/filterpy
Documentation at:
https://filterpy.readthedocs.org
Supporting book at:
https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python
This is licensed under an MIT license. See the readme.MD file
for more information.
Copyright 2014-2018 Roger R Labbe Jr.
"""

from __future__ import absolute_import, division

import sys
from copy import deepcopy
from math import exp, log, sqrt

import numpy as np
import numpy.linalg as linalg
from filterpy.common import pretty_str, reshape_z
from filterpy.stats import logpdf
from numpy import dot, eye, isscalar, shape, zeros


class KalmanFilterNew(object):
    """Implements a Kalman filter. You are responsible for setting the
    various state variables to reasonable values; the defaults will
    not give you a functional filter.
    For now the best documentation is my free book Kalman and Bayesian
    Filters in Python [2]_. The test files in this directory also give you a
    basic idea of use, albeit without much description.
    In brief, you will first construct this object, specifying the size of
    the state vector with dim_x and the size of the measurement vector that
    you will be using with dim_z. These are mostly used to perform size checks
    when you assign values to the various matrices. For example, if you
    specified dim_z=2 and then try to assign a 3x3 matrix to R (the
    measurement noise matrix), you will get an assert exception because R
    should be 2x2. (If for whatever reason you need to alter the size of
    things midstream just use the underscore version of the matrices to
    assign directly: your_filter._R = a_3x3_matrix.)
    After construction the filter will have default matrices created for you,
    but you must specify the values for each. It's usually easiest to just
    overwrite them rather than assign to each element yourself. This will be
    clearer in the example below. All are of type numpy.array.
    Examples
    --------
    Here is a filter that tracks position and velocity using a sensor that only
    reads position.
    First construct the object with the required dimensionality. Here the state
    (`dim_x`) has 2 coefficients (position and velocity), and the measurement
    (`dim_z`) has one. In FilterPy `x` is the state, `z` is the measurement.
    .. code::
        from filterpy.kalman import KalmanFilter
        f = KalmanFilter(dim_x=2, dim_z=1)
    Assign the initial value for the state (position and velocity). You can do this
    with a two dimensional array like so:
    .. code::
        f.x = np.array([[2.],  # position
                        [0.]]) # velocity
    or just use a one dimensional array, which I prefer doing.
    .. code::
        f.x = np.array([2., 0.])
    Define the state transition matrix:
    .. code::
        f.F = np.array([[1., 1.],
                        [0., 1.]])
    Define the measurement function. Here we need to convert a position-velocity
    vector into just a position vector, so we use:
    .. code::
        f.H = np.array([[1., 0.]])
    Define the state's covariance matrix P.
    .. code::
        f.P = np.array([[1000., 0.],
                        [0., 1000.]])
    Now assign the measurement noise. Here the dimension is 1x1, so I can
    use a scalar
    .. code::
        f.R = 5
    I could have done this instead:
    .. code::
        f.R = np.array([[5.]])
    Note that this must be a 2 dimensional array.
    Finally, I will assign the process noise. Here I will take advantage of
    another FilterPy library function:
    .. code::
        from filterpy.common import Q_discrete_white_noise
        f.Q = Q_discrete_white_noise(dim=2, dt=0.1, var=0.13)
    Now just perform the standard predict/update loop:
    .. code::
        while some_condition_is_true:
            z = get_sensor_reading()
            f.predict()
            f.update(z)
            do_something_with_estimate(f.x)
    **Procedural Form**
    This module also contains stand alone functions to perform Kalman filtering.
    Use these if you are not a fan of objects.
    **Example**
    .. code::
        while True:
            z, R = read_sensor()
            x, P = predict(x, P, F, Q)
            x, P = update(x, P, z, R, H)
    See my book Kalman and Bayesian Filters in Python [2]_.
    You will have to set the following attributes after constructing this
    object for the filter to perform properly. Please note that there are
    various checks in place to ensure that you have made everything the
    'correct' size. However, it is possible to provide incorrectly sized
    arrays such that the linear algebra can not perform an operation.
    It can also fail silently - you can end up with matrices of a size that
    allows the linear algebra to work, but are the wrong shape for the problem
    you are trying to solve.
    Parameters
    ----------
    dim_x : int
        Number of state variables for the Kalman filter. For example, if
        you are tracking the position and velocity of an object in two
        dimensions, dim_x would be 4.
        This is used to set the default size of P, Q, and u
    dim_z : int
        Number of measurement inputs. For example, if the sensor
        provides you with position in (x,y), dim_z would be 2.
    dim_u : int (optional)
        size of the control input, if it is being used.
        Default value of 0 indicates it is not used.
    compute_log_likelihood : bool (default = True)
        Computes log likelihood by default, but this can be a slow
        computation, so if you never use it you can turn this computation
        off.
    Attributes
    ----------
    x : numpy.array(dim_x, 1)
        Current state estimate. Any call to update() or predict() updates
        this variable.
    P : numpy.array(dim_x, dim_x)
        Current state covariance matrix. Any call to update() or predict()
        updates this variable.
    x_prior : numpy.array(dim_x, 1)
        Prior (predicted) state estimate. The *_prior and *_post attributes
        are for convenience; they store the prior and posterior of the
        current epoch. Read Only.
    P_prior : numpy.array(dim_x, dim_x)
        Prior (predicted) state covariance matrix. Read Only.
    x_post : numpy.array(dim_x, 1)
        Posterior (updated) state estimate. Read Only.
    P_post : numpy.array(dim_x, dim_x)
        Posterior (updated) state covariance matrix. Read Only.
    z : numpy.array
        Last measurement used in update(). Read only.
    R : numpy.array(dim_z, dim_z)
        Measurement noise covariance matrix. Also known as the
        observation covariance.
    Q : numpy.array(dim_x, dim_x)
        Process noise covariance matrix. Also known as the transition
        covariance.
    F : numpy.array()
        State Transition matrix. Also known as `A` in some formulations.
    H : numpy.array(dim_z, dim_x)
        Measurement function. Also known as the observation matrix, or as `C`.
    y : numpy.array
        Residual of the update step. Read only.
    K : numpy.array(dim_x, dim_z)
        Kalman gain of the update step. Read only.
    S : numpy.array
        System uncertainty (P projected to measurement space). Read only.
    SI : numpy.array
        Inverse system uncertainty. Read only.
    log_likelihood : float
        log-likelihood of the last measurement. Read only.
    likelihood : float
        likelihood of last measurement. Read only.
        Computed from the log-likelihood. The log-likelihood can be very
        small, meaning a large negative value such as -28000. Taking the
        exp() of that results in 0.0, which can break typical algorithms
        which multiply by this value, so by default we always return a
        number >= sys.float_info.min.
    mahalanobis : float
        mahalanobis distance of the innovation. Read only.
    inv : function, default numpy.linalg.inv
        If you prefer another inverse function, such as the Moore-Penrose
        pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv
        This is only used to invert self.S. If you know it is diagonal, you
        might choose to set it to filterpy.common.inv_diagonal, which is
        several times faster than numpy.linalg.inv for diagonal matrices.
    alpha : float
        Fading memory setting. 1.0 gives the normal Kalman filter, and
        values slightly larger than 1.0 (such as 1.02) give a fading
        memory effect - previous measurements have less influence on the
        filter's estimates. This formulation of the Fading memory filter
        (there are many) is due to Dan Simon [1]_.
    References
    ----------
    .. [1] Dan Simon. "Optimal State Estimation." John Wiley & Sons.
        p. 208-212. (2006)
    .. [2] Roger Labbe. "Kalman and Bayesian Filters in Python"
        https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python
    """

    def __init__(self, dim_x, dim_z, dim_u=0):
        if dim_x < 1:
            raise ValueError("dim_x must be 1 or greater")
        if dim_z < 1:
            raise ValueError("dim_z must be 1 or greater")
        if dim_u < 0:
            raise ValueError("dim_u must be 0 or greater")

        self.dim_x = dim_x
        self.dim_z = dim_z
        self.dim_u = dim_u

        self.x = zeros((dim_x, 1))  # state
        self.P = eye(dim_x)  # uncertainty covariance
        self.Q = eye(dim_x)  # process uncertainty
        self.B = None  # control transition matrix
        self.F = eye(dim_x)  # state transition matrix
        self.H = zeros((dim_z, dim_x))  # measurement function
        self.R = eye(dim_z)  # measurement uncertainty
        self._alpha_sq = 1.0  # fading memory control
        self.M = np.zeros((dim_x, dim_z))  # process-measurement cross correlation
        self.z = np.array([[None] * self.dim_z]).T

        # gain and residual are computed during the innovation step. We
        # save them so that in case you want to inspect them for various
        # purposes
        self.K = np.zeros((dim_x, dim_z))  # kalman gain
        self.y = zeros((dim_z, 1))
        self.S = np.zeros((dim_z, dim_z))  # system uncertainty
        self.SI = np.zeros((dim_z, dim_z))  # inverse system uncertainty

        # identity matrix. Do not alter this.
        self._I = np.eye(dim_x)

        # these will always be a copy of x,P after predict() is called
        self.x_prior = self.x.copy()
        self.P_prior = self.P.copy()

        # these will always be a copy of x,P after update() is called
        self.x_post = self.x.copy()
        self.P_post = self.P.copy()

        # only computed if requested via property
        self._log_likelihood = log(sys.float_info.min)
        self._likelihood = sys.float_info.min
        self._mahalanobis = None

        # keep all observations
        self.history_obs = []

        self.inv = np.linalg.inv

        self.attr_saved = None
        self.observed = False

    def predict(self, u=None, B=None, F=None, Q=None):
        """
        Predict next state (prior) using the Kalman filter state propagation
        equations.
        Parameters
        ----------
        u : np.array, default 0
            Optional control vector.
        B : np.array(dim_x, dim_u), or None
            Optional control transition matrix; a value of None
            will cause the filter to use `self.B`.
        F : np.array(dim_x, dim_x), or None
            Optional state transition matrix; a value of None
            will cause the filter to use `self.F`.
        Q : np.array(dim_x, dim_x), scalar, or None
            Optional process noise matrix; a value of None will cause the
            filter to use `self.Q`.
        """

        if B is None:
            B = self.B
        if F is None:
            F = self.F
        if Q is None:
            Q = self.Q
        elif isscalar(Q):
            Q = eye(self.dim_x) * Q

        # x = Fx + Bu
        if B is not None and u is not None:
            self.x = dot(F, self.x) + dot(B, u)
        else:
            self.x = dot(F, self.x)

        # P = FPF' + Q
        self.P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q

        # save prior
        self.x_prior = self.x.copy()
        self.P_prior = self.P.copy()

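    # A minimal numeric sketch (hypothetical 2-state filter) of the predict
    # equations x = Fx + Bu and P = FPF' + Q implemented above:
    #
    #     kf = KalmanFilterNew(dim_x=2, dim_z=1)
    #     kf.x = np.array([[0.], [1.]])          # position 0, velocity 1
    #     kf.F = np.array([[1., 1.], [0., 1.]])  # dt = 1
    #     kf.predict()                           # kf.x is now [[1.], [1.]]
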
    def freeze(self):
        """
        Save the parameters before a non-observation forward pass.
        """
        self.attr_saved = deepcopy(self.__dict__)

    def unfreeze(self):
        if self.attr_saved is not None:
            new_history = deepcopy(self.history_obs)
            self.__dict__ = self.attr_saved
            # self.history_obs = new_history
            self.history_obs = self.history_obs[:-1]
            occur = [int(d is None) for d in new_history]
            indices = np.where(np.array(occur) == 0)[0]
            index1 = indices[-2]
            index2 = indices[-1]
            box1 = new_history[index1]
            x1, y1, s1, r1 = box1
            w1 = np.sqrt(s1 * r1)
            h1 = np.sqrt(s1 / r1)
            box2 = new_history[index2]
            x2, y2, s2, r2 = box2
            w2 = np.sqrt(s2 * r2)
            h2 = np.sqrt(s2 / r2)
            time_gap = index2 - index1
            dx = (x2 - x1) / time_gap
            dy = (y2 - y1) / time_gap
            dw = (w2 - w1) / time_gap
            dh = (h2 - h1) / time_gap
            for i in range(index2 - index1):
                """
                The default virtual trajectory is generated by linear
                motion (constant speed hypothesis); you could modify this
                part to implement your own.
                """
                x = x1 + (i + 1) * dx
                y = y1 + (i + 1) * dy
                w = w1 + (i + 1) * dw
                h = h1 + (i + 1) * dh
                s = w * h
                r = w / float(h)
                new_box = np.array([x, y, s, r]).reshape((4, 1))
                """
                I still use the predict-update loop here to refresh the parameters,
                but this could be made faster by directly modifying the internal
                parameters as suggested in the paper. I keep this naive but slow
                way for readability.
                """
                self.update(new_box)
                if not i == (index2 - index1 - 1):
                    self.predict()

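    # A minimal sketch (hypothetical call sequence) of how update() drives
    # freeze()/unfreeze(): the first miss snapshots the filter, and the first
    # observation after a gap rolls the snapshot back and replays
    # predict/update along the linearly interpolated virtual trajectory:
    #
    #     kf.update(z1)    # observed; self.observed -> True
    #     kf.predict()
    #     kf.update(None)  # miss: freeze() saves __dict__, observed -> False
    #     kf.predict()
    #     kf.update(None)  # still missing; the prediction keeps drifting
    #     kf.predict()
    #     kf.update(z4)    # re-observed: unfreeze() re-runs the gap, then
    #                      # the normal update incorporates z4
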
    def update(self, z, R=None, H=None):
        """
        Add a new measurement (z) to the Kalman filter.
        If z is None, nothing is computed. However, x_post and P_post are
        updated with the prior (x_prior, P_prior), and self.z is set to None.
        Parameters
        ----------
        z : (dim_z, 1): array_like
            measurement for this update. z can be a scalar if dim_z is 1,
            otherwise it must be convertible to a column vector.
            If you pass in a value of H, z must be a column vector of
            the correct size.
        R : np.array, scalar, or None
            Optionally provide R to override the measurement noise for this
            one call, otherwise self.R will be used.
        H : np.array, or None
            Optionally provide H to override the measurement function for this
            one call, otherwise self.H will be used.
        """

        # set to None to force recompute
        self._log_likelihood = None
        self._likelihood = None
        self._mahalanobis = None

        # append the observation
        self.history_obs.append(z)

        if z is None:
            if self.observed:
                """
                Got no observation, so freeze the current parameters for future
                potential online smoothing.
                """
                self.freeze()
            self.observed = False
            self.z = np.array([[None] * self.dim_z]).T
            self.x_post = self.x.copy()
            self.P_post = self.P.copy()
            self.y = zeros((self.dim_z, 1))
            return

        # self.observed = True
        if not self.observed:
            """
            Got an observation, so use online smoothing to re-update the parameters.
            """
            self.unfreeze()
        self.observed = True

        if R is None:
            R = self.R
        elif isscalar(R):
            R = eye(self.dim_z) * R

        if H is None:
            z = reshape_z(z, self.dim_z, self.x.ndim)
            H = self.H

        # y = z - Hx
        # error (residual) between measurement and prediction
        self.y = z - dot(H, self.x)

        # common subexpression for speed
        PHT = dot(self.P, H.T)

        # S = HPH' + R
        # project system uncertainty into measurement space
        self.S = dot(H, PHT) + R
        self.SI = self.inv(self.S)
        # K = PH'inv(S)
        # map system uncertainty into kalman gain
        self.K = dot(PHT, self.SI)

        # x = x + Ky
        # predict new x with residual scaled by the kalman gain
        self.x = self.x + dot(self.K, self.y)

        # P = (I-KH)P(I-KH)' + KRK'
        # This is more numerically stable
        # and works for non-optimal K vs the equation
        # P = (I-KH)P usually seen in the literature.

        I_KH = self._I - dot(self.K, H)
        self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(self.K, R), self.K.T)

        # save measurement and posterior state
        self.z = deepcopy(z)
        self.x_post = self.x.copy()
        self.P_post = self.P.copy()

    def predict_steadystate(self, u=0, B=None):
        """
        Predict state (prior) using the Kalman filter state propagation
        equations. Only x is updated, P is left unchanged. See
        update_steadystate() for a longer explanation of when to use this
        method.
        Parameters
        ----------
        u : np.array
            Optional control vector. If non-zero, it is multiplied by B
            to create the control input into the system.
        B : np.array(dim_x, dim_u), or None
            Optional control transition matrix; a value of None
            will cause the filter to use `self.B`.
        """

        if B is None:
            B = self.B

        # x = Fx + Bu
        if B is not None:
            self.x = dot(self.F, self.x) + dot(B, u)
        else:
            self.x = dot(self.F, self.x)

        # save prior
        self.x_prior = self.x.copy()
        self.P_prior = self.P.copy()

    def update_steadystate(self, z):
        """
        Add a new measurement (z) to the Kalman filter without recomputing
        the Kalman gain K, the state covariance P, or the system
        uncertainty S.
        You can use this for LTI systems since the Kalman gain and covariance
        converge to a fixed value. Precompute these and assign them explicitly,
        or run the Kalman filter using the normal predict()/update() cycle
        until they converge.
        The main advantage of this call is speed. We do significantly less
        computation, notably avoiding a costly matrix inversion.
        Use in conjunction with predict_steadystate(), otherwise P will grow
        without bound.
        Parameters
        ----------
        z : (dim_z, 1): array_like
            measurement for this update. z can be a scalar if dim_z is 1,
            otherwise it must be convertible to a column vector.
        Examples
        --------
        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter
        >>> # let filter converge on representative data, then save K and P
        >>> for i in range(100):
        >>>     cv.predict()
        >>>     cv.update([i, i, i])
        >>> saved_K = np.copy(cv.K)
        >>> saved_P = np.copy(cv.P)
        later on:
        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter
        >>> cv.K = np.copy(saved_K)
        >>> cv.P = np.copy(saved_P)
        >>> for i in range(100):
        >>>     cv.predict_steadystate()
        >>>     cv.update_steadystate([i, i, i])
        """

        # set to None to force recompute
        self._log_likelihood = None
        self._likelihood = None
        self._mahalanobis = None

        if z is None:
            self.z = np.array([[None] * self.dim_z]).T
            self.x_post = self.x.copy()
            self.P_post = self.P.copy()
            self.y = zeros((self.dim_z, 1))
            return

        z = reshape_z(z, self.dim_z, self.x.ndim)

        # y = z - Hx
        # error (residual) between measurement and prediction
        self.y = z - dot(self.H, self.x)

        # x = x + Ky
        # predict new x with residual scaled by the kalman gain
        self.x = self.x + dot(self.K, self.y)

        self.z = deepcopy(z)
        self.x_post = self.x.copy()
        self.P_post = self.P.copy()

        # set to None to force recompute
        self._log_likelihood = None
        self._likelihood = None
        self._mahalanobis = None

    def update_correlated(self, z, R=None, H=None):
        """Add a new measurement (z) to the Kalman filter assuming that
        process noise and measurement noise are correlated as defined in
        the `self.M` matrix.
        A partial derivation can be found in [1]_.
        If z is None, nothing is changed.
        Parameters
        ----------
        z : (dim_z, 1): array_like
            measurement for this update. z can be a scalar if dim_z is 1,
            otherwise it must be convertible to a column vector.
        R : np.array, scalar, or None
            Optionally provide R to override the measurement noise for this
            one call, otherwise self.R will be used.
        H : np.array, or None
            Optionally provide H to override the measurement function for this
            one call, otherwise self.H will be used.
        References
        ----------
        .. [1] Bulut, Y. (2011). Applied Kalman filter theory (Doctoral dissertation, Northeastern University).
            http://people.duke.edu/~hpgavin/SystemID/References/Balut-KalmanFilter-PhD-NEU-2011.pdf
        """

        # set to None to force recompute
        self._log_likelihood = None
        self._likelihood = None
        self._mahalanobis = None

        if z is None:
            self.z = np.array([[None] * self.dim_z]).T
            self.x_post = self.x.copy()
            self.P_post = self.P.copy()
            self.y = zeros((self.dim_z, 1))
            return

        if R is None:
            R = self.R
        elif isscalar(R):
            R = eye(self.dim_z) * R

        # rename for readability and a tiny extra bit of speed
        if H is None:
            z = reshape_z(z, self.dim_z, self.x.ndim)
            H = self.H

        # handle special case: if z is in form [[z]] but x is not a column
        # vector dimensions will not match
        if self.x.ndim == 1 and shape(z) == (1, 1):
            z = z[0]

        if shape(z) == ():  # is it scalar, e.g. z=3 or z=np.array(3)
            z = np.asarray([z])

        # y = z - Hx
        # error (residual) between measurement and prediction
        self.y = z - dot(H, self.x)

        # common subexpression for speed
        PHT = dot(self.P, H.T)

        # project system uncertainty into measurement space
        self.S = dot(H, PHT) + dot(H, self.M) + dot(self.M.T, H.T) + R
        self.SI = self.inv(self.S)

        # K = PH'inv(S)
        # map system uncertainty into kalman gain
        self.K = dot(PHT + self.M, self.SI)

        # x = x + Ky
        # predict new x with residual scaled by the kalman gain
        self.x = self.x + dot(self.K, self.y)
        self.P = self.P - dot(self.K, dot(H, self.P) + self.M.T)

        self.z = deepcopy(z)
        self.x_post = self.x.copy()
        self.P_post = self.P.copy()

    def batch_filter(self, zs, Fs=None, Qs=None, Hs=None, Rs=None, Bs=None, us=None, update_first=False, saver=None):
        """Batch processes a sequence of measurements.
        Parameters
        ----------
        zs : list-like
            list of measurements at each time step `self.dt`. Missing
            measurements must be represented by `None`.
        Fs : None, list-like, default=None
            optional value or list of values to use for the state transition
            matrix F.
            If Fs is None then self.F is used for all epochs.
            Otherwise it must contain a list-like list of F's, one for
            each epoch. This allows you to have varying F per epoch.
        Qs : None, np.array or list-like, default=None
            optional value or list of values to use for the process error
            covariance Q.
            If Qs is None then self.Q is used for all epochs.
            Otherwise it must contain a list-like list of Q's, one for
            each epoch. This allows you to have varying Q per epoch.
        Hs : None, np.array or list-like, default=None
            optional list of values to use for the measurement matrix H.
            If Hs is None then self.H is used for all epochs.
            If Hs contains a single matrix, then it is used as H for all
            epochs.
            Otherwise it must contain a list-like list of H's, one for
            each epoch. This allows you to have varying H per epoch.
        Rs : None, np.array or list-like, default=None
            optional list of values to use for the measurement error
            covariance R.
            If Rs is None then self.R is used for all epochs.
            Otherwise it must contain a list-like list of R's, one for
            each epoch. This allows you to have varying R per epoch.
        Bs : None, np.array or list-like, default=None
            optional list of values to use for the control transition matrix B.
            If Bs is None then self.B is used for all epochs.
            Otherwise it must contain a list-like list of B's, one for
            each epoch. This allows you to have varying B per epoch.
        us : None, np.array or list-like, default=None
            optional list of values to use for the control input vector;
            If us is None then None is used for all epochs (equivalent to 0,
            or no control input).
            Otherwise it must contain a list-like list of u's, one for
            each epoch.
        update_first : bool, optional, default=False
            controls whether the order of operations is update followed by
            predict, or predict followed by update. Default is predict->update.
        saver : filterpy.common.Saver, optional
            filterpy.common.Saver object. If provided, saver.save() will be
            called after every epoch
        Returns
        -------
        means : np.array((n,dim_x,1))
            array of the state for each time step after the update. Each entry
            is an np.array. In other words `means[k,:]` is the state at step
            `k`.
        covariance : np.array((n,dim_x,dim_x))
            array of the covariances for each time step after the update.
            In other words `covariance[k,:,:]` is the covariance at step `k`.
        means_predictions : np.array((n,dim_x,1))
            array of the state for each time step after the predictions. Each
            entry is an np.array. In other words `means[k,:]` is the state at
            step `k`.
        covariance_predictions : np.array((n,dim_x,dim_x))
            array of the covariances for each time step after the prediction.
            In other words `covariance[k,:,:]` is the covariance at step `k`.
        Examples
        --------
        .. code-block:: Python
            # this example demonstrates tracking a measurement where the time
            # between measurement varies, as stored in dts. This requires
            # that F be recomputed for each epoch. The output is then smoothed
            # with an RTS smoother.
            zs = [t + random.randn()*4 for t in range(40)]
            Fs = [np.array([[1., dt], [0., 1.]]) for dt in dts]
            (mu, cov, _, _) = kf.batch_filter(zs, Fs=Fs)
            (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs)
        """

        # pylint: disable=too-many-statements
        n = np.size(zs, 0)
        if Fs is None:
            Fs = [self.F] * n
        if Qs is None:
            Qs = [self.Q] * n
        if Hs is None:
            Hs = [self.H] * n
        if Rs is None:
            Rs = [self.R] * n
        if Bs is None:
            Bs = [self.B] * n
        if us is None:
            us = [0] * n

        # mean estimates from Kalman Filter
        if self.x.ndim == 1:
            means = zeros((n, self.dim_x))
            means_p = zeros((n, self.dim_x))
        else:
            means = zeros((n, self.dim_x, 1))
            means_p = zeros((n, self.dim_x, 1))

        # state covariances from Kalman Filter
        covariances = zeros((n, self.dim_x, self.dim_x))
        covariances_p = zeros((n, self.dim_x, self.dim_x))

        if update_first:
            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):

                self.update(z, R=R, H=H)
                means[i, :] = self.x
                covariances[i, :, :] = self.P

                self.predict(u=u, B=B, F=F, Q=Q)
                means_p[i, :] = self.x
                covariances_p[i, :, :] = self.P

                if saver is not None:
                    saver.save()
        else:
            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):

                self.predict(u=u, B=B, F=F, Q=Q)
                means_p[i, :] = self.x
                covariances_p[i, :, :] = self.P

                self.update(z, R=R, H=H)
                means[i, :] = self.x
                covariances[i, :, :] = self.P

                if saver is not None:
                    saver.save()

        return (means, covariances, means_p, covariances_p)

    def rts_smoother(self, Xs, Ps, Fs=None, Qs=None, inv=np.linalg.inv):
        """
        Runs the Rauch-Tung-Striebel Kalman smoother on a set of
        means and covariances computed by a Kalman filter. The usual input
        would come from the output of `KalmanFilter.batch_filter()`.
        Parameters
        ----------
        Xs : numpy.array
            array of the means (state variable x) of the output of a Kalman
            filter.
        Ps : numpy.array
            array of the covariances of the output of a Kalman filter.
        Fs : list-like collection of numpy.array, optional
            State transition matrix of the Kalman filter at each time step.
            Optional, if not provided the filter's self.F will be used
        Qs : list-like collection of numpy.array, optional
            Process noise of the Kalman filter at each time step. Optional,
            if not provided the filter's self.Q will be used
        inv : function, default numpy.linalg.inv
            If you prefer another inverse function, such as the Moore-Penrose
            pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv
        Returns
        -------
        x : numpy.ndarray
            smoothed means
        P : numpy.ndarray
            smoothed state covariances
        K : numpy.ndarray
            smoother gain at each step
        Pp : numpy.ndarray
            Predicted state covariances
        Examples
        --------
        .. code-block:: Python
            zs = [t + random.randn()*4 for t in range(40)]
            (mu, cov, _, _) = kalman.batch_filter(zs)
            (x, P, K, Pp) = rts_smoother(mu, cov, kf.F, kf.Q)
        """

        if len(Xs) != len(Ps):
            raise ValueError("length of Xs and Ps must be the same")

        n = Xs.shape[0]
        dim_x = Xs.shape[1]

        if Fs is None:
            Fs = [self.F] * n
        if Qs is None:
            Qs = [self.Q] * n

        # smoother gain
        K = zeros((n, dim_x, dim_x))

        x, P, Pp = Xs.copy(), Ps.copy(), Ps.copy()
        for k in range(n - 2, -1, -1):
            Pp[k] = dot(dot(Fs[k + 1], P[k]), Fs[k + 1].T) + Qs[k + 1]

            # pylint: disable=bad-whitespace
            K[k] = dot(dot(P[k], Fs[k + 1].T), inv(Pp[k]))
            x[k] += dot(K[k], x[k + 1] - dot(Fs[k + 1], x[k]))
            P[k] += dot(dot(K[k], P[k + 1] - Pp[k]), K[k].T)

        return (x, P, K, Pp)

    def get_prediction(self, u=None, B=None, F=None, Q=None):
        """
        Predicts the next state (prior) using the Kalman filter state
        propagation equations and returns it without modifying the object.
        Parameters
        ----------
        u : np.array, default 0
            Optional control vector.
        B : np.array(dim_x, dim_u), or None
            Optional control transition matrix; a value of None
            will cause the filter to use `self.B`.
        F : np.array(dim_x, dim_x), or None
            Optional state transition matrix; a value of None
            will cause the filter to use `self.F`.
        Q : np.array(dim_x, dim_x), scalar, or None
            Optional process noise matrix; a value of None will cause the
            filter to use `self.Q`.
        Returns
        -------
        (x, P) : tuple
            State vector and covariance array of the prediction.
        """

        if B is None:
            B = self.B
        if F is None:
            F = self.F
        if Q is None:
            Q = self.Q
        elif isscalar(Q):
            Q = eye(self.dim_x) * Q

        # x = Fx + Bu
        if B is not None and u is not None:
            x = dot(F, self.x) + dot(B, u)
        else:
            x = dot(F, self.x)

        # P = FPF' + Q
        P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q

        return x, P

    def get_update(self, z=None):
        """
        Computes the new estimate based on measurement `z` and returns it
        without altering the state of the filter.
        Parameters
        ----------
        z : (dim_z, 1): array_like
            measurement for this update. z can be a scalar if dim_z is 1,
            otherwise it must be convertible to a column vector.
        Returns
        -------
        (x, P) : tuple
            State vector and covariance array of the update.
        """

        if z is None:
            return self.x, self.P
        z = reshape_z(z, self.dim_z, self.x.ndim)

        R = self.R
        H = self.H
        P = self.P
        x = self.x

        # error (residual) between measurement and prediction
        y = z - dot(H, x)

        # common subexpression for speed
        PHT = dot(P, H.T)

        # project system uncertainty into measurement space
        S = dot(H, PHT) + R

        # map system uncertainty into kalman gain
        K = dot(PHT, self.inv(S))

        # predict new x with residual scaled by the kalman gain
        x = x + dot(K, y)

        # P = (I-KH)P(I-KH)' + KRK'
        I_KH = self._I - dot(K, H)
        P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)

        return x, P

    def residual_of(self, z):
        """
        Returns the residual for the given measurement (z). Does not alter
        the state of the filter.
        """
        z = reshape_z(z, self.dim_z, self.x.ndim)
        return z - dot(self.H, self.x_prior)

    def measurement_of_state(self, x):
        """
        Helper function that converts a state into a measurement.
        Parameters
        ----------
        x : np.array
            kalman state vector
        Returns
        -------
        z : (dim_z, 1): array_like
            measurement for this update. z can be a scalar if dim_z is 1,
            otherwise it must be convertible to a column vector.
        """

        return dot(self.H, x)

    @property
    def log_likelihood(self):
        """
        log-likelihood of the last measurement.
        """
        if self._log_likelihood is None:
            self._log_likelihood = logpdf(x=self.y, cov=self.S)
        return self._log_likelihood

    @property
    def likelihood(self):
        """
        Computed from the log-likelihood. The log-likelihood can be very
        small, meaning a large negative value such as -28000. Taking the
        exp() of that results in 0.0, which can break typical algorithms
        which multiply by this value, so by default we always return a
        number >= sys.float_info.min.
        """
        if self._likelihood is None:
            self._likelihood = exp(self.log_likelihood)
            if self._likelihood == 0:
                self._likelihood = sys.float_info.min
        return self._likelihood

    @property
    def mahalanobis(self):
        """
        Mahalanobis distance of the measurement. E.g. 3 means the measurement
        was 3 standard deviations away from the predicted value.
        Returns
        -------
        mahalanobis : float
        """
        if self._mahalanobis is None:
            self._mahalanobis = sqrt(float(dot(dot(self.y.T, self.SI), self.y)))
        return self._mahalanobis

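    # A small gating sketch (hypothetical threshold): after update(), the
    # mahalanobis property can flag measurements that were unlikely under
    # the predicted distribution:
    #
    #     kf.predict()
    #     kf.update(z)
    #     if kf.mahalanobis > 3.0:  # more than 3 sigma from the prediction
    #         ...  # e.g. treat z as an outlier and discount the track
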
@property
|
| 1044 |
+
def alpha(self):
|
| 1045 |
+
"""
|
| 1046 |
+
Fading memory setting. 1.0 gives the normal Kalman filter, and
|
| 1047 |
+
values slightly larger than 1.0 (such as 1.02) give a fading
|
| 1048 |
+
memory effect - previous measurements have less influence on the
|
| 1049 |
+
filter's estimates. This formulation of the Fading memory filter
|
| 1050 |
+
(there are many) is due to Dan Simon [1]_.
|
| 1051 |
+
"""
|
| 1052 |
+
return self._alpha_sq**0.5

    def log_likelihood_of(self, z):
        """
        log likelihood of the measurement `z`. This should only be called
        after a call to update(). Calling after predict() will yield an
        incorrect result."""

        if z is None:
            return log(sys.float_info.min)
        return logpdf(z, dot(self.H, self.x), self.S)

    @alpha.setter
    def alpha(self, value):
        if not np.isscalar(value) or value < 1:
            raise ValueError("alpha must be a float greater than or equal to 1")

        self._alpha_sq = value**2

    def __repr__(self):
        return "\n".join(
            [
                "KalmanFilter object",
                pretty_str("dim_x", self.dim_x),
                pretty_str("dim_z", self.dim_z),
                pretty_str("dim_u", self.dim_u),
                pretty_str("x", self.x),
                pretty_str("P", self.P),
                pretty_str("x_prior", self.x_prior),
                pretty_str("P_prior", self.P_prior),
                pretty_str("x_post", self.x_post),
                pretty_str("P_post", self.P_post),
                pretty_str("F", self.F),
                pretty_str("Q", self.Q),
                pretty_str("R", self.R),
                pretty_str("H", self.H),
                pretty_str("K", self.K),
                pretty_str("y", self.y),
                pretty_str("S", self.S),
                pretty_str("SI", self.SI),
                pretty_str("M", self.M),
                pretty_str("B", self.B),
                pretty_str("z", self.z),
                pretty_str("log-likelihood", self.log_likelihood),
                pretty_str("likelihood", self.likelihood),
                pretty_str("mahalanobis", self.mahalanobis),
                pretty_str("alpha", self.alpha),
                pretty_str("inv", self.inv),
            ]
        )

    def test_matrix_dimensions(self, z=None, H=None, R=None, F=None, Q=None):
        """
        Performs a series of asserts to check that the size of everything
        is what it should be. This can help you debug problems in your design.
        If you pass in H, R, F, Q those will be used instead of this object's
        value for those matrices.

        Testing `z` (the measurement) is problematic. x is a vector, and can be
        implemented as either a 1D array or as an nx1 column vector. Thus Hx
        can be of different shapes. Then, if Hx is a single value, it can
        be either a 1D array or 2D vector. If either is true, z can reasonably
        be a scalar (either '3' or np.array('3') are scalars under this
        definition), a 1D, 1 element array, or a 2D, 1 element array. You are
        allowed to pass in any combination that works.
        """

        if H is None:
            H = self.H
        if R is None:
            R = self.R
        if F is None:
            F = self.F
        if Q is None:
            Q = self.Q
        x = self.x
        P = self.P

        assert x.ndim == 1 or x.ndim == 2, "x must have one or two dimensions, but has {}".format(x.ndim)

        if x.ndim == 1:
            assert x.shape[0] == self.dim_x, "Shape of x must be ({},{}), but is {}".format(self.dim_x, 1, x.shape)
        else:
            assert x.shape == (self.dim_x, 1), "Shape of x must be ({},{}), but is {}".format(self.dim_x, 1, x.shape)

        assert P.shape == (self.dim_x, self.dim_x), "Shape of P must be ({},{}), but is {}".format(
            self.dim_x, self.dim_x, P.shape
        )

        assert Q.shape == (self.dim_x, self.dim_x), "Shape of Q must be ({},{}), but is {}".format(
            self.dim_x, self.dim_x, Q.shape
        )

        assert F.shape == (self.dim_x, self.dim_x), "Shape of F must be ({},{}), but is {}".format(
            self.dim_x, self.dim_x, F.shape
        )

        assert np.ndim(H) == 2, "Shape of H must be (dim_z, {}), but is {}".format(P.shape[0], shape(H))

        assert H.shape[1] == P.shape[0], "Shape of H must be (dim_z, {}), but is {}".format(P.shape[0], H.shape)

        # shape of R must be the same as HPH'
        hph_shape = (H.shape[0], H.shape[0])
        r_shape = shape(R)

        if H.shape[0] == 1:
            # R can be scalar, 1D, or 2D in this case
            assert r_shape in [(), (1,), (1, 1)], "R must be scalar or one element array, but is shaped {}".format(
                r_shape
            )
        else:
            assert r_shape == hph_shape, "shape of R should be {} but it is {}".format(hph_shape, r_shape)

        if z is not None:
            z_shape = shape(z)
        else:
            z_shape = (self.dim_z, 1)

        # H@x must have shape of z
        Hx = dot(H, x)

        if z_shape == ():  # scalar or np.array(scalar)
            assert Hx.ndim == 1 or shape(Hx) == (1, 1), "shape of z should be {}, not {} for the given H".format(
                shape(Hx), z_shape
            )

        elif shape(Hx) == (1,):
            assert z_shape[0] == 1, "Shape of z must be {} for the given H".format(shape(Hx))

        else:
            assert z_shape == shape(Hx) or (
                len(z_shape) == 1 and shape(Hx) == (z_shape[0], 1)
            ), "shape of z should be {}, not {} for the given H".format(shape(Hx), z_shape)

        if np.ndim(Hx) > 1 and shape(Hx) != (1, 1):
            assert shape(Hx) == z_shape, "shape of z should be {} for the given H, but it is {}".format(
                shape(Hx), z_shape
            )
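
The diagnostic properties above (`likelihood`, `mahalanobis`, `alpha`) are what make this vendored filter useful for track management. Below is a minimal sketch of gating outlier measurements with them. It assumes the module is importable as `src.gesturedetection.ocsort.kalmanfilter` (the path added by this commit, with the repo root on `PYTHONPATH`) and that `KalmanFilter`'s constructor, `predict()`, and `update()` behave as in the earlier part of this file:

```python
import numpy as np

from src.gesturedetection.ocsort.kalmanfilter import KalmanFilter

kf = KalmanFilter(dim_x=2, dim_z=1)
kf.x = np.array([[0.0], [0.0]])            # state: [position, velocity]
kf.F = np.array([[1.0, 1.0], [0.0, 1.0]])  # constant-velocity transition
kf.H = np.array([[1.0, 0.0]])              # we only measure position
kf.P *= 10.0                               # initial state uncertainty
kf.R *= 0.5                                # measurement noise
kf.alpha = 1.02                            # mild fading memory (see the alpha property above)

for z in [1.0, 2.1, 2.9, 25.0, 5.2]:
    kf.predict()
    kf.update(z)
    # mahalanobis reports how many standard deviations the residual was
    # from the prediction; roughly 3 sigma is a common outlier gate
    if kf.mahalanobis > 3.0:
        print(f"z={z}: likely outlier ({kf.mahalanobis:.1f} sigma)")
```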


def update(x, P, z, R, H=None, return_all=False):
    """
    Add a new measurement (z) to the Kalman filter. If z is None, nothing
    is changed.

    This can handle either the multidimensional or unidimensional case. If
    all parameters are floats instead of arrays the filter will still work,
    and return floats for x, P as the result.

    update(1, 2, 1, 1, 1)   # univariate
    update(x, P, 1, 1, H)

    Parameters
    ----------
    x : numpy.array(dim_x, 1), or float
        State estimate vector
    P : numpy.array(dim_x, dim_x), or float
        Covariance matrix
    z : (dim_z, 1): array_like
        measurement for this update. z can be a scalar if dim_z is 1,
        otherwise it must be convertible to a column vector.
    R : numpy.array(dim_z, dim_z), or float
        Measurement noise matrix
    H : numpy.array(dim_x, dim_x), or float, optional
        Measurement function. If not provided, a value of 1 is assumed.
    return_all : bool, default False
        If true, y, K, S, and log_likelihood are returned, otherwise
        only x and P are returned.

    Returns
    -------
    x : numpy.array
        Posterior state estimate vector
    P : numpy.array
        Posterior covariance matrix
    y : numpy.array or scalar
        Residual. Difference between measurement and state in measurement space
    K : numpy.array
        Kalman gain
    S : numpy.array
        System uncertainty in measurement space
    log_likelihood : float
        log likelihood of the measurement
    """

    # pylint: disable=bare-except

    if z is None:
        if return_all:
            return x, P, None, None, None, None
        return x, P

    if H is None:
        H = np.array([1])

    if np.isscalar(H):
        H = np.array([H])

    Hx = np.atleast_1d(dot(H, x))
    z = reshape_z(z, Hx.shape[0], x.ndim)

    # error (residual) between measurement and prediction
    y = z - Hx

    # project system uncertainty into measurement space
    S = dot(dot(H, P), H.T) + R

    # map system uncertainty into kalman gain
    try:
        K = dot(dot(P, H.T), linalg.inv(S))
    except linalg.LinAlgError:
        # can't invert a 1D array, annoyingly
        K = dot(dot(P, H.T), 1.0 / S)

    # predict new x with residual scaled by the kalman gain
    x = x + dot(K, y)

    # P = (I-KH)P(I-KH)' + KRK'
    KH = dot(K, H)

    try:
        I_KH = np.eye(KH.shape[0]) - KH
    except (linalg.LinAlgError, AttributeError, IndexError):
        # KH is a scalar in the univariate case, so np.eye fails above
        I_KH = np.array([1 - KH])
    P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)

    if return_all:
        # compute log likelihood
        log_likelihood = logpdf(z, dot(H, x), S)
        return x, P, y, K, S, log_likelihood
    return x, P
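
A minimal sketch of this functional `update()` on a univariate problem, assuming the same import path as above. It uses numpy scalars rather than plain Python floats, since `reshape_z` reads `x.ndim`, which plain floats do not have:

```python
import numpy as np

from src.gesturedetection.ocsort.kalmanfilter import update

# np.float64 (rather than a plain float) so that x.ndim exists
x, P = np.float64(0.0), np.float64(500.0)

for z in [1.2, 0.9, 1.1]:
    x, P = update(x, P, z, R=3.0)  # H defaults to 1
    print(float(x), float(P))      # estimate converges toward the measurements
```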


def update_steadystate(x, z, K, H=None):
    """
    Add a new measurement (z) to the Kalman filter. If z is None, nothing
    is changed.

    Parameters
    ----------
    x : numpy.array(dim_x, 1), or float
        State estimate vector
    z : (dim_z, 1): array_like
        measurement for this update. z can be a scalar if dim_z is 1,
        otherwise it must be convertible to a column vector.
    K : numpy.array, or float
        Kalman gain matrix
    H : numpy.array(dim_x, dim_x), or float, optional
        Measurement function. If not provided, a value of 1 is assumed.

    Returns
    -------
    x : numpy.array
        Posterior state estimate vector

    Examples
    --------
    This can handle either the multidimensional or unidimensional case. If
    all parameters are floats instead of arrays the filter will still work,
    and return a float for x as the result.

    >>> update_steadystate(1, 2, 1)  # univariate
    >>> update_steadystate(x, z, K, H)
    """

    if z is None:
        return x

    if H is None:
        H = np.array([1])

    if np.isscalar(H):
        H = np.array([H])

    Hx = np.atleast_1d(dot(H, x))
    z = reshape_z(z, Hx.shape[0], x.ndim)

    # error (residual) between measurement and prediction
    y = z - Hx

    # estimate new x with residual scaled by the kalman gain
    return x + dot(K, y)


def predict(x, P, F=1, Q=0, u=0, B=1, alpha=1.0):
    """
    Predict next state (prior) using the Kalman filter state propagation
    equations.

    Parameters
    ----------
    x : numpy.array
        State estimate vector
    P : numpy.array
        Covariance matrix
    F : numpy.array
        State transition matrix
    Q : numpy.array, optional
        Process noise matrix
    u : numpy.array, optional, default 0.
        Control vector. If non-zero, it is multiplied by B
        to create the control input into the system.
    B : numpy.array, optional, default 1.
        Control transition matrix.
    alpha : float, optional, default=1.0
        Fading memory setting. 1.0 gives the normal Kalman filter, and
        values slightly larger than 1.0 (such as 1.02) give a fading
        memory effect - previous measurements have less influence on the
        filter's estimates. This formulation of the fading memory filter
        (there are many) is due to Dan Simon

    Returns
    -------
    x : numpy.array
        Prior state estimate vector
    P : numpy.array
        Prior covariance matrix
    """

    if np.isscalar(F):
        F = np.array(F)
    x = dot(F, x) + dot(B, u)
    P = (alpha * alpha) * dot(dot(F, P), F.T) + Q

    return x, P
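
Putting `predict()` and `update()` together gives one full filter cycle. The sketch below tracks position and velocity from noisy position measurements with a constant-velocity model; the matrices are illustrative values, not from this repo:

```python
import numpy as np

from src.gesturedetection.ocsort.kalmanfilter import predict, update

x = np.array([0.0, 0.0])                   # [position, velocity]
P = np.eye(2) * 100.0                      # initial uncertainty
F = np.array([[1.0, 1.0], [0.0, 1.0]])     # constant-velocity transition
Q = np.eye(2) * 0.01                       # process noise
H = np.array([[1.0, 0.0]])                 # measure position only
R = np.array([[4.0]])                      # measurement noise

for z in [1.1, 2.3, 2.8, 4.2]:
    x, P = predict(x, P, F=F, Q=Q)         # propagate the state forward
    x, P = update(x, P, z, R, H)           # fold in the new measurement
    print(x.round(2))
```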


def predict_steadystate(x, F=1, u=0, B=1):
    """
    Predict next state (prior) using the Kalman filter state propagation
    equations. This steady state form only computes x, assuming that the
    covariance is constant.

    Parameters
    ----------
    x : numpy.array
        State estimate vector
    F : numpy.array
        State transition matrix
    u : numpy.array, optional, default 0.
        Control vector. If non-zero, it is multiplied by B
        to create the control input into the system.
    B : numpy.array, optional, default 1.
        Control transition matrix.

    Returns
    -------
    x : numpy.array
        Prior state estimate vector
    """

    if np.isscalar(F):
        F = np.array(F)
    x = dot(F, x) + dot(B, u)

    return x


def batch_filter(x, P, zs, Fs, Qs, Hs, Rs, Bs=None, us=None, update_first=False, saver=None):
    """
    Batch processes a sequence of measurements.

    Parameters
    ----------
    zs : list-like
        list of measurements at each time step. Missing measurements must be
        represented by None.
    Fs : list-like
        list of values to use for the state transition matrix.
    Qs : list-like
        list of values to use for the process error covariance.
    Hs : list-like
        list of values to use for the measurement matrix.
    Rs : list-like
        list of values to use for the measurement error covariance.
    Bs : list-like, optional
        list of values to use for the control transition matrix;
        if not provided, 0 is used for every time step.
    us : list-like, optional
        list of values to use for the control input vector;
        if not provided, 0 is used for every time step.
    update_first : bool, optional
        controls whether the order of operations is update followed by
        predict, or predict followed by update. Default is predict->update.
    saver : filterpy.common.Saver, optional
        filterpy.common.Saver object. If provided, saver.save() will be
        called after every epoch

    Returns
    -------
    means : np.array((n,dim_x,1))
        array of the state for each time step after the update. Each entry
        is an np.array. In other words `means[k,:]` is the state at step
        `k`.
    covariance : np.array((n,dim_x,dim_x))
        array of the covariances for each time step after the update.
        In other words `covariance[k,:,:]` is the covariance at step `k`.
    means_predictions : np.array((n,dim_x,1))
        array of the state for each time step after the predictions. Each
        entry is an np.array. In other words `means[k,:]` is the state at
        step `k`.
    covariance_predictions : np.array((n,dim_x,dim_x))
        array of the covariances for each time step after the prediction.
        In other words `covariance[k,:,:]` is the covariance at step `k`.

    Examples
    --------
    .. code-block:: Python

        zs = [t + random.randn()*4 for t in range(40)]
        Fs = [kf.F for t in range(40)]
        Hs = [kf.H for t in range(40)]

        (mu, cov, _, _) = kf.batch_filter(zs, Rs=R_list, Fs=Fs, Hs=Hs, Qs=None,
                                          Bs=None, us=None, update_first=False)
        (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs, Qs=None)
    """

    n = np.size(zs, 0)
    dim_x = x.shape[0]

    # mean estimates from Kalman Filter
    if x.ndim == 1:
        means = zeros((n, dim_x))
        means_p = zeros((n, dim_x))
    else:
        means = zeros((n, dim_x, 1))
        means_p = zeros((n, dim_x, 1))

    # state covariances from Kalman Filter
    covariances = zeros((n, dim_x, dim_x))
    covariances_p = zeros((n, dim_x, dim_x))

    if us is None:
        us = [0.0] * n
    if Bs is None:
        Bs = [0.0] * n

    if update_first:
        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):

            x, P = update(x, P, z, R=R, H=H)
            means[i, :] = x
            covariances[i, :, :] = P

            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)
            means_p[i, :] = x
            covariances_p[i, :, :] = P
            if saver is not None:
                saver.save()
    else:
        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):

            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)
            means_p[i, :] = x
            covariances_p[i, :, :] = P

            x, P = update(x, P, z, R=R, H=H)
            means[i, :] = x
            covariances[i, :, :] = P
            if saver is not None:
                saver.save()

    return (means, covariances, means_p, covariances_p)


def rts_smoother(Xs, Ps, Fs, Qs):
    """
    Runs the Rauch-Tung-Striebel Kalman smoother on a set of
    means and covariances computed by a Kalman filter. The usual input
    would come from the output of `KalmanFilter.batch_filter()`.

    Parameters
    ----------
    Xs : numpy.array
        array of the means (state variable x) of the output of a Kalman
        filter.
    Ps : numpy.array
        array of the covariances of the output of a kalman filter.
    Fs : list-like collection of numpy.array
        State transition matrix of the Kalman filter at each time step.
    Qs : list-like collection of numpy.array, optional
        Process noise of the Kalman filter at each time step.

    Returns
    -------
    x : numpy.ndarray
        smoothed means
    P : numpy.ndarray
        smoothed state covariances
    K : numpy.ndarray
        smoother gain at each step
    pP : numpy.ndarray
        predicted state covariances

    Examples
    --------
    .. code-block:: Python

        zs = [t + random.randn()*4 for t in range(40)]

        (mu, cov, _, _) = kalman.batch_filter(zs)
        (x, P, K, pP) = rts_smoother(mu, cov, kf.F, kf.Q)
    """

    if len(Xs) != len(Ps):
        raise ValueError("length of Xs and Ps must be the same")

    n = Xs.shape[0]
    dim_x = Xs.shape[1]

    # smoother gain
    K = zeros((n, dim_x, dim_x))
    x, P, pP = Xs.copy(), Ps.copy(), Ps.copy()

    for k in range(n - 2, -1, -1):
        pP[k] = dot(dot(Fs[k], P[k]), Fs[k].T) + Qs[k]

        # pylint: disable=bad-whitespace
        K[k] = dot(dot(P[k], Fs[k].T), linalg.inv(pP[k]))
        x[k] += dot(K[k], x[k + 1] - dot(Fs[k], x[k]))
        P[k] += dot(dot(K[k], P[k + 1] - pP[k]), K[k].T)

    return (x, P, K, pP)
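
A short sketch tying the module-level `batch_filter()` and `rts_smoother()` together on a noisy ramp, mirroring the docstring examples; the model matrices are illustrative, not from the repo:

```python
import numpy as np

from src.gesturedetection.ocsort.kalmanfilter import batch_filter, rts_smoother

n = 40
F = np.array([[1.0, 1.0], [0.0, 1.0]])   # constant-velocity transition
Q = np.eye(2) * 0.01
H = np.array([[1.0, 0.0]])
R = np.array([[16.0]])

zs = [t + np.random.randn() * 4 for t in range(n)]  # noisy ramp
x0 = np.array([0.0, 0.0])
P0 = np.eye(2) * 500.0

# forward pass: filtered means/covariances plus the priors
mu, cov, _, _ = batch_filter(x0, P0, zs, Fs=[F] * n, Qs=[Q] * n, Hs=[H] * n, Rs=[R] * n)

# backward pass: RTS smoothing over the filtered estimates
xs, Ps, K, pP = rts_smoother(mu, cov, Fs=[F] * n, Qs=[Q] * n)
print(xs[-1].round(2))  # smoothed [position, velocity] at the final step
```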
src/gesturedetection/onnx_models.py
ADDED
@@ -0,0 +1,194 @@
from abc import ABC

import cv2
import numpy as np
import onnxruntime as ort


class OnnxModel(ABC):
    def __init__(self, model_path, image_size):
        self.model_path = model_path
        self.image_size = image_size
        self.mean = np.array([127, 127, 127], dtype=np.float32)
        self.std = np.array([128, 128, 128], dtype=np.float32)
        options, prov_opts, providers = self.get_onnx_provider()
        self.sess = ort.InferenceSession(
            model_path, sess_options=options, providers=providers, provider_options=prov_opts
        )
        self._get_input_output()

    def preprocess(self, frame):
        """
        Preprocess frame

        Parameters
        ----------
        frame : np.ndarray
            Frame to preprocess

        Returns
        -------
        np.ndarray
            Preprocessed frame
        """
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, self.image_size)
        image = (image - self.mean) / self.std
        image = np.transpose(image, [2, 0, 1])
        image = np.expand_dims(image, axis=0)
        return image

    def _get_input_output(self):
        inputs = self.sess.get_inputs()
        self.inputs = "".join(
            [
                f"\n {i}: {inp.name}" f" Shape: ({','.join(map(str, inp.shape))})" f" Dtype: {inp.type}"
                for i, inp in enumerate(inputs)
            ]
        )

        outputs = self.sess.get_outputs()
        self.outputs = "".join(
            [
                f"\n {i}: {output.name}" f" Shape: ({','.join(map(str, output.shape))})" f" Dtype: {output.type}"
                for i, output in enumerate(outputs)
            ]
        )

    @staticmethod
    def get_onnx_provider():
        """
        Get onnx provider

        Returns
        -------
        options : onnxruntime.SessionOptions
            Session options
        prov_opts : list
            Provider options, parallel to `providers`
        providers : list
            List of providers
        """
        providers = ["CPUExecutionProvider"]
        options = ort.SessionOptions()
        options.enable_mem_pattern = False
        options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        # provider_options must stay the same length as providers, so start
        # with an empty options dict for the CPU provider
        prov_opts = [{}]
        print("Using ONNX Runtime", ort.get_device())

        if "DML" in ort.get_device():
            prov_opts.append({"device_id": 0})
            providers.append("DmlExecutionProvider")

        elif "GPU" in ort.get_device():
            prov_opts.append(
                {
                    "device_id": 0,
                    "arena_extend_strategy": "kNextPowerOfTwo",
                    "gpu_mem_limit": 2 * 1024 * 1024 * 1024,
                    "cudnn_conv_algo_search": "EXHAUSTIVE",
                    "do_copy_in_default_stream": True,
                }
            )
            providers.append("CUDAExecutionProvider")

        return options, prov_opts, providers

    def __repr__(self):
        return (
            f"Providers: {self.sess.get_providers()}\n"
            f"Model: {self.sess.get_modelmeta().description}\n"
            f"Version: {self.sess.get_modelmeta().version}\n"
            f"Inputs: {self.inputs}\n"
            f"Outputs: {self.outputs}"
        )


class HandDetection(OnnxModel):
    def __init__(self, model_path, image_size=(320, 240)):
        super().__init__(model_path, image_size)
        self.input_name = self.sess.get_inputs()[0].name
        self.output_names = [output.name for output in self.sess.get_outputs()]

    def __call__(self, frame):
        input_tensor = self.preprocess(frame)
        boxes, _, probs = self.sess.run(self.output_names, {self.input_name: input_tensor})
        width, height = frame.shape[1], frame.shape[0]
        # the detector outputs normalized coordinates; scale back to pixels
        boxes[:, 0] *= width
        boxes[:, 1] *= height
        boxes[:, 2] *= width
        boxes[:, 3] *= height
        return boxes.astype(np.int32), probs


class HandClassification(OnnxModel):
    def __init__(self, model_path, image_size=(128, 128)):
        super().__init__(model_path, image_size)

    @staticmethod
    def get_square(box, image):
        """
        Expand a box to a square, clipped to the image bounds.

        Parameters
        ----------
        box : np.ndarray
            Box coordinates (x1, y1, x2, y2)
        image : np.ndarray
            Image for shape
        """
        height, width, _ = image.shape
        x0, y0, x1, y1 = box
        w, h = x1 - x0, y1 - y0
        if h < w:
            y0 = y0 - int((w - h) / 2)
            y1 = y0 + w
        if h > w:
            x0 = x0 - int((h - w) / 2)
            x1 = x0 + h
        x0 = max(0, x0)
        y0 = max(0, y0)
        x1 = min(width - 1, x1)
        y1 = min(height - 1, y1)
        return x0, y0, x1, y1

    def get_crops(self, frame, bboxes):
        """
        Get crops from frame

        Parameters
        ----------
        frame : np.ndarray
            Frame to crop from bboxes
        bboxes : np.ndarray
            Bounding boxes

        Returns
        -------
        crops : list of np.ndarray
            Crops from frame
        """
        crops = []
        for bbox in bboxes:
            bbox = self.get_square(bbox, frame)
            crop = frame[bbox[1] : bbox[3], bbox[0] : bbox[2]]
            crops.append(crop)
        return crops

    def __call__(self, image, bboxes):
        """
        Get predictions from model

        Parameters
        ----------
        image : np.ndarray
            Image to predict
        bboxes : np.ndarray
            Bounding boxes

        Returns
        -------
        labels : np.ndarray
            Predicted gesture class id for each box
        """
        crops = self.get_crops(image, bboxes)
        crops = [self.preprocess(crop) for crop in crops]
        input_name = self.sess.get_inputs()[0].name
        outputs = self.sess.run(None, {input_name: np.concatenate(crops, axis=0)})[0]
        labels = np.argmax(outputs, axis=1)
        return labels
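
A minimal sketch of the two-stage pipeline this module implements: detect hand boxes, then classify a gesture for each crop. The model filenames match the `models/` files added in this commit; the image path and the repo-root import path are assumptions:

```python
import cv2

from src.gesturedetection.onnx_models import HandDetection, HandClassification

detector = HandDetection("models/hand_detector.onnx")
classifier = HandClassification("models/crops_classifier.onnx")

frame = cv2.imread("example.jpg")   # any BGR image with a visible hand
boxes, probs = detector(frame)      # pixel-space (x1, y1, x2, y2) int boxes

if len(boxes) > 0:
    # one gesture class id per box; the id-to-name mapping lives in
    # utils.enums.targets, which is not shown in this diff
    labels = classifier(frame, boxes)
    for box, label in zip(boxes, labels):
        print(box, int(label))
```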
src/gesturedetection/utils/__init__.py
ADDED
@@ -0,0 +1,16 @@
from .action_controller import Deque
from .box_utils_numpy import hard_nms
from .drawer import Drawer
from .enums import Event, HandPosition, targets
from .hand import Hand


__all__ = [
    "Deque",
    "hard_nms",
    "Drawer",
    "Event",
    "HandPosition",
    "targets",
    "Hand",
]
src/gesturedetection/utils/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (504 Bytes)
src/gesturedetection/utils/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (465 Bytes)
src/gesturedetection/utils/__pycache__/action_controller.cpython-312.pyc
ADDED
Binary file (25.8 kB)
src/gesturedetection/utils/__pycache__/action_controller.cpython-39.pyc
ADDED
Binary file (12 kB)
src/gesturedetection/utils/__pycache__/box_utils_numpy.cpython-312.pyc
ADDED
Binary file (7.47 kB)
src/gesturedetection/utils/__pycache__/box_utils_numpy.cpython-39.pyc
ADDED
Binary file (5.46 kB)
src/gesturedetection/utils/__pycache__/drawer.cpython-312.pyc
ADDED
Binary file (9.86 kB)
src/gesturedetection/utils/__pycache__/drawer.cpython-39.pyc
ADDED
Binary file (4.29 kB)
src/gesturedetection/utils/__pycache__/enums.cpython-312.pyc
ADDED
Binary file (2.59 kB)
src/gesturedetection/utils/__pycache__/enums.cpython-39.pyc
ADDED
Binary file (2.33 kB)
src/gesturedetection/utils/__pycache__/hand.cpython-312.pyc
ADDED
Binary file (1.7 kB)
src/gesturedetection/utils/__pycache__/hand.cpython-39.pyc
ADDED
Binary file (1.18 kB)
src/gesturedetection/utils/action_controller.py
ADDED
@@ -0,0 +1,598 @@
from scipy.spatial import distance
from collections import deque

from .enums import Event, HandPosition, targets
from .hand import Hand


class Deque:
    def __init__(self, maxlen=30, min_frames=20):
        self.maxlen = maxlen
        self._deque = []
        self.action = None
        self.min_absolute_distance = 1.5
        self.min_frames = min_frames
        self.action_deque = deque(maxlen=5)

    def __len__(self):
        return len(self._deque)

    def index_position(self, x):
        for i in range(len(self._deque)):
            if self._deque[i].position == x:
                return i

    def index_gesture(self, x):
        for i in range(len(self._deque)):
            if self._deque[i].gesture == x:
                return i

    def __getitem__(self, index):
        return self._deque[index]

    def __setitem__(self, index, value):
        self._deque[index] = value

    def __delitem__(self, index):
        del self._deque[index]

    def __iter__(self):
        return iter(self._deque)

    def __reversed__(self):
        return reversed(self._deque)

    def append(self, x):
        if self.maxlen is not None and len(self) >= self.maxlen:
            self._deque.pop(0)
        self.set_hand_position(x)
        self._deque.append(x)
        self.check_is_action(x)

    def check_duration(self, start_index, min_frames=None):
        """
        Check duration of swipe.

        Parameters
        ----------
        start_index : int
            Index of start position of swipe.

        Returns
        -------
        bool
            True if duration of swipe is at least min_frames.
        """
        if min_frames is None:
            min_frames = self.min_frames
        return len(self) - start_index >= min_frames

    def check_duration_max(self, start_index, max_frames=10):
        """
        Check duration of swipe.

        Parameters
        ----------
        start_index : int
            Index of start position of swipe.

        Returns
        -------
        bool
            True if duration of swipe is at most max_frames.
        """
        return len(self) - start_index <= max_frames

    def check_is_action(self, x):
        """
        Check if gesture is action.

        Parameters
        ----------
        x : Hand
            Hand object.

        Returns
        -------
        bool
            True if gesture is action.
        """
        if x.position == HandPosition.LEFT_END and HandPosition.RIGHT_START in self:
            start_index = self.index_position(HandPosition.RIGHT_START)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_LEFT
                self.clear()
                return True

        elif x.position == HandPosition.RIGHT_END and HandPosition.LEFT_START in self:
            start_index = self.index_position(HandPosition.LEFT_START)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_RIGHT
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.UP_END and HandPosition.DOWN_START in self:
            start_index = self.index_position(HandPosition.DOWN_START)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_UP
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.DOWN_END and HandPosition.UP_START in self:
            start_index = self.index_position(HandPosition.UP_START)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_DOWN
                self.clear()
                return True
            else:
                self.clear()

        elif x.gesture == 18:  # grip
            if self.action is None:
                start_index = self.index_gesture(18)
                if self.check_duration(start_index):
                    self.action = Event.DRAG2
                    return True

        elif self.action == Event.DRAG2 and x.gesture in [11, 12]:  # hand heart
            self.action = Event.DROP2
            self.clear()
            return True

        elif x.gesture == 29:  # ok
            if self.action is None:
                start_index = self.index_gesture(29)
                if self.check_duration(start_index):
                    self.action = Event.DRAG3
                    return True

        elif self.action == Event.DRAG3 and x.gesture in [11, 12]:  # hand heart
            self.action = Event.DROP3
            self.clear()
            return True

        elif x.position == HandPosition.FAST_SWIPE_UP_END and HandPosition.FAST_SWIPE_UP_START in self:
            start_index = self.index_position(HandPosition.FAST_SWIPE_UP_START)
            if (
                self.check_duration(start_index, min_frames=20)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.FAST_SWIPE_UP
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.FAST_SWIPE_DOWN_END and HandPosition.FAST_SWIPE_DOWN_START in self:
            start_index = self.index_position(HandPosition.FAST_SWIPE_DOWN_START)
            if (
                self.check_duration(start_index, min_frames=20)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.FAST_SWIPE_DOWN
                self.clear()
                return True

        elif x.position == HandPosition.ZOOM_IN_END and HandPosition.ZOOM_IN_START in self:
            start_index = self.index_position(HandPosition.ZOOM_IN_START)
            if (
                self.check_duration(start_index, min_frames=20)
                and self.check_vertical_swipe(self._deque[start_index], x)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.ZOOM_IN
                self.clear()
                return True

        elif x.position == HandPosition.ZOOM_OUT_END and HandPosition.ZOOM_OUT_START in self:
            start_index = self.index_position(HandPosition.ZOOM_OUT_START)
            if (
                self.check_duration(start_index, min_frames=20)
                and self.check_vertical_swipe(self._deque[start_index], x)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.ZOOM_OUT
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.LEFT_END2 and HandPosition.RIGHT_START2 in self:
            start_index = self.index_position(HandPosition.RIGHT_START2)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_LEFT2
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.RIGHT_END2 and HandPosition.LEFT_START2 in self:
            start_index = self.index_position(HandPosition.LEFT_START2)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_RIGHT2
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.UP_END2 and HandPosition.DOWN_START2 in self:
            start_index = self.index_position(HandPosition.DOWN_START2)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_UP2
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.LEFT_END3 and HandPosition.RIGHT_START3 in self:
            start_index = self.index_position(HandPosition.RIGHT_START3)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_LEFT3  # two
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.RIGHT_END3 and HandPosition.LEFT_START3 in self:
            start_index = self.index_position(HandPosition.LEFT_START3)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_duration(start_index)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_RIGHT3
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.UP_END3 and HandPosition.DOWN_START3 in self:
            start_index = self.index_position(HandPosition.DOWN_START3)
            if (
                self.check_duration(start_index, min_frames=15)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_UP3
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.DOWN_END3 and HandPosition.UP_START3 in self:
            start_index = self.index_position(HandPosition.UP_START3)
            if (
                self.check_duration(start_index, min_frames=15)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_DOWN3
                self.clear()
                return True
            else:
                self.clear()

        elif HandPosition.DRAG_START in self and x.gesture == 25:  # fist
            if self.action is None:
                start_index = self.index_gesture(17)  # grabbing

                if self.check_duration(start_index, min_frames=3):
                    self.action = Event.DRAG
                    return True
                else:
                    self.clear()

        elif HandPosition.ZOOM_IN_START in self and x.gesture == 19:  # point
            start_index = self.index_position(HandPosition.ZOOM_IN_START)
            if (
                self.check_duration(start_index, min_frames=8)
                and self.check_vertical_swipe(self._deque[start_index], x)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action = Event.TAP
                self.clear()
                return True
            elif (
                self.check_duration(start_index, min_frames=2)
                and self.check_duration_max(start_index, max_frames=8)
                and self.check_vertical_swipe(self._deque[start_index], x)
                and self.check_horizontal_swipe(self._deque[start_index], x)
            ):
                self.action_deque.append(Event.TAP)
                if (
                    len(self.action_deque) >= 2
                    and self.action_deque[-1] == Event.TAP
                    and self.action_deque[-2] == Event.TAP
                ):
                    self.action_deque.pop()
                    self.action_deque.pop()
                    self.action = Event.DOUBLE_TAP
                    self.clear()
                    return True
            else:
                self.clear()

        elif x.position == HandPosition.DOWN_END2 and HandPosition.ZOOM_OUT_START in self:
            start_index = self.index_position(HandPosition.ZOOM_OUT_START)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_DOWN2
                self.clear()
                return True
            else:
                self.clear()

        elif x.position == HandPosition.ZOOM_OUT_START and HandPosition.UP_START2 in self:
            start_index = self.index_position(HandPosition.UP_START2)
            if (
                self.swipe_distance(self._deque[start_index], x)
                and self.check_vertical_swipe(self._deque[start_index], x)
            ):
                self.action = Event.SWIPE_UP2
                self.clear()
                return True
            else:
                self.clear()

        elif self.action == Event.DRAG and x.gesture in [35, 31, 36, 17]:  # [stop, palm, stop_inverted, grabbing]
            self.action = Event.DROP
            self.clear()
            return True
        return False

    @staticmethod
    def check_horizontal_swipe(start_hand, x):
        """
        Check if swipe is horizontal.

        Parameters
        ----------
        start_hand : Hand
            Hand object of start position of swipe.

        x : Hand
            Hand object of end position of swipe.

        Returns
        -------
        bool
            True if swipe is horizontal.
        """
        boundary = [start_hand.bbox[1], start_hand.bbox[3]]
        return boundary[0] < x.center[1] < boundary[1]

    @staticmethod
    def check_vertical_swipe(start_hand, x):
        """
        Check if swipe is vertical.

        Parameters
        ----------
        start_hand : Hand
            Hand object of start position of swipe.

        x : Hand
            Hand object of end position of swipe.

        Returns
        -------
        bool
            True if swipe is vertical.
        """
        boundary = [start_hand.bbox[0], start_hand.bbox[2]]
        return boundary[0] < x.center[0] < boundary[1]

    def __contains__(self, item):
        for x in self._deque:
            if x.position == item:
                return True
        return False

    def set_hand_position(self, hand: Hand):
        """
        Set hand position.

        Parameters
        ----------
        hand : Hand
            Hand object.
        """
        if hand.gesture in [31, 35, 36]:  # [palm, stop, stop_inv]
            if HandPosition.DOWN_START in self:
                hand.position = HandPosition.UP_END
            else:
                hand.position = HandPosition.UP_START

        elif hand.gesture == 0:  # hand_down
            if HandPosition.UP_START in self:
                hand.position = HandPosition.DOWN_END
            else:
                hand.position = HandPosition.DOWN_START

        elif hand.gesture == 1:  # hand_right
            if HandPosition.LEFT_START in self:
                hand.position = HandPosition.RIGHT_END
            else:
                hand.position = HandPosition.RIGHT_START

        elif hand.gesture == 2:  # hand_left
            if HandPosition.RIGHT_START in self:
                hand.position = HandPosition.LEFT_END
            else:
                hand.position = HandPosition.LEFT_START

        elif hand.gesture == 30:  # one
            if HandPosition.FAST_SWIPE_UP_START in self:
                hand.position = HandPosition.FAST_SWIPE_UP_END
            else:
                hand.position = HandPosition.FAST_SWIPE_DOWN_START

        elif hand.gesture == 19:  # point
            if HandPosition.FAST_SWIPE_DOWN_START in self:
                hand.position = HandPosition.FAST_SWIPE_DOWN_END
            else:
                hand.position = HandPosition.FAST_SWIPE_UP_START

        elif hand.gesture == 17:  # grabbing
            hand.position = HandPosition.DRAG_START

        elif hand.gesture == 25:  # fist
            if HandPosition.ZOOM_OUT_START in self:
                hand.position = HandPosition.ZOOM_OUT_END
            else:
                hand.position = HandPosition.ZOOM_IN_START

        elif hand.gesture == 3:  # thumb_index
            if HandPosition.ZOOM_IN_START in self:
                hand.position = HandPosition.ZOOM_IN_END
            else:
                hand.position = HandPosition.ZOOM_OUT_START

        elif hand.gesture == 38:  # three2
            if HandPosition.ZOOM_IN_START in self:
                hand.position = HandPosition.ZOOM_IN_END
            else:
                hand.position = HandPosition.ZOOM_OUT_START

        elif hand.gesture == 5:  # thumb_right
            if HandPosition.LEFT_START2 in self:
                hand.position = HandPosition.RIGHT_END2
            else:
                hand.position = HandPosition.RIGHT_START2

        elif hand.gesture == 4:  # thumb_left
            if HandPosition.RIGHT_START2 in self:
                hand.position = HandPosition.LEFT_END2
            else:
                hand.position = HandPosition.LEFT_START2

        elif hand.gesture == 15:  # two_right
            if HandPosition.LEFT_START3 in self:
                hand.position = HandPosition.RIGHT_END3
            else:
                hand.position = HandPosition.RIGHT_START3

        elif hand.gesture == 14:  # two_left
            if HandPosition.RIGHT_START3 in self:
                hand.position = HandPosition.LEFT_END3
            else:
                hand.position = HandPosition.LEFT_START3

        elif hand.gesture == 39:  # two_up
            if HandPosition.DOWN_START3 in self:
                hand.position = HandPosition.UP_END3
            else:
                hand.position = HandPosition.UP_START3

        elif hand.gesture == 16:  # two_down
            if HandPosition.UP_START3 in self:
                hand.position = HandPosition.DOWN_END3
            else:
                hand.position = HandPosition.DOWN_START3

        elif hand.gesture == 6:  # thumb_down
            if HandPosition.ZOOM_OUT_START in self:
                hand.position = HandPosition.DOWN_END2
            else:
                hand.position = HandPosition.UP_START2
        else:
            hand.position = HandPosition.UNKNOWN

    def swipe_distance(
        self,
        first_hand: Hand,
        last_hand: Hand,
    ):
        """
        Check if swipe distance is more than min_absolute_distance.

        Parameters
        ----------
        first_hand : Hand
            Hand object of start position of swipe.

        last_hand : Hand
            Hand object of end position of swipe.

        Returns
        -------
        bool
            True if swipe distance (relative to hand size) is more than
            min_absolute_distance.
        """
        hand_dist = distance.euclidean(first_hand.center, last_hand.center)
        hand_size = (first_hand.size + last_hand.size) / 2
        return hand_dist / hand_size > self.min_absolute_distance

    def clear(self):
        self._deque.clear()

    def copy(self):
        return self._deque.copy()

    def count(self, x):
        return self._deque.count(x)

    def extend(self, iterable):
        self._deque.extend(iterable)

    def insert(self, i, x):
        self._deque.insert(i, x)

    def pop(self):
        return self._deque.pop()

    def remove(self, value):
        self._deque.remove(value)

    def reverse(self):
        self._deque.reverse()

    def __str__(self):
        return f"Deque({[hand.gesture for hand in self._deque]})"
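
A minimal sketch of driving the `Deque` action controller with synthetic detections to trigger `Event.SWIPE_LEFT`. The real `Hand` class is not shown in this diff, so a hypothetical stand-in with just the attributes the controller reads (`gesture`, `bbox`, `center`, `size`, `position`) is used; gesture ids 1 and 2 (`hand_right`/`hand_left`) come from the comments in `set_hand_position` above:

```python
from types import SimpleNamespace

from src.gesturedetection.utils import Deque, Event

def fake_hand(x0, gesture):
    # Stand-in detection; position is filled in by set_hand_position()
    return SimpleNamespace(
        gesture=gesture,
        bbox=(x0, 100, x0 + 80, 180),
        center=(x0 + 40, 140),
        size=80,
        position=None,
    )

controller = Deque(maxlen=30, min_frames=20)

# 20 frames of "hand_right" (gesture 1) marching across the frame...
for i in range(20):
    controller.append(fake_hand(400 - i * 10, gesture=1))
# ...ending in a "hand_left" (gesture 2) far from the start position.
controller.append(fake_hand(40, gesture=2))

print(controller.action)  # Event.SWIPE_LEFT once distance/duration checks pass
```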