Spaces:
Sleeping
Sleeping
| """ | |
| Gesture validation service for identity verification. | |
| This module provides gesture validation functionality by leveraging the existing | |
| gesture detection system in src/gesturedetection/. It processes user videos to | |
| detect specific gestures and validates them against a list of required gestures. | |
| """ | |
| import os | |
| import logging | |
| import tempfile | |
| from typing import List, Dict, Any, Optional, Tuple | |
| from datetime import datetime, timezone | |
| from .models import ValidationResult, ValidationStatus, GestureRequirement | |
| logger = logging.getLogger(__name__) | |
class GestureValidator:
    """
    Gesture validation service for identity verification.

    This class processes user videos to detect and validate specific gestures
    against a list of required gestures. It uses the existing gesture detection
    pipeline from src/gesturedetection/ and provides configurable validation
    parameters including error margins and minimum requirements.

    If the gesture detection package cannot be imported, the instance is still
    created, but ``validate_gestures`` returns a FAILED result instead of raising.
    """

    # Stand-in score used by the controller fallback: the controller call only
    # yields boxes, ids and labels, so no per-frame confidence is available there.
    _FALLBACK_CONFIDENCE = 0.8

    # Canonical gesture name -> accepted aliases (legacy names and
    # hand/direction-specific variants collapse onto the canonical name).
    _GESTURE_VARIATIONS = {
        "thumbs_up": ("thumbsup", "thumb_up", "like"),  # "like" is legacy name
        "one": ("one_finger", "one_left", "one_right", "one_down"),  # Hand-agnostic
        "two": (
            "peace_sign", "victory", "two_fingers",
            "two_up", "two_left", "two_right", "two_down",
        ),  # Hand-agnostic
        "three": ("three_fingers", "three2", "three3"),  # Hand-agnostic
        "four": ("four_fingers",),
        "five": ("palm", "open_palm", "five_fingers"),  # "palm" is alias for "five"
        "peace_inverted": ("peace_inverted_sign",),
        "ok": ("okay", "ok_sign"),
        "call": ("call_me", "phone"),
        "fist": ("closed_fist",),
        "point": ("pointing",),
        "stop": ("stop_sign",),
        "middle_finger": ("middle",),
    }

    # Flattened alias -> canonical lookup, built once instead of on every call.
    _ALIAS_TO_STANDARD = {
        alias: standard
        for standard, aliases in _GESTURE_VARIATIONS.items()
        for alias in aliases
    }

    def __init__(
        self,
        detector_path: str = "models/hand_detector.onnx",
        classifier_path: str = "models/crops_classifier.onnx",
        frame_skip: int = 1,
        min_gesture_duration: int = 5,
        confidence_threshold: float = 0.7
    ):
        """
        Initialize the gesture validator.

        Parameters
        ----------
        detector_path : str, optional
            Path to the hand detection ONNX model, by default "models/hand_detector.onnx"
        classifier_path : str, optional
            Path to the gesture classification ONNX model, by default "models/crops_classifier.onnx"
        frame_skip : int, optional
            Frame stride: every ``frame_skip``-th frame is processed, by default 1.
            Values below 1 are treated as 1 by the controller fallback.
        min_gesture_duration : int, optional
            Minimum duration for gesture detection, by default 5
        confidence_threshold : float, optional
            Minimum confidence threshold for gesture detection, by default 0.7
        """
        self.detector_path = detector_path
        self.classifier_path = classifier_path
        self.frame_skip = frame_skip
        self.min_gesture_duration = min_gesture_duration
        self.confidence_threshold = confidence_threshold

        # Defaults so these attributes always exist, even when the import below fails.
        self._main_controller_class = None
        self._gesture_mapping = {}
        self._initialized = False

        # Import here to avoid circular imports and handle missing dependencies gracefully
        try:
            from ..gesturedetection.main_controller import MainController
            from ..gesturedetection.models import PRODUCTION_GESTURE_MAPPING
        except ImportError as e:
            logger.warning("Could not import gesture detection components: %s", e)
        else:
            self._main_controller_class = MainController
            self._gesture_mapping = PRODUCTION_GESTURE_MAPPING
            self._initialized = True
            logger.info("GestureValidator initialized successfully with PRODUCTION_GESTURE_MAPPING")

    def validate_gestures(
        self,
        video_path: str,
        required_gestures: List[str],
        error_margin: float = 0.33,
        require_all: bool = True
    ) -> "ValidationResult":
        """
        Validate that required gestures are present in the video.

        Never raises for processing problems: every failure is reported as a
        result with ``ValidationStatus.FAILED`` and an ``error_message``.

        Parameters
        ----------
        video_path : str
            Path to the video file to analyze
        required_gestures : List[str]
            List of gesture names that must be detected. Aliases such as
            "like" or "palm" are matched against their canonical names.
        error_margin : float, optional
            Fraction of gestures that can be missed (0.0-1.0), by default 0.33.
            Only consulted when ``require_all`` is False.
        require_all : bool, optional
            Whether all gestures must be present, by default True

        Returns
        -------
        ValidationResult
            Validation result with success status and detailed metrics.
            Status is SUCCESS when the requirement is met, PARTIAL when the
            video was processed but the requirement was not met, and FAILED
            on invalid input or a processing error.
        """
        if not self._initialized:
            return self._failure(
                "GestureValidator not properly initialized - missing gesture detection components"
            )

        logger.info("Starting gesture validation for video: %s", video_path)
        logger.info("Required gestures: %s, error_margin: %s", required_gestures, error_margin)

        # Validate input file (a directory is not a usable video, so require a regular file)
        if not os.path.isfile(video_path):
            return self._failure(f"Video file not found: {video_path}")

        # Validate required gestures
        if not required_gestures:
            return self._failure("No gestures specified for validation")

        try:
            # Process video using existing gesture detection pipeline
            detected_gestures = self._process_video_for_gestures(video_path)

            # Analyze detected gestures against requirements
            validation_metrics = self._analyze_gesture_requirements(
                detected_gestures, required_gestures, error_margin, require_all
            )

            # The analysis already applied require_all / error_margin; reuse its
            # verdict so the pass rule lives in exactly one place.
            success = validation_metrics["passes_validation"]

            # Calculate confidence based on detection quality
            confidence = self._calculate_confidence(detected_gestures, validation_metrics)
            status = ValidationStatus.SUCCESS if success else ValidationStatus.PARTIAL

            result = ValidationResult(
                status=status,
                success=success,
                confidence=confidence,
                details={
                    "detected_gestures": [
                        {
                            "gesture": g["gesture"],
                            "duration": g["duration"],
                            "confidence": g["confidence"]
                        }
                        for g in detected_gestures
                    ],
                    "validation_metrics": validation_metrics,
                    "required_gestures": required_gestures,
                    "error_margin": error_margin,
                    "require_all": require_all,
                    "processing_timestamp": datetime.now(timezone.utc).isoformat()
                }
            )
            logger.info(
                "Gesture validation completed: success=%s, confidence=%s", success, confidence
            )
            return result
        except Exception as e:
            # Top-level boundary: report any pipeline error as a FAILED result.
            return self._failure(f"Error during gesture validation: {str(e)}", exc_info=True)

    def _failure(self, error_msg: str, exc_info: bool = False) -> "ValidationResult":
        """Log ``error_msg`` and wrap it in a FAILED result with zero confidence."""
        logger.error(error_msg, exc_info=exc_info)
        return ValidationResult(
            status=ValidationStatus.FAILED,
            success=False,
            confidence=0.0,
            error_message=error_msg
        )

    @staticmethod
    def _min_required_count(total_required: int, error_margin: float) -> int:
        """
        Return how many required gestures must be met when misses are tolerated.

        Computes ``total_required * (1 - error_margin)`` truncated to an int,
        with a floor of 1. The product is rounded before truncation so float
        artefacts (e.g. ``10 * (1.0 - 0.8) == 1.9999999999999996``) do not
        silently lower the requirement by one.
        """
        scaled = round(total_required * (1.0 - error_margin), 9)
        return max(1, int(scaled))

    def _process_video_for_gestures(self, video_path: str) -> List[Dict[str, Any]]:
        """
        Process video file to detect gestures using existing pipeline.

        Tries ``gesturedetection.api.process_video_for_gestures`` first; if that
        raises ImportError, falls back to running the controller frame by frame.

        Parameters
        ----------
        video_path : str
            Path to the video file

        Returns
        -------
        List[Dict[str, Any]]
            Detected gestures as dicts with keys "gesture" (normalized name),
            "duration", "confidence" and "raw_gesture" (name as reported).
        """
        logger.debug("Processing video for gestures: %s", video_path)

        try:
            # Import video processing function from existing API
            from ..gesturedetection.api import process_video_for_gestures
            gestures = process_video_for_gestures(
                video_path,
                detector_path=self.detector_path,
                classifier_path=self.classifier_path,
                frame_skip=self.frame_skip
            )
        except ImportError:
            # Fallback: use controller directly if import fails. The controller is
            # built only here so the models are not loaded when the API path works.
            logger.warning("Using fallback gesture processing method")
            controller = self._main_controller_class(self.detector_path, self.classifier_path)
            gestures = self._process_video_with_controller(controller, video_path)

        detected_gestures = self._to_internal_format(gestures)
        logger.debug("Detected %d gestures", len(detected_gestures))
        return detected_gestures

    def _to_internal_format(self, gestures) -> List[Dict[str, Any]]:
        """
        Convert pipeline results to the internal dict format.

        Accepts both record shapes produced in this class: dicts with
        "gesture"/"duration"/"confidence" keys (controller fallback) and
        objects exposing the same names as attributes.
        """
        detected_gestures = []
        for gesture in gestures:
            if isinstance(gesture, dict):
                raw_name = gesture["gesture"]
                duration = gesture["duration"]
                confidence = gesture["confidence"]
            else:
                raw_name = gesture.gesture
                duration = gesture.duration
                confidence = gesture.confidence
            detected_gestures.append({
                # Map gesture names to standardized format
                "gesture": self._normalize_gesture_name(raw_name),
                "duration": duration,
                "confidence": confidence,
                "raw_gesture": raw_name
            })
        return detected_gestures

    def _process_video_with_controller(self, controller, video_path: str) -> List[Dict[str, Any]]:
        """
        Fallback method to process video using controller directly.

        This is used if the import from api.py fails for any reason.

        Parameters
        ----------
        controller
            Callable taking a frame and returning ``(bboxes, ids, labels)``.
        video_path : str
            Path to the video file

        Returns
        -------
        List[Dict[str, Any]]
            One dict per sufficiently long run of identical labels per hand,
            with keys "gesture", "duration" and "confidence".

        Raises
        ------
        ValueError
            If the video file cannot be opened.
        """
        import cv2
        from collections import defaultdict

        logger.debug("Processing video with controller fallback method")

        # A stride below 1 would make the modulo below divide by zero.
        frame_step = max(1, self.frame_skip)
        gesture_tracks = defaultdict(list)

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                raise ValueError(f"Could not open video file: {video_path}")

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # Only every frame_step-th frame is sent through the controller
                if frame_count % frame_step == 0:
                    bboxes, ids, labels = controller(frame)
                    if bboxes is not None and ids is not None and labels is not None:
                        # Track gestures for each detected hand
                        for _, hand_id, gesture_id in zip(bboxes, ids, labels):
                            if gesture_id is not None:
                                gesture_tracks[int(hand_id)].append(
                                    (gesture_id, self._FALLBACK_CONFIDENCE)
                                )
                frame_count += 1
        finally:
            cap.release()

        return self._collapse_gesture_tracks(gesture_tracks, frame_step)

    def _collapse_gesture_tracks(
        self,
        gesture_tracks: Dict[int, List[Tuple[Any, float]]],
        frame_step: int
    ) -> List[Dict[str, Any]]:
        """
        Collapse per-hand ``(gesture_id, confidence)`` sequences into gesture runs.

        Consecutive identical ids form one run. Runs shorter than
        ``min_gesture_duration`` processed frames are dropped. Duration is the
        run length multiplied by ``frame_step``; confidence is the run's mean.
        """
        from itertools import groupby

        detected_gestures = []
        for observations in gesture_tracks.values():
            for gesture_id, run in groupby(observations, key=lambda obs: obs[0]):
                confidences = [confidence for _, confidence in run]
                run_length = len(confidences)  # groupby never yields an empty run
                if run_length < self.min_gesture_duration:
                    continue
                detected_gestures.append({
                    "gesture": self._gesture_mapping.get(gesture_id, f"unknown_{gesture_id}"),
                    "duration": run_length * frame_step,
                    "confidence": sum(confidences) / run_length
                })
        return detected_gestures

    def _analyze_gesture_requirements(
        self,
        detected_gestures: List[Dict[str, Any]],
        required_gestures: List[str],
        error_margin: float,
        require_all: bool
    ) -> Dict[str, Any]:
        """
        Analyze detected gestures against requirements.

        A required gesture counts as met when at least one detected instance of
        it reaches both ``min_gesture_duration`` and ``confidence_threshold``.
        Names on both sides are normalized before matching, so aliases match
        their canonical gesture.

        Parameters
        ----------
        detected_gestures : List[Dict[str, Any]]
            List of detected gestures
        required_gestures : List[str]
            List of required gesture names
        error_margin : float
            Error margin for validation (used only when ``require_all`` is False)
        require_all : bool
            Whether all gestures are required

        Returns
        -------
        Dict[str, Any]
            Validation metrics and analysis; "gesture_analysis" is keyed by the
            required names exactly as passed in.
        """
        logger.debug("Analyzing gesture requirements")

        # Group detected gestures by canonical name
        detected_by_name: Dict[str, List[Dict[str, Any]]] = {}
        for gesture in detected_gestures:
            canonical = self._normalize_gesture_name(gesture["gesture"])
            detected_by_name.setdefault(canonical, []).append(gesture)

        # Analyze each required gesture
        required_gestures_met = 0
        gesture_analysis = {}
        for required_gesture in required_gestures:
            detected_instances = detected_by_name.get(
                self._normalize_gesture_name(required_gesture), []
            )
            # Filter by minimum duration and confidence
            valid_instances = [
                g for g in detected_instances
                if g["duration"] >= self.min_gesture_duration
                and g["confidence"] >= self.confidence_threshold
            ]
            met_requirement = bool(valid_instances)
            gesture_analysis[required_gesture] = {
                "required": True,
                "detected": len(detected_instances),
                "valid_instances": len(valid_instances),
                "met_requirement": met_requirement,
                "best_confidence": max((g["confidence"] for g in detected_instances), default=0.0),
                "best_duration": max((g["duration"] for g in detected_instances), default=0)
            }
            if met_requirement:
                required_gestures_met += 1

        # Calculate success rate
        total_required = len(required_gestures)
        success_rate = required_gestures_met / total_required if total_required > 0 else 0.0

        # Determine if validation passes based on error margin
        if require_all:
            min_required = total_required
        else:
            min_required = self._min_required_count(total_required, error_margin)
        passes_validation = required_gestures_met >= min_required

        metrics = {
            "total_required_gestures": total_required,
            "required_gestures_met": required_gestures_met,
            "success_rate": success_rate,
            "passes_validation": passes_validation,
            "error_margin": error_margin,
            "require_all": require_all,
            "gesture_analysis": gesture_analysis
        }
        logger.debug(
            "Gesture analysis completed: %s/%s gestures met requirement",
            required_gestures_met, total_required
        )
        return metrics

    def _calculate_confidence(
        self,
        detected_gestures: List[Dict[str, Any]],
        validation_metrics: Dict[str, Any]
    ) -> float:
        """
        Calculate overall confidence score for gesture validation.

        The score is the success rate plus two small boosts derived from the
        average duration (capped at 0.2) and average confidence (capped at 0.1)
        of all detected gestures, clamped to the 0.0-1.0 range.

        Parameters
        ----------
        detected_gestures : List[Dict[str, Any]]
            List of detected gestures
        validation_metrics : Dict[str, Any]
            Validation metrics from analysis

        Returns
        -------
        float
            Overall confidence score (0.0-1.0); 0.0 when nothing was detected
        """
        if not detected_gestures:
            return 0.0

        # Base confidence on success rate
        success_rate = validation_metrics.get("success_rate", 0.0)

        gesture_count = len(detected_gestures)
        avg_confidence = sum(g["confidence"] for g in detected_gestures) / gesture_count
        avg_duration = sum(g["duration"] for g in detected_gestures) / gesture_count

        # Normalize duration to confidence boost (longer, more confident gestures = higher score)
        duration_boost = min(0.2, avg_duration / 100.0)  # Cap at 0.2 boost
        confidence_boost = min(0.1, avg_confidence * 0.1)  # Cap at 0.1 boost

        return max(0.0, min(1.0, success_rate + duration_boost + confidence_boost))

    def _normalize_gesture_name(self, gesture_name: str) -> str:
        """
        Normalize gesture names to production-standard format.

        Handles legacy naming and variations to ensure consistent gesture names
        across different parts of the system. Maps old names like "like" to
        "thumbs_up", and handles hand-agnostic counting variations.

        Parameters
        ----------
        gesture_name : str
            Raw gesture name from detection

        Returns
        -------
        str
            The canonical name when the lowercased, stripped input is a known
            alias; otherwise the lowercased, stripped input itself.
        """
        normalized = gesture_name.lower().strip()
        # Canonical names are absent from the alias table and fall through unchanged.
        return self._ALIAS_TO_STANDARD.get(normalized, normalized)