Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Running

App Files Files Community

capradeepgujaran committed on Oct 23

Commit

771e08a

•

1 Parent(s): 46e12d1

Update app.py

Browse files

Files changed (1) hide show

app.py +197 -161

app.py CHANGED Viewed

@@ -8,20 +8,94 @@ import io
 import os
 import base64
-def create_monitor_interface():
-    api_key = os.getenv("GROQ_API_KEY")
-    class SafetyMonitor:
-        def __init__(self):
-            self.client = Groq()
-            self.model_name = "llama-3.2-90b-vision-preview"
-            self.max_image_size = (800, 800)
-            self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
-        def analyze_frame(self, frame: np.ndarray) -> str:
             if frame is None:
-                return "No frame received"
             frame = self.preprocess_image(frame)
             image_url = self.encode_image(frame)
@@ -34,32 +108,23 @@ def create_monitor_interface():
                             "content": [
                                 {
                                     "type": "text",
-                                    "text": """Analyze this image for safety hazards and issues. For each identified hazard:
-1. Specify the exact location in the image where the hazard exists
-2. Describe the specific safety concern
-3. Note any violations or risks
-Format each observation exactly as:
-- <location>area:hazard description</location>
-Examples of locations: top-left, center, bottom-right, full-area, near-machine, workspace, etc.
-Look for ALL types of safety issues including:
-- Personal protective equipment (PPE)
-- Machine and equipment hazards
-- Ergonomic risks
-- Environmental hazards
-- Fire and electrical safety
-- Chemical safety
-- Fall protection
-- Material handling
-- Access/egress issues
-- Housekeeping
-- Tool safety
-- Emergency equipment
-Be specific about locations and provide detailed observations."""
                                 },
                                 {
                                     "type": "image_url",
@@ -74,154 +139,123 @@ Be specific about locations and provide detailed observations."""
                     max_tokens=500,
                     stream=False
                 )
-                return completion.choices[0].message.content
             except Exception as e:
                 print(f"Analysis error: {str(e)}")
-                return f"Analysis Error: {str(e)}"
-        def preprocess_image(self, frame):
-            """Prepare image for analysis."""
-            if len(frame.shape) == 2:
-                frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-            elif len(frame.shape) == 3 and frame.shape[2] == 4:
-                frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
-            return self.resize_image(frame)
-        def resize_image(self, image):
-            """Resize image while maintaining aspect ratio."""
-            height, width = image.shape[:2]
-            if height > self.max_image_size[1] or width > self.max_image_size[0]:
-                aspect = width / height
-                if width > height:
-                    new_width = self.max_image_size[0]
-                    new_height = int(new_width / aspect)
-                else:
-                    new_height = self.max_image_size[1]
-                    new_width = int(new_height * aspect)
-                return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
-            return image
-        def encode_image(self, frame):
-            """Convert image to base64 encoding."""
-            frame_pil = PILImage.fromarray(frame)
-            buffered = io.BytesIO()
-            frame_pil.save(buffered, format="JPEG", quality=95)
-            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-            return f"data:image/jpeg;base64,{img_base64}"
-        def parse_locations(self, observation: str) -> dict:
-            """Parse location information from observation."""
-            locations = {
-                'full': (0, 0, 1, 1),
-                'top': (0.2, 0, 0.8, 0.3),
-                'bottom': (0.2, 0.7, 0.8, 1),
-                'left': (0, 0.2, 0.3, 0.8),
-                'right': (0.7, 0.2, 1, 0.8),
-                'center': (0.3, 0.3, 0.7, 0.7),
-                'top-left': (0, 0, 0.3, 0.3),
-                'top-right': (0.7, 0, 1, 0.3),
-                'bottom-left': (0, 0.7, 0.3, 1),
-                'bottom-right': (0.7, 0.7, 1, 1),
-                'workspace': (0.2, 0.2, 0.8, 0.8),
-                'near-machine': (0.6, 0.1, 1, 0.9),
-                'floor-area': (0, 0.7, 1, 1),
-                'equipment': (0.5, 0.1, 1, 0.9)
-            }
-            # Find best matching location
-            text = observation.lower()
-            best_match = 'center'
-            max_match = 0
-            for loc in locations.keys():
-                if loc in text:
-                    words = loc.split('-')
-                    matches = sum(1 for word in words if word in text)
-                    if matches > max_match:
-                        max_match = matches
-                        best_match = loc
-            return locations[best_match]
-        def draw_observations(self, image, observations):
-            """Draw bounding boxes and labels for safety observations."""
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
             font_scale = 0.5
             thickness = 2
             padding = 10
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
-                # Get relative coordinates and convert to absolute
-                rel_coords = self.parse_locations(obs['location'])
-                x1 = int(rel_coords[0] * width)
-                y1 = int(rel_coords[1] * height)
-                x2 = int(rel_coords[2] * width)
-                y2 = int(rel_coords[3] * height)
-                # Draw rectangle
                 cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-                # Prepare label
-                label = obs['description'][:50]
-                if len(obs['description']) > 50:
-                    label += "..."
-                # Calculate text position
                 label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
                 text_x = max(0, x1)
                 text_y = max(label_size[1] + padding, y1 - padding)
-                # Draw label background
                 cv2.rectangle(image,
-                            (text_x, text_y - label_size[1] - padding),
-                            (text_x + label_size[0] + padding, text_y),
-                            color, -1)
-                # Draw label text
                 cv2.putText(image, label,
                            (text_x + padding//2, text_y - padding//2),
                            font, font_scale, (255, 255, 255), thickness)
-            return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-            """Process frame and generate safety analysis with visualizations."""
-            if frame is None:
-                return None, "No image provided"
-            # Get analysis
-            analysis = self.analyze_frame(frame)
-            display_frame = frame.copy()
-            # Parse observations
-            observations = []
-            for line in analysis.split('\n'):
-                line = line.strip()
-                if line.startswith('-') and '<location>' in line and '</location>' in line:
-                    start = line.find('<location>') + len('<location>')
-                    end = line.find('</location>')
-                    location_description = line[start:end].strip()
-                    # Split location and description
-                    if ':' in location_description:
-                        location, description = location_description.split(':', 1)
-                        observations.append({
-                            'location': location.strip(),
-                            'description': description.strip()
-                        })
-            # Draw observations if any were found
-            if observations:
-                annotated_frame = self.draw_observations(display_frame, observations)
-                return annotated_frame, analysis
-            return display_frame, analysis
-    # Create interface
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
@@ -252,11 +286,13 @@ Be specific about locations and provide detailed observations."""
         gr.Markdown("""
         ## Instructions:
         1. Upload any workplace/safety-related image
-        2. View identified hazards and safety concerns
-        3. Check detailed analysis for recommendations
         """)
     return demo
-demo = create_monitor_interface()
-demo.launch()

 import os
 import base64
+class SafetyMonitor:
+    def __init__(self):
+        self.client = Groq()
+        self.model_name = "llama-3.2-90b-vision-preview"
+        self.max_image_size = (800, 800)
+        self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
+    def preprocess_image(self, frame):
+        """Prepare image for analysis."""
+        if len(frame.shape) == 2:
+            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
+        elif len(frame.shape) == 3 and frame.shape[2] == 4:
+            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
+        return self.resize_image(frame)
+    def resize_image(self, image):
+        """Resize image while maintaining aspect ratio."""
+        height, width = image.shape[:2]
+        if height > self.max_image_size[1] or width > self.max_image_size[0]:
+            aspect = width / height
+            if width > height:
+                new_width = self.max_image_size[0]
+                new_height = int(new_width / aspect)
+            else:
+                new_height = self.max_image_size[1]
+                new_width = int(new_height * aspect)
+            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+        return image
+    def encode_image(self, frame):
+        """Convert image to base64 encoding."""
+        frame_pil = PILImage.fromarray(frame)
+        buffered = io.BytesIO()
+        frame_pil.save(buffered, format="JPEG", quality=95)
+        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        return f"data:image/jpeg;base64,{img_base64}"
+    def get_scene_context(self, image: np.ndarray) -> str:
+        """Get scene understanding to determine context."""
+        try:
+            image_url = self.encode_image(image)
+            completion = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": """Describe the key areas and elements visible in this construction/workplace image. Include:
+                                1. Worker locations and activities
+                                2. Equipment and machinery positions
+                                3. Material storage or work areas
+                                4. Environmental features
+                                5. Access ways and pathways
+                                Format as:
+                                - Element: precise location description"""
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_url
+                                }
+                            }
+                        ]
+                    }
+                ],
+                temperature=0.3,
+                max_tokens=200,
+                stream=False
+            )
+            return completion.choices[0].message.content
+        except Exception as e:
+            print(f"Scene analysis error: {str(e)}")
+            return ""
+    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
+            """Analyze frame and return both safety analysis and scene context."""
             if frame is None:
+                return "No frame received", {}
+            # First get scene understanding
+            scene_context = self.get_scene_context(frame)
+            scene_regions = self.parse_scene_context(scene_context)
+            # Then perform safety analysis with context
             frame = self.preprocess_image(frame)
             image_url = self.encode_image(frame)
                             "content": [
                                 {
                                     "type": "text",
+                                    "text": """Analyze this workplace image for safety concerns. For each identified hazard:
+                                    1. Specify the exact location where the hazard exists
+                                    2. Describe the specific safety issue
+                                    3. Note any violations or risks
+                                    Format each observation exactly as:
+                                    - <location>area:detailed hazard description</location>
+                                    Consider all safety aspects:
+                                    - PPE compliance
+                                    - Ergonomic risks
+                                    - Equipment safety
+                                    - Environmental hazards
+                                    - Material handling
+                                    - Access/egress
+                                    - Work procedures
+                                    """
                                 },
                                 {
                                     "type": "image_url",
                     max_tokens=500,
                     stream=False
                 )
+                return completion.choices[0].message.content, scene_regions
             except Exception as e:
                 print(f"Analysis error: {str(e)}")
+                return f"Analysis Error: {str(e)}", scene_regions
+        def parse_scene_context(self, context: str) -> dict:
+            """Parse scene context to get region mapping."""
+            regions = {}
+            for line in context.split('\n'):
+                if line.strip().startswith('-'):
+                    parts = line.strip('- ').split(':')
+                    if len(parts) == 2:
+                        element_type = parts[0].strip()
+                        location = parts[1].strip()
+                        regions[element_type] = location
+            return regions
+        def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
+            """Convert location description to coordinates."""
+            height, width = image_shape[:2]
+            # Parse location description for spatial information
+            location = location.lower()
+            x1, y1, x2, y2 = 0, 0, width, height  # Default to full image
+            # Horizontal position
+            if 'left' in location:
+                x2 = width // 2
+            elif 'right' in location:
+                x1 = width // 2
+            elif 'center' in location:
+                x1 = width // 4
+                x2 = 3 * width // 4
+            # Vertical position
+            if 'top' in location:
+                y2 = height // 2
+            elif 'bottom' in location:
+                y1 = height // 2
+            elif 'middle' in location or 'center' in location:
+                y1 = height // 4
+                y2 = 3 * height // 4
+            return (x1, y1, x2, y2)
+        def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
+            """Draw safety observations using scene context."""
             height, width = image.shape[:2]
             font = cv2.FONT_HERSHEY_SIMPLEX
             font_scale = 0.5
             thickness = 2
             padding = 10
             for idx, obs in enumerate(observations):
                 color = self.colors[idx % len(self.colors)]
+                # Find best matching region from scene context or parse location directly
+                location = obs['location'].lower()
+                x1, y1, x2, y2 = self.get_region_coordinates(location, image.shape)
+                # Draw observation box
                 cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
+                # Add label
+                label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
                 label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
+                # Position text above the box
                 text_x = max(0, x1)
                 text_y = max(label_size[1] + padding, y1 - padding)
+                # Draw text background
                 cv2.rectangle(image,
+                             (text_x, text_y - label_size[1] - padding),
+                             (text_x + label_size[0] + padding, text_y),
+                             color, -1)
+                # Draw text
                 cv2.putText(image, label,
                            (text_x + padding//2, text_y - padding//2),
                            font, font_scale, (255, 255, 255), thickness)
+            return image
         def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
+                """Process frame with safety analysis and visualization."""
+                if frame is None:
+                    return None, "No image provided"
+                # Get analysis and scene context
+                analysis, scene_regions = self.analyze_frame(frame)
+                display_frame = frame.copy()
+                # Parse observations
+                observations = []
+                for line in analysis.split('\n'):
+                    line = line.strip()
+                    if line.startswith('-') and '<location>' in line and '</location>' in line:
+                        start = line.find('<location>') + len('<location>')
+                        end = line.find('</location>')
+                        location_description = line[start:end].strip()
+                        if ':' in location_description:
+                            location, description = location_description.split(':', 1)
+                            observations.append({
+                                'location': location.strip(),
+                                'description': description.strip()
+                            })
+                # Draw observations if any were found
+                if observations:
+                    annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
+                    return annotated_frame, analysis
+                return display_frame, analysis
+def create_monitor_interface():
     monitor = SafetyMonitor()
     with gr.Blocks() as demo:
         gr.Markdown("""
         ## Instructions:
         1. Upload any workplace/safety-related image
+        2. View identified hazards and their locations
+        3. Read detailed analysis of safety concerns
         """)
     return demo
+if __name__ == "__main__":
+    demo = create_monitor_interface()
+    demo.launch()