File size: 18,301 Bytes
3172319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
import os
import json
from typing import Dict, List, Tuple, Any, Optional

from scene_type import SCENE_TYPES
from scene_detail_templates import SCENE_DETAIL_TEMPLATES
from object_template_fillers import OBJECT_TEMPLATE_FILLERS
from activity_templates import ACTIVITY_TEMPLATES
from safety_templates import SAFETY_TEMPLATES
from confifence_templates import CONFIDENCE_TEMPLATES

class SceneDescriptor:
    """
    Generates natural language descriptions of scenes.
    Handles scene descriptions, activity inference, and safety concerns identification.
    """

    def __init__(self, scene_types=None, object_categories=None):
        """
        Initialize the scene descriptor

        Args:
            scene_types: Dictionary of scene type definitions
        """
        self.scene_types = scene_types or {}
        self.SCENE_TYPES = scene_types or {}

        if object_categories:
            self.OBJECT_CATEGORIES = object_categories
        else:
            # 從 JSON 加載或使用默認值
            self.OBJECT_CATEGORIES = self._load_json_data("object_categories") or {
                "furniture": [56, 57, 58, 59, 60, 61],
                "electronics": [62, 63, 64, 65, 66, 67, 68, 69, 70],
                "kitchen_items": [39, 40, 41, 42, 43, 44, 45],
                "food": [46, 47, 48, 49, 50, 51, 52, 53, 54, 55],
                "vehicles": [1, 2, 3, 4, 5, 6, 7, 8],
                "personal_items": [24, 25, 26, 27, 28, 73, 78, 79]
            }

        # 加載所有模板數據
        self._load_templates()

    def _load_templates(self):
        """Bind the imported template constants to instance attributes."""
        # Templates that shape the descriptive text
        self.scene_detail_templates = SCENE_DETAIL_TEMPLATES
        self.object_template_fillers = OBJECT_TEMPLATE_FILLERS
        self.confidence_templates = CONFIDENCE_TEMPLATES

        # Templates for the activity and safety sections
        self.activity_templates = ACTIVITY_TEMPLATES
        self.safety_templates = SAFETY_TEMPLATES


    def _initialize_fallback_templates(self):
        """Initialize fallback templates when no external data is available"""
        # 只在無法從文件或導入加載時使用
        self.confidence_templates = {
            "high": "{description} {details}",
            "medium": "This appears to be {description} {details}",
            "low": "This might be {description}, but the confidence is low. {details}"
        }

        # 僅提供最基本的模板作為後備
        self.scene_detail_templates = {
            "default": ["A space with various objects."]
        }

        self.object_template_fillers = {
            "default": ["various items"]
        }

        self.safety_templates = {
            "general": "Pay attention to {safety_element}."
        }

        self.activity_templates = {
            "default": ["General activity"]
        }

    def _get_alternative_scenes(self, scene_scores: Dict[str, float],
                            threshold: float, top_k: int = 2) -> List[Dict]:
        """
        Get alternative scene interpretations with their scores.

        Args:
            scene_scores: Dictionary of scene type scores
            threshold: Minimum confidence threshold
            top_k: Number of alternatives to return

        Returns:
            List of dictionaries with alternative scenes
        """
        # Sort scenes by score in descending order
        sorted_scenes = sorted(scene_scores.items(), key=lambda x: x[1], reverse=True)

        # Skip the first one (best match) and take the next top_k
        alternatives = []
        for scene_type, score in sorted_scenes[1:1+top_k]:
            if score >= threshold:
                alternatives.append({
                    "type": scene_type,
                    "name": self.SCENE_TYPES.get(scene_type, {}).get("name", "Unknown"),
                    "confidence": score
                })

        return alternatives


    def _infer_possible_activities(self, scene_type: str, detected_objects: List[Dict]) -> List[str]:
        """
        Infer possible activities based on scene type and detected objects.

        Args:
            scene_type: Identified scene type
            detected_objects: List of detected objects

        Returns:
            List of possible activities
        """
        activities = []

        if scene_type.startswith("aerial_view_"):
            if scene_type == "aerial_view_intersection":
                # 使用預定義的十字路口活動
                activities.extend(self.activity_templates.get("aerial_view_intersection", []))
                
                # 添加與行人和車輛相關的特定活動
                pedestrians = [obj for obj in detected_objects if obj["class_id"] == 0]
                vehicles = [obj for obj in detected_objects if obj["class_id"] in [2, 5, 7]]  # Car, bus, truck
                
                if pedestrians and vehicles:
                    activities.append("Waiting for an opportunity to cross the street")
                    activities.append("Obeying traffic signals")
            
            elif scene_type == "aerial_view_commercial_area":
                activities.extend(self.activity_templates.get("aerial_view_commercial_area", []))
                
            elif scene_type == "aerial_view_plaza":
                activities.extend(self.activity_templates.get("aerial_view_plaza", []))
            
            else:
                # 處理其他未明確定義的空中視角場景
                aerial_activities = [
                    "Street crossing", 
                    "Waiting for signals", 
                    "Following traffic rules", 
                    "Pedestrian movement"
                ]
                activities.extend(aerial_activities)

        if scene_type in self.activity_templates:
            activities.extend(self.activity_templates[scene_type])
        elif "default" in self.activity_templates:
            activities.extend(self.activity_templates["default"])

        detected_class_ids = [obj["class_id"] for obj in detected_objects]

        # Add activities based on specific object combinations
        if 62 in detected_class_ids and 57 in detected_class_ids:  # TV and sofa
            activities.append("Watching shows or movies")

        if 63 in detected_class_ids:  # laptop
            activities.append("Using a computer/laptop")

        if 67 in detected_class_ids:  # cell phone
            activities.append("Using a mobile phone")

        if 73 in detected_class_ids:  # book
            activities.append("Reading")

        if any(food_id in detected_class_ids for food_id in [46, 47, 48, 49, 50, 51, 52, 53, 54, 55]):
            activities.append("Eating or preparing food")

        # Person-specific activities
        if 0 in detected_class_ids:  # Person
            if any(vehicle in detected_class_ids for vehicle in [1, 2, 3, 5, 7]):  # Vehicles
                activities.append("Commuting or traveling")

            if 16 in detected_class_ids:  # Dog
                activities.append("Walking a dog")

            if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
                activities.append("Carrying personal items")

        # Remove duplicates
        return list(set(activities))

    def _identify_safety_concerns(self, detected_objects: List[Dict], scene_type: str) -> List[str]:
        """
        Identify potential safety concerns based on objects and scene type.

        Args:
            detected_objects: List of detected objects
            scene_type: Identified scene type

        Returns:
            List of potential safety concerns
        """
        concerns = []
        detected_class_ids = [obj["class_id"] for obj in detected_objects]

        # ORIGINAL SAFETY CONCERNS LOGIC

        # General safety concerns
        if 42 in detected_class_ids or 43 in detected_class_ids:  # Fork or knife
            concerns.append("Sharp utensils present")

        if 76 in detected_class_ids:  # Scissors
            concerns.append("Cutting tools present")

        # Traffic-related concerns
        if scene_type in ["city_street", "parking_lot"]:
            if 0 in detected_class_ids:  # Person
                if any(vehicle in detected_class_ids for vehicle in [2, 3, 5, 7, 8]):  # Vehicles
                    concerns.append("Pedestrians near vehicles")

            if 9 in detected_class_ids:  # Traffic light
                concerns.append("Monitor traffic signals")

        # Identify crowded scenes
        person_count = detected_class_ids.count(0)
        if person_count > 5:
            concerns.append(f"Crowded area with multiple people ({person_count})")

        # Scene-specific concerns
        if scene_type == "kitchen":
            if 68 in detected_class_ids or 69 in detected_class_ids:  # Microwave or oven
                concerns.append("Hot cooking equipment")

        # Potentially unstable objects
        for obj in detected_objects:
            if obj["class_id"] in [39, 40, 41, 45]:  # Bottle, wine glass, cup, bowl
                if obj["region"] in ["top_left", "top_center", "top_right"] and obj["normalized_area"] > 0.05:
                    concerns.append(f"Elevated {obj['class_name']} might be unstable")

        # NEW SAFETY CONCERNS LOGIC FOR ADDITIONAL SCENE TYPES

        # Upscale dining safety concerns
        if scene_type == "upscale_dining":
            # Check for fragile items
            if 40 in detected_class_ids:  # Wine glass
                concerns.append("Fragile glassware present")

            # Check for lit candles (can't directly detect but can infer from context)
            # Look for small bright spots that might be candles
            if any(obj["class_id"] == 41 for obj in detected_objects):  # Cup (which might include candle holders)
                # We can't reliably detect candles, but if the scene appears to be formal dining,
                # we can suggest this as a possibility
                concerns.append("Possible lit candles or decorative items requiring care")

            # Check for overcrowded table
            table_objs = [obj for obj in detected_objects if obj["class_id"] == 60]  # Dining table
            if table_objs:
                table_region = table_objs[0]["region"]
                items_on_table = 0

                for obj in detected_objects:
                    if obj["class_id"] in [39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55]:
                        if obj["region"] == table_region:
                            items_on_table += 1

                if items_on_table > 8:
                    concerns.append("Dining table has multiple items which should be handled with care")

        # Asian commercial street safety concerns
        elif scene_type == "asian_commercial_street":
            # Check for crowded walkways
            if 0 in detected_class_ids:  # Person
                person_count = detected_class_ids.count(0)
                if person_count > 3:
                    # Calculate person density (simplified)
                    person_positions = []
                    for obj in detected_objects:
                        if obj["class_id"] == 0:
                            person_positions.append(obj["normalized_center"])

                    if len(person_positions) >= 2:
                        # Calculate average distance between people
                        total_distance = 0
                        count = 0
                        for i in range(len(person_positions)):
                            for j in range(i+1, len(person_positions)):
                                p1 = person_positions[i]
                                p2 = person_positions[j]
                                distance = ((p2[0] - p1[0])**2 + (p2[1] - p1[1])**2)**0.5
                                total_distance += distance
                                count += 1

                        if count > 0:
                            avg_distance = total_distance / count
                            if avg_distance < 0.1:  # Close proximity
                                concerns.append("Crowded walkway with limited personal space")

            # Check for motorcycles/bicycles near pedestrians
            if (1 in detected_class_ids or 3 in detected_class_ids) and 0 in detected_class_ids:  # Bicycle/motorcycle and person
                concerns.append("Two-wheeled vehicles in pedestrian areas")

            # Check for potential trip hazards
            # We can't directly detect this, but can infer from context
            if scene_type == "asian_commercial_street" and "bottom" in " ".join([obj["region"] for obj in detected_objects if obj["class_id"] == 0]):
                # If people are in bottom regions, they might be walking on uneven surfaces
                concerns.append("Potential uneven walking surfaces in commercial area")

        # Financial district safety concerns
        elif scene_type == "financial_district":
            # Check for heavy traffic conditions
            vehicle_count = sum(1 for obj_id in detected_class_ids if obj_id in [2, 5, 7])  # Car, bus, truck
            if vehicle_count > 5:
                concerns.append("Heavy vehicle traffic in urban area")

            # Check for pedestrians crossing busy streets
            if 0 in detected_class_ids:  # Person
                person_count = detected_class_ids.count(0)
                vehicle_nearby = any(vehicle in detected_class_ids for vehicle in [2, 3, 5, 7])

                if person_count > 0 and vehicle_nearby:
                    concerns.append("Pedestrians navigating busy urban traffic")

            # Check for traffic signals
            if 9 in detected_class_ids:  # Traffic light
                concerns.append("Observe traffic signals when navigating this area")
            else:
                # If no traffic lights detected but it's a busy area, it's worth noting
                if vehicle_count > 3:
                    concerns.append("Busy traffic area potentially without visible traffic signals in view")

            # Time of day considerations
            # We don't have direct time data, but can infer from vehicle lights
            vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [2, 5, 7]]
            if vehicle_objs and any("lighting_conditions" in obj for obj in detected_objects):
                # If vehicles are present and it might be evening/night
                concerns.append("Reduced visibility conditions during evening commute")

        # Urban intersection safety concerns
        elif scene_type == "urban_intersection":
            # Check for pedestrians in crosswalks
            pedestrian_objs = [obj for obj in detected_objects if obj["class_id"] == 0]
            vehicle_objs = [obj for obj in detected_objects if obj["class_id"] in [2, 3, 5, 7]]

            if pedestrian_objs:
                # Calculate distribution of pedestrians to see if they're crossing
                pedestrian_positions = [obj["normalized_center"] for obj in pedestrian_objs]

                # Simplified check for pedestrians in crossing pattern
                if len(pedestrian_positions) >= 3:
                    # Check if pedestrians are distributed across different regions
                    pedestrian_regions = set(obj["region"] for obj in pedestrian_objs)
                    if len(pedestrian_regions) >= 2:
                        concerns.append("Multiple pedestrians crossing the intersection")

            # Check for traffic signal observation
            if 9 in detected_class_ids:  # Traffic light
                concerns.append("Observe traffic signals when crossing")

            # Check for busy intersection
            if len(vehicle_objs) > 3:
                concerns.append("Busy intersection with multiple vehicles")

            # Check for pedestrians potentially jay-walking
            if pedestrian_objs and not 9 in detected_class_ids:  # People but no traffic lights
                concerns.append("Pedestrians should use designated crosswalks")

            # Visibility concerns based on lighting
            # This would be better with actual lighting data
            pedestrian_count = len(pedestrian_objs)
            if pedestrian_count > 5:
                concerns.append("High pedestrian density at crossing points")

        # Transit hub safety concerns
        elif scene_type == "transit_hub":
            # These would be for transit areas like train stations or bus terminals
            if 0 in detected_class_ids:  # Person
                person_count = detected_class_ids.count(0)
                if person_count > 8:
                    concerns.append("Crowded transit area requiring careful navigation")

            # Check for luggage/bags that could be trip hazards
            if 24 in detected_class_ids or 28 in detected_class_ids:  # Backpack or suitcase
                concerns.append("Luggage and personal items may create obstacles")

            # Public transportation vehicles
            if any(vehicle in detected_class_ids for vehicle in [5, 6, 7]):  # Bus, train, truck
                concerns.append("Stay clear of arriving and departing transit vehicles")

        # Shopping district safety concerns
        elif scene_type == "shopping_district":
            # Check for crowded shopping areas
            if 0 in detected_class_ids:  # Person
                person_count = detected_class_ids.count(0)
                if person_count > 5:
                    concerns.append("Crowded shopping area with multiple people")

            # Check for shopping bags and personal items
            if 24 in detected_class_ids or 26 in detected_class_ids:  # Backpack or handbag
                concerns.append("Mind personal belongings in busy retail environment")

            # Check for store entrances/exits which might have automatic doors
            # We can't directly detect this, but can infer from context
            if scene_type == "shopping_district" and 0 in detected_class_ids:
                concerns.append("Be aware of store entrances and exits with potential automatic doors")

        return concerns