Reaper200 committed on
Commit
26de1f0
1 Parent(s): 2cf4d1c

Update app.py

Files changed (1)
  1. app.py +72 -94
app.py CHANGED
@@ -1,95 +1,73 @@
- import streamlit as st
- from PIL import Image
- import numpy as np
  import torch
- from transformers import DetrImageProcessor, DetrForObjectDetection
- from gtts import gTTS
-
- # Load the model and processor
- @st.cache_resource  # Cache the model to improve performance
- def load_model():
-     processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
-     model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
-     return processor, model
-
- # Function to detect objects in the image
- def detect_objects(image, processor, model):
-     # Preprocess the image and make predictions
-     inputs = processor(images=image, return_tensors="pt")
-     with torch.no_grad():
-         outputs = model(**inputs)
-
-     # Process the outputs
-     target_sizes = torch.tensor([image.size[::-1]])  # Convert to (height, width)
-     results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]
-
-     detected_objects = []
-     for score, label in zip(results["scores"], results["labels"]):
-         if score > 0.9:  # Confidence threshold
-             detected_objects.append(label.item())
-
-     return detected_objects, results
-
- # Function to convert label IDs to class names
- def get_object_names(class_ids):
-     # Sample mapping (extend this according to your model's output labels)
-     COCO_INSTANCE_CATEGORY_NAMES = [
-         "N/A", "person", "bicycle", "car", "motorcycle", "airplane",
-         "bus", "train", "truck", "boat", "traffic light", "fire hydrant",
-         "stop sign", "parking meter", "bench", "bird", "cat", "dog",
-         "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
-         "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-         "skis", "snowboard", "sports ball", "kite", "baseball bat",
-         "baseball glove", "skateboard", "surfboard", "tennis racket",
-         "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
-         "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
-         "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant",
-         "bed", "dining table", "toilet", "TV", "laptop", "mouse", "remote",
-         "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
-         "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
-         "hair drier", "toothbrush"
-     ]
-     return [COCO_INSTANCE_CATEGORY_NAMES[id] for id in class_ids]
-
- # Mock summarization function
- def generate_summary(relevant_objects):
-     st.write("Generating summary for relevant objects...")
-     summary = f"This is an {len(relevant_objects)}-item scene including: {', '.join(relevant_objects)}."
-     return summary
-
- # Mock text-to-speech function
- def text_to_speech(text):
-     st.write("Converting summary to speech...")
-     tts = gTTS(text)
-     tts.save("summary.mp3")
-     st.audio("summary.mp3")
-
- # Streamlit app main function
- def main():
-     st.title("Context-Aware Object Detection App with Hugging Face")
-
-     # Load model
-     processor, model = load_model()
-
-     # Step 1: Capture Image from Camera
-     captured_image = st.camera_input("Take a picture")
-
-     if captured_image is not None:
-         # Open the captured image
-         image = Image.open(captured_image)
-         st.image(image, caption="Captured Image", use_column_width=True)
-
-         # Step 2: Detect Objects
-         detected_ids, results = detect_objects(image, processor, model)
-         detected_objects = get_object_names(detected_ids)
-         st.write(f"Detected Objects: {detected_objects}")
-
-         # Step 3: Generate Summary
-         summary = generate_summary(detected_objects)
-         st.write(f"Summary: {summary}")
-
-         # Step 4: Convert Summary to Speech
-         text_to_speech(summary)
-
- if __name__ == "__main__":
-     main()
 
 
 
 
  import torch
+ import cv2
+ import pyttsx3
+ import random
+
+ # Load the YOLOv5n model via torch.hub (fetched from the ultralytics/yolov5 GitHub repo on first run)
+ model = torch.hub.load('ultralytics/yolov5', 'yolov5n')
+
+ # Initialize video capture
+ cap = cv2.VideoCapture('cars.mp4')
+
+ # Initialize text-to-speech engine
+ engine = pyttsx3.init()
+
+ # Simulated GPS location (latitude, longitude)
+ gps_location = (37.7749, -122.4194)  # Example coordinates for San Francisco
+
+ # Function to speak the detected object
+ def speak(text):
+     engine.say(text)
+     engine.runAndWait()
+
+ while True:
+     ret, img = cap.read()
+     if not ret:
+         break
+
+     # Perform detection on the frame
+     result = model(img)
+     print('result: ', result)
+
+     # Convert the detection results to a pandas DataFrame
+     data_frame = result.pandas().xyxy[0]
+     print('data_frame:')
+     print(data_frame)
+
+     # Iterate over every detected row
+     indexes = data_frame.index
+     for index in indexes:
+         # Coordinates of the top-left corner of the bounding box
+         x1 = int(data_frame['xmin'][index])
+         y1 = int(data_frame['ymin'][index])
+         # Coordinates of the bottom-right corner of the bounding box
+         x2 = int(data_frame['xmax'][index])
+         y2 = int(data_frame['ymax'][index])
+
+         # Label name and confidence score
+         label = data_frame['name'][index]
+         conf = data_frame['confidence'][index]
+         text = f"{label} {conf:.2f}"
+
+         # Draw the bounding box and label on the frame
+         cv2.rectangle(img, (x1, y1), (x2, y2), (255, 255, 0), 2)
+         cv2.putText(img, text, (x1, y1 - 5), cv2.FONT_HERSHEY_PLAIN, 2, (255, 255, 0), 2)
+
+         # Context-aware action based on detected objects
+         if label == "car" and conf > 0.5:
+             # Announce the detected car and the GPS location
+             speak(f"Car detected at GPS location: {gps_location[0]}, {gps_location[1]}")
+             # More context-based features (e.g., alerting, saving data) can be added here
+
+     # Display GPS coordinates on the frame
+     gps_text = f"GPS: {gps_location[0]:.4f}, {gps_location[1]:.4f}"
+     cv2.putText(img, gps_text, (10, 30), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 2)
+
+     # Show the processed frame
+     cv2.imshow('IMAGE', img)
+     if cv2.waitKey(1) & 0xFF == ord('q'):
+         break
+
+ # Release resources
+ cap.release()
+ cv2.destroyAllWindows()