initial commit

Files changed:
- .gitignore +3 -0
- README.md +5 -210
- app.py +44 -91
- object_detection.py +0 -420
- requirements.txt +2 -2
- sentiment.py +0 -141
.gitignore CHANGED
@@ -117,6 +117,9 @@ dmypy.json
 # MacOS specific
 .DS_Store
 
+# Specific model files
+yolov8n.pt
+
 # Keep empty models dir
 models/*
 !models/.gitkeep
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: YOLO v8 Playground
 emoji: 🦀
 colorFrom: indigo
 colorTo: blue
@@ -10,218 +10,13 @@ pinned: false
 license: mit
 ---
 
-#
+# Yolo v8 Playground
 
-This Streamlit application
+This Streamlit application uses the Computer Vision Playground as its base and has modified the analyze_frame function, replacing the example face/sentiment detection with object detection.
-
-## How to Use
-
-1. Clone the repository.
-2. Ensure you have the necessary packages installed: `pip install -r requirements.txt`
-3. Run the application: `streamlit run app.py`
-
-## Create Your Own Analysis Space
-
-Follow these steps to set up and modify the application for your own image analysis:
-
-### Step 1: Clone the Repository
-
-First, you need to clone the repository to your local machine. Open your terminal or command prompt and run:
-
-```sh
-git clone https://huggingface.co/spaces/eusholli/computer-vision-playground
-cd computer-vision-playground
-```
-
-### Step 2: Install Dependencies
-
-Make sure you have Python installed on your machine. You can download it from [python.org](https://www.python.org/).
-
-Next, install the required packages. In the terminal, navigate to the cloned repository directory and run:
-
-```sh
-pip install -r requirements.txt
-```
-
-This will install all the necessary libraries specified in the `requirements.txt` file.
-
-### Step 3: Run the Application
-
-To start the Streamlit application, run:
-
-```sh
-streamlit run app.py
-```
-
-This will open a new tab in your default web browser with the Streamlit interface.
-
-## Using the Application
-
-#### Webcam Stream
-
-- Allow access to your webcam when prompted.
-- You will see the live stream from your webcam in the "Input Stream" section.
-- The application will analyze the video frames in real-time and display the sentiment results in the "Analysis" section.
-
-#### Uploading Images
-
-- In the "Input Stream" section, under "Upload an Image", click on the "Choose an image..." button.
-- Select an image file (jpg, jpeg, png) from your computer.
-- The application will analyze the uploaded image and display the sentiment results.
-
-#### Image URL
-
-- In the "Input Stream" section, under "Or Enter Image URL", paste an image URL and press Enter.
-- The application will download and analyze the image from the provided URL and display the sentiment results.
-
-#### Uploading Videos
-
-- In the "Input Stream" section, under "Upload a Video", click on the "Choose a video..." button.
-- Select a video file (mp4, avi, mov, mkv) from your computer.
-- The application will analyze the video frames and display the sentiment results.
-
-#### Video URL
-
-- In the "Input Stream" section, under "Or Enter Video Download URL", paste a video URL and press Enter.
-- The application will download and analyze the video from the provided URL and display the sentiment results.
-
-## Customize the Analysis
-
-You can customize the analysis function to perform your own image analysis. The default function `analyze_frame` performs facial sentiment analysis. To use your own analysis:
-
-1. Replace the contents of the `analyze_frame` function in `app.py` with your custom analysis code.
-2. Update any necessary imports at the top of the `app.py` file.
-3. Adjust the `ANALYSIS_TITLE` variable to reflect your custom analysis.
-
-Example:
-
-```python
-ANALYSIS_TITLE = "Custom Analysis"
-
-def analyze_frame(frame: np.ndarray):
-    # Your custom analysis code here
-    ...
-```
-
-### Troubleshooting
-
-If you encounter any issues:
-
-- Ensure all dependencies are correctly installed.
-- Check that your webcam is working and accessible.
-- Verify the URLs you provide are correct and accessible.
-
-For more detailed information, refer to the comments in the `app.py` file.
-
-### Debugging using Vscode
-
-If you are using Vscode as your IDE you can use the following launch.json file to debug the current file (e.g. app.py) in your editor.
-
-```json
-{
-    "version": "0.2.0",
-    "configurations": [
-        {
-            "name": "Python:Streamlit",
-            "type": "debugpy",
-            "request": "launch",
-            "module": "streamlit",
-            "args": [
-                "run",
-                "${file}",
-                "--server.port",
-                "2000"
-            ]
-        }
-    ]
-}
-```
-
-# How to Create a New Huggingface Space and Push Code to It
-
-## Step 1: Create a New Huggingface Space
-1. Log in to your [Huggingface](https://huggingface.co/) account.
-2. Go to the [Spaces](https://huggingface.co/spaces) section.
-3. Click on the **Create new Space** button.
-4. Fill in the details for your new Space:
-   - **Space name**: Choose a unique name for your Space.
-   - **Owner**: Ensure your username is selected.
-   - **Visibility**: Choose between Public or Private based on your preference.
-   - **SDK**: Select the SDK you will use (in this case `streamlit`).
-5. Click on the **Create Space** button to create your new Space.
-
-## Step 2: Change the Local Git Remote Repo Reference
-1. Open your terminal or command prompt.
-2. Navigate to your local project directory:
-   ```bash
-   cd /path/to/your/project
-   ```
-3. Remove the existing remote reference (if any):
-   ```bash
-   git remote remove origin
-   ```
-4. Add the new remote reference pointing to your newly created Huggingface Space. Replace `<your-username>` and `<your-space-name>` with your actual Huggingface username and Space name:
-   ```bash
-   git remote add origin https://huggingface.co/spaces/<your-username>/<your-space-name>.git
-   ```
-
-## Step 3: Add, Commit, and Push the Code to the New Space
-1. Stage all the changes in your local project directory:
-   ```bash
-   git add .
-   ```
-2. Commit the changes with a meaningful commit message:
-   ```bash
-   git commit -m "Initial commit to Huggingface Space"
-   ```
-3. Push the changes to the new Huggingface Space:
-   ```bash
-   git push origin main
-   ```
-
-> **Note**: If your default branch is not `main`, replace `main` with the appropriate branch name in the push command.
-
-## Conclusion
-You have now successfully created a new Huggingface Space, updated your local Git remote reference, and pushed your code to the new Space. You can verify that your code has been uploaded by visiting your Huggingface Space's URL.
-
-## Webcam STUN/TURN Server
-
-When running remotely on Huggingface, the code needs to access your remote webcam. It does this using the [streamlit-webrtc](https://github.com/whitphx/streamlit-webrtc) module but requires a Twilio account to be established and the credentials uploaded to the Huggingface space.
-
-### How to Create a Free Twilio Account and Add Credentials to Huggingface Space Settings
-
-#### Step 1: Create a Free Twilio Account
-1. Go to the [Twilio Sign-Up Page](https://www.twilio.com/try-twilio).
-2. Fill in your details to create a new account.
-3. Verify your email address and phone number.
-4. After verification, log in to your Twilio dashboard.
-
-#### Step 2: Obtain `TWILIO_ACCOUNT_SID` and `TWILIO_AUTH_TOKEN`
-1. In the Twilio dashboard, navigate to the **Console**.
-2. Look for the **Account Info** section on the dashboard.
-3. Here, you will find your `Account SID` (referred to as `TWILIO_ACCOUNT_SID`).
-4. To obtain your `Auth Token` (referred to as `TWILIO_AUTH_TOKEN`), click on the **Show** button next to the `Auth Token`.
-
-#### Step 3: Add Twilio Credentials to Huggingface Space Settings
-1. Log in to your [Huggingface](https://huggingface.co/) account.
-2. Navigate to your Huggingface Space where you need to add the credentials.
-3. Go to the **Settings** of your Space.
-4. In the **Variables and secrets** section:
-   - Click on the **New variable** button to add `TWILIO_ACCOUNT_SID`:
-     - Name: `TWILIO_ACCOUNT_SID`
-     - Value: Copy your `Account SID` from the Twilio dashboard and paste it here.
-   - Click on the **New secret** button to add `TWILIO_AUTH_TOKEN`:
-     - Name: `TWILIO_AUTH_TOKEN`
-     - Value: Copy your `Auth Token` from the Twilio dashboard and paste it here.
-
-5. Save the changes.
-
-You have now successfully added your Twilio credentials to the Huggingface Space settings. Your application should now be able to access and use the Twilio API for WebRTC functionality.
+To learn how to do the same yourself and start playing with computer vision models read [here](https://huggingface.co/spaces/eusholli/computer-vision-playground).
+
+A massive thanks to [Ultralytics](https://docs.ultralytics.com/modes/track/#python-examples)
 
 
 ### Contributing
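The new README text above describes swapping the face/sentiment analysis for YOLOv8 object detection. As a point of reference, a minimal standalone sketch of the Ultralytics calls the new `analyze_frame` relies on (see the app.py diff below) is shown here; `"image.jpg"` is a placeholder path and `yolov8n.pt` is assumed to be available locally or downloadable by ultralytics.

```python
# Minimal sketch of the YOLOv8 detection/tracking calls used by the new analyze_frame.
# Assumes the ultralytics package is installed; "image.jpg" is a placeholder path.
import cv2
from ultralytics import YOLO

model = YOLO("yolov8n.pt")        # small pretrained COCO model
frame = cv2.imread("image.jpg")   # any BGR frame (webcam, video, or image)

# persist=True keeps track IDs stable across successive frames;
# a plain model(frame) predict call also works if tracker extras are not installed.
results = model.track(frame, persist=True)

for box in results[0].boxes:
    class_id = int(box.cls)
    print(model.names[class_id], float(box.conf),
          [round(v.item(), 2) for v in box.xyxy.flatten()])

annotated = results[0].plot()     # frame with boxes and labels drawn on it
cv2.imwrite("annotated.jpg", annotated)
```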
app.py CHANGED
@@ -1,11 +1,11 @@
+from ultralytics import YOLO
 import torch
 import tensorflow as tf
 import time
 import os
 import logging
-import queue
 from pathlib import Path
-from typing import List
+from typing import List
 
 import av
 import cv2
@@ -24,86 +24,56 @@ import requests
 from io import BytesIO  # Import for handling byte streams
 
 
-# Named tuple to store detection results
-class Detection(NamedTuple):
-    class_id: int
-    label: str
-    score: float
-    box: np.ndarray
-
-
-# Queue to store detection results
-result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
-
 # CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
 # Update below string to set display title of analysis
 
-#
-
-mtcnn = MTCNN()
-
-# Initialize the Hugging Face pipeline for facial emotion detection
-emotion_pipeline = pipeline("image-classification",
-                            model="trpakov/vit-face-expression")
-
-# Default title - "Facial Sentiment Analysis"
-
-ANALYSIS_TITLE = "Facial Sentiment Analysis"
+# Default title - "Facial Sentiment Analysis"
+
+ANALYSIS_TITLE = "YOLO-8 Object Detection Analysis"
+
+# Load the YOLOv8 model
+model = YOLO("yolov8n.pt")
 
 # CHANGE THE CONTENTS OF THIS FUNCTION, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
 #
-#
-# Function to analyze an input frame and generate an analyzed frame
-# This function takes an input video frame, detects faces in it using MTCNN,
-# then for each detected face, it analyzes the sentiment (emotion) using the analyze_sentiment function,
-# draws a rectangle around the face, and overlays the detected emotion on the frame.
-# It also records the time taken to process the frame and stores it in a global container.
-# Constants for text and line size in the output image
-TEXT_SIZE = 1
-LINE_SIZE = 2
 
 
 # Set analysis results in img_container and result queue for display
 # img_container["input"] - holds the input frame contents - of type np.ndarray
 # img_container["analyzed"] - holds the analyzed frame with any added annotations - of type np.ndarray
 # img_container["analysis_time"] - holds how long the analysis has taken in miliseconds
-# result_queue - holds the analysis metadata results - of type
+# result_queue - holds the analysis metadata results - of type dictionary
 def analyze_frame(frame: np.ndarray):
     start_time = time.time()  # Start timing the analysis
     img_container["input"] = frame  # Store the input frame
     frame = frame.copy()  # Create a copy of the frame to modify
 
-        cv2.FONT_HERSHEY_SIMPLEX,
-        TEXT_SIZE,
-        (255, 255, 255),
-        2,
-    )
+    # Run YOLOv8 tracking on the frame, persisting tracks between frames
+    results = model.track(frame, persist=True)
+
+    # Initialize a list to store Detection objects
+    detections = []
+    object_counter = 1
+
+    # Iterate over the detected boxes
+    for box in results[0].boxes:
+        detection = {}
+        # Extract class id, label, score, and bounding box coordinates
+        class_id = int(box.cls)
+
+        detection["id"] = object_counter
+        detection["label"] = model.names[class_id]
+        detection["score"] = float(box.conf)
+        detection["box_coords"] = [round(value.item(), 2)
+                                   for value in box.xyxy.flatten()]
+
+        detections.append(detection)
+        object_counter += 1
+
+    # Visualize the results on the frame
+    frame = results[0].plot()
 
     end_time = time.time()  # End timing the analysis
     execution_time_ms = round(
@@ -112,27 +82,13 @@ def analyze_frame(frame: np.ndarray):
     # Store the execution time
     img_container["analysis_time"] = execution_time_ms
 
-
+    # store the detections
+    img_container["detections"] = detections
     img_container["analyzed"] = frame  # Store the analyzed frame
 
     return  # End of the function
 
 
-# Function to analyze the sentiment (emotion) of a detected face
-# This function converts the face from BGR to RGB format, then converts it to a PIL image,
-# uses a pre-trained emotion detection model to get emotion predictions,
-# and finally returns the most dominant emotion detected.
-def analyze_sentiment(face):
-    # Convert face to RGB format
-    rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-    pil_image = Image.fromarray(rgb_face)  # Convert to PIL image
-    results = emotion_pipeline(pil_image)  # Run emotion detection on the image
-    dominant_emotion = max(results, key=lambda x: x["score"])[
-        "label"
-    ]  # Get the dominant emotion
-    return dominant_emotion  # Return the detected emotion
-
-
 #
 #
 # DO NOT TOUCH THE BELOW CODE (NOT NEEDED)
@@ -157,7 +113,8 @@ logging.getLogger("torch").setLevel(logging.ERROR)
 logging.getLogger("streamlit").setLevel(logging.ERROR)
 
 # Container to hold image data and analysis results
-img_container = {"input": None, "analyzed": None,
+img_container = {"input": None, "analyzed": None,
+                 "analysis_time": None, "detections": None}
 
 # Logger for debugging and information
 logger = logging.getLogger(__name__)
@@ -294,12 +251,6 @@ def analysis_init():
 # This function retrieves the latest frames and results from the global container and result queue,
 # and updates the placeholders in the Streamlit UI with the current input frame, analyzed frame, analysis time, and detected labels.
 def publish_frame():
-    if not result_queue.empty():
-        result = result_queue.get()
-        if show_labels:
-            labels_placeholder.table(
-                result
-            )  # Display labels if the checkbox is checked
 
     img = img_container["input"]
     if img is None:
@@ -318,6 +269,15 @@ def publish_frame():
     # Display the analysis time
     analysis_time.text(f"Analysis Time: {time} ms")
 
+    detections = img_container["detections"]
+    if detections is None:
+        return
+
+    if show_labels:
+        labels_placeholder.table(
+            detections
+        )  # Display labels if the checkbox is checked
+
 
 # If the WebRTC streamer is playing, initialize and publish frames
 if webrtc_ctx.state.playing:
@@ -361,13 +321,6 @@ def process_video(video_path):
         )  # Analyze the frame for face detection and sentiment analysis
         publish_frame()  # Publish the results
 
-        if not result_queue.empty():
-            result = result_queue.get()
-            if show_labels:
-                labels_placeholder.table(
-                    result
-                )  # Display labels if the checkbox is checked
-
     cap.release()  # Release the video capture object
 
 
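The app.py hunks above also replace the old `result_queue` mechanism with the shared `img_container` dict, which now carries a `"detections"` list of plain dictionaries that the UI renders as a table. A small self-contained sketch of that hand-off, using hypothetical `_stub` helpers rather than the actual app functions:

```python
# Sketch of the container hand-off introduced above: the analysis side (producer)
# writes into a shared dict, the UI side (consumer) reads from it.
# Simplified stand-ins; the real app wires these into streamlit-webrtc callbacks.
import numpy as np

img_container = {"input": None, "analyzed": None,
                 "analysis_time": None, "detections": None}

def analyze_frame_stub(frame: np.ndarray) -> None:
    # In app.py this is where model.track() runs; here we fake one detection.
    img_container["input"] = frame
    img_container["detections"] = [{"id": 1, "label": "person",
                                    "score": 0.9, "box_coords": [0, 0, 10, 10]}]
    img_container["analyzed"] = frame

def publish_frame_stub() -> None:
    detections = img_container["detections"]
    if detections is None:
        return
    for d in detections:
        print(d["label"], d["score"], d["box_coords"])

analyze_frame_stub(np.zeros((8, 8, 3), dtype=np.uint8))
publish_frame_stub()
```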
object_detection.py DELETED
@@ -1,420 +0,0 @@
-import torch
-import tensorflow as tf
-import time
-import os
-import logging
-import queue
-from pathlib import Path
-from typing import List, NamedTuple
-
-import av
-import cv2
-import numpy as np
-import streamlit as st
-from streamlit_webrtc import WebRtcMode, webrtc_streamer
-
-from utils.download import download_file
-from utils.turn import get_ice_servers
-
-from PIL import Image, ImageDraw  # Import PIL for image processing
-from transformers import pipeline  # Import Hugging Face transformers pipeline
-
-import requests
-from io import BytesIO  # Import for handling byte streams
-
-
-# Named tuple to store detection results
-class Detection(NamedTuple):
-    class_id: int
-    label: str
-    score: float
-    box: np.ndarray
-
-
-# Queue to store detection results
-result_queue: "queue.Queue[List[Detection]]" = queue.Queue()
-
-# CHANGE CODE BELOW HERE, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
-# Update below string to set display title of analysis
-
-# Appropriate imports needed for analysis
-
-MODEL_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.caffemodel"
-MODEL_LOCAL_PATH = Path("./models/MobileNetSSD_deploy.caffemodel")
-PROTOTXT_URL = "https://github.com/robmarkcole/object-detection-app/raw/master/model/MobileNetSSD_deploy.prototxt.txt"
-PROTOTXT_LOCAL_PATH = Path("./models/MobileNetSSD_deploy.prototxt.txt")
-
-CLASSES = [
-    "background",
-    "aeroplane",
-    "bicycle",
-    "bird",
-    "boat",
-    "bottle",
-    "bus",
-    "car",
-    "cat",
-    "chair",
-    "cow",
-    "diningtable",
-    "dog",
-    "horse",
-    "motorbike",
-    "person",
-    "pottedplant",
-    "sheep",
-    "sofa",
-    "train",
-    "tvmonitor",
-]
-
-# Generate random colors for each class label
-
-
-def generate_label_colors():
-    return np.random.uniform(0, 255, size=(len(CLASSES), 3))
-
-
-COLORS = generate_label_colors()
-
-# Download model and prototxt files
-
-
-def download_file(url, local_path, expected_size=None):
-    if not local_path.exists() or (expected_size and local_path.stat().st_size != expected_size):
-        import requests
-        with open(local_path, "wb") as f:
-            response = requests.get(url)
-            f.write(response.content)
-
-
-download_file(MODEL_URL, MODEL_LOCAL_PATH, expected_size=23147564)
-download_file(PROTOTXT_URL, PROTOTXT_LOCAL_PATH, expected_size=29353)
-
-# Load the model
-net = cv2.dnn.readNetFromCaffe(str(PROTOTXT_LOCAL_PATH), str(MODEL_LOCAL_PATH))
-
-
-# Default title - "Facial Sentiment Analysis"
-
-ANALYSIS_TITLE = "Object Detection Analysis"
-
-# CHANGE THE CONTENTS OF THIS FUNCTION, USE TO REPLACE WITH YOUR WANTED ANALYSIS.
-#
-
-# Set analysis results in img_container and result queue for display
-# img_container["input"] - holds the input frame contents - of type np.ndarray
-# img_container["analyzed"] - holds the analyzed frame with any added annotations - of type np.ndarray
-# img_container["analysis_time"] - holds how long the analysis has taken in miliseconds
-# result_queue - holds the analysis metadata results - of type queue.Queue[List[Detection]]
-
-
-def analyze_frame(frame: np.ndarray):
-    start_time = time.time()  # Start timing the analysis
-    img_container["input"] = frame  # Store the input frame
-    frame = frame.copy()  # Create a copy of the frame to modify
-
-    # Run inference
-    blob = cv2.dnn.blobFromImage(
-        cv2.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5
-    )
-    net.setInput(blob)
-    output = net.forward()
-
-    h, w = frame.shape[:2]
-
-    # Filter the detections based on the score threshold
-    score_threshold = 0.5  # You can adjust the score threshold as needed
-    output = output.squeeze()  # (1, 1, N, 7) -> (N, 7)
-    output = output[output[:, 2] >= score_threshold]
-    detections = [
-        Detection(
-            class_id=int(detection[1]),
-            label=CLASSES[int(detection[1])],
-            score=float(detection[2]),
-            box=(detection[3:7] * np.array([w, h, w, h])),
-        )
-        for detection in output
-    ]
-
-    # Render bounding boxes and captions
-    for detection in detections:
-        caption = f"{detection.label}: {round(detection.score * 100, 2)}%"
-        color = COLORS[detection.class_id]
-        xmin, ymin, xmax, ymax = detection.box.astype("int")
-
-        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
-        cv2.putText(
-            frame,
-            caption,
-            (xmin, ymin - 15 if ymin - 15 > 15 else ymin + 15),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.5,
-            color,
-            2,
-        )
-
-    end_time = time.time()  # End timing the analysis
-    # Calculate execution time in milliseconds
-    execution_time_ms = round((end_time - start_time) * 1000, 2)
-    # Store the execution time
-    img_container["analysis_time"] = execution_time_ms
-
-    result_queue.put(detections)  # Put the results in the result queue
-    img_container["analyzed"] = frame  # Store the analyzed frame
-
-    return  # End of the function
-
-#
-#
-# DO NOT TOUCH THE BELOW CODE (NOT NEEDED)
-#
-#
-
-
-# Suppress FFmpeg logs
-os.environ["FFMPEG_LOG_LEVEL"] = "quiet"
-
-# Suppress TensorFlow or PyTorch progress bars
-
-tf.get_logger().setLevel("ERROR")
-os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-
-# Suppress PyTorch logs
-
-logging.getLogger().setLevel(logging.WARNING)
-torch.set_num_threads(1)
-logging.getLogger("torch").setLevel(logging.ERROR)
-
-# Suppress Streamlit logs using the logging module
-logging.getLogger("streamlit").setLevel(logging.ERROR)
-
-# Container to hold image data and analysis results
-img_container = {"input": None, "analyzed": None, "analysis_time": None}
-
-# Logger for debugging and information
-logger = logging.getLogger(__name__)
-
-
-# Callback function to process video frames
-# This function is called for each video frame in the WebRTC stream.
-# It converts the frame to a numpy array in RGB format, analyzes the frame,
-# and returns the original frame.
-def video_frame_callback(frame: av.VideoFrame) -> av.VideoFrame:
-    # Convert frame to numpy array in RGB format
-    img = frame.to_ndarray(format="rgb24")
-    analyze_frame(img)  # Analyze the frame
-    return frame  # Return the original frame
-
-
-# Get ICE servers for WebRTC
-ice_servers = get_ice_servers()
-
-# Streamlit UI configuration
-st.set_page_config(layout="wide")
-
-# Custom CSS for the Streamlit page
-st.markdown(
-    """
-    <style>
-    .main {
-        padding: 2rem;
-    }
-    h1, h2, h3 {
-        font-family: 'Arial', sans-serif;
-    }
-    h1 {
-        font-weight: 700;
-        font-size: 2.5rem;
-    }
-    h2 {
-        font-weight: 600;
-        font-size: 2rem;
-    }
-    h3 {
-        font-weight: 500;
-        font-size: 1.5rem;
-    }
-    </style>
-    """,
-    unsafe_allow_html=True,
-)
-
-# Streamlit page title and subtitle
-st.title("Computer Vision Playground")
-
-# Add a link to the README file
-st.markdown(
-    """
-    <div style="text-align: left;">
-        <p>See the <a href="https://huggingface.co/spaces/eusholli/sentiment-analyzer/blob/main/README.md"
-        target="_blank">README</a> to learn how to use this code to help you start your computer vision exploration.</p>
-    </div>
-    """,
-    unsafe_allow_html=True,
-)
-
-st.subheader(ANALYSIS_TITLE)
-
-# Columns for input and output streams
-col1, col2 = st.columns(2)
-
-with col1:
-    st.header("Input Stream")
-    st.subheader("input")
-    # WebRTC streamer to get video input from the webcam
-    webrtc_ctx = webrtc_streamer(
-        key="input-webcam",
-        mode=WebRtcMode.SENDRECV,
-        rtc_configuration=ice_servers,
-        video_frame_callback=video_frame_callback,
-        media_stream_constraints={"video": True, "audio": False},
-        async_processing=True,
-    )
-
-    # File uploader for images
-    st.subheader("Upload an Image")
-    uploaded_file = st.file_uploader(
-        "Choose an image...", type=["jpg", "jpeg", "png"])
-
-    # Text input for image URL
-    st.subheader("Or Enter Image URL")
-    image_url = st.text_input("Image URL")
-
-    # File uploader for videos
-    st.subheader("Upload a Video")
-    uploaded_video = st.file_uploader(
-        "Choose a video...", type=["mp4", "avi", "mov", "mkv"]
-    )
-
-    # Text input for video URL
-    st.subheader("Or Enter Video Download URL")
-    video_url = st.text_input("Video URL")
-
-# Streamlit footer
-st.markdown(
-    """
-    <div style="text-align: center; margin-top: 2rem;">
-        <p>If you want to set up your own computer vision playground see <a href="https://huggingface.co/spaces/eusholli/computer-vision-playground/blob/main/README.md" target="_blank">here</a>.</p>
-    </div>
-    """,
-    unsafe_allow_html=True
-)
-
-# Function to initialize the analysis UI
-# This function sets up the placeholders and UI elements in the analysis section.
-# It creates placeholders for input and output frames, analysis time, and detected labels.
-
-
-def analysis_init():
-    global analysis_time, show_labels, labels_placeholder, input_placeholder, output_placeholder
-
-    with col2:
-        st.header("Analysis")
-        st.subheader("Input Frame")
-        input_placeholder = st.empty()  # Placeholder for input frame
-
-        st.subheader("Output Frame")
-        output_placeholder = st.empty()  # Placeholder for output frame
-        analysis_time = st.empty()  # Placeholder for analysis time
-        show_labels = st.checkbox(
-            "Show the detected labels", value=True
-        )  # Checkbox to show/hide labels
-        labels_placeholder = st.empty()  # Placeholder for labels
-
-
-# Function to publish frames and results to the Streamlit UI
-# This function retrieves the latest frames and results from the global container and result queue,
-# and updates the placeholders in the Streamlit UI with the current input frame, analyzed frame, analysis time, and detected labels.
-def publish_frame():
-    if not result_queue.empty():
-        result = result_queue.get()
-        if show_labels:
-            labels_placeholder.table(
-                result
-            )  # Display labels if the checkbox is checked
-
-    img = img_container["input"]
-    if img is None:
-        return
-    input_placeholder.image(img, channels="RGB")  # Display the input frame
-
-    analyzed = img_container["analyzed"]
-    if analyzed is None:
-        return
-    # Display the analyzed frame
-    output_placeholder.image(analyzed, channels="RGB")
-
-    time = img_container["analysis_time"]
-    if time is None:
-        return
-    # Display the analysis time
-    analysis_time.text(f"Analysis Time: {time} ms")
-
-
-# If the WebRTC streamer is playing, initialize and publish frames
-if webrtc_ctx.state.playing:
-    analysis_init()  # Initialize the analysis UI
-    while True:
-        publish_frame()  # Publish the frames and results
-        time.sleep(0.1)  # Delay to control frame rate
-
-
-# If an image is uploaded or a URL is provided, process the image
-if uploaded_file is not None or image_url:
-    analysis_init()  # Initialize the analysis UI
-
-    if uploaded_file is not None:
-        image = Image.open(uploaded_file)  # Open the uploaded image
-        img = np.array(image.convert("RGB"))  # Convert the image to RGB format
-    else:
-        response = requests.get(image_url)  # Download the image from the URL
-        # Open the downloaded image
-        image = Image.open(BytesIO(response.content))
-        img = np.array(image.convert("RGB"))  # Convert the image to RGB format
-
-    analyze_frame(img)  # Analyze the image
-    publish_frame()  # Publish the results
-
-
-# Function to process video files
-# This function reads frames from a video file, analyzes each frame for face detection and sentiment analysis,
-# and updates the Streamlit UI with the current input frame, analyzed frame, and detected labels.
-def process_video(video_path):
-    cap = cv2.VideoCapture(video_path)  # Open the video file
-    while cap.isOpened():
-        ret, frame = cap.read()  # Read a frame from the video
-        if not ret:
-            break  # Exit the loop if no more frames are available
-
-        # Display the current frame as the input frame
-        input_placeholder.image(frame)
-        analyze_frame(
-            frame
-        )  # Analyze the frame for face detection and sentiment analysis
-        publish_frame()  # Publish the results
-
-        if not result_queue.empty():
-            result = result_queue.get()
-            if show_labels:
-                labels_placeholder.table(
-                    result
-                )  # Display labels if the checkbox is checked
-
-    cap.release()  # Release the video capture object
-
-
-# If a video is uploaded or a URL is provided, process the video
-if uploaded_video is not None or video_url:
-    analysis_init()  # Initialize the analysis UI
-
-    if uploaded_video is not None:
-        video_path = uploaded_video.name  # Get the name of the uploaded video
-        with open(video_path, "wb") as f:
-            # Save the uploaded video to a file
-            f.write(uploaded_video.getbuffer())
-    else:
-        # Download the video from the URL
-        video_path = download_file(video_url)
-
-    process_video(video_path)  # Process the video
requirements.txt CHANGED
@@ -3,9 +3,9 @@ opencv-python-headless
 numpy
 transformers
 torch
-mtcnn
 setuptools
 tensorflow
 tf-keras
 streamlit_webrtc
-twilio
+twilio
+ultralytics
sentiment.py DELETED
@@ -1,141 +0,0 @@
-import threading
-
-import streamlit as st
-import cv2
-import numpy as np
-from transformers import pipeline
-from PIL import Image, ImageDraw
-from mtcnn import MTCNN
-from streamlit_webrtc import webrtc_streamer
-import logging
-
-# Suppress transformers progress bars
-logging.getLogger("transformers").setLevel(logging.ERROR)
-
-lock = threading.Lock()
-img_container = {"webcam": None,
-                 "analyzed": None}
-
-# Initialize the Hugging Face pipeline for facial emotion detection
-emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")
-
-# Initialize MTCNN for face detection
-mtcnn = MTCNN()
-
-# Function to analyze sentiment
-def analyze_sentiment(face):
-    # Convert face to RGB
-    rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
-    # Convert the face to a PIL image
-    pil_image = Image.fromarray(rgb_face)
-    # Analyze sentiment using the Hugging Face pipeline
-    results = emotion_pipeline(pil_image)
-    # Get the dominant emotion
-    dominant_emotion = max(results, key=lambda x: x['score'])['label']
-    return dominant_emotion
-
-TEXT_SIZE = 3
-
-# Function to detect faces, analyze sentiment, and draw a red box around them
-def detect_and_draw_faces(frame):
-    # Detect faces using MTCNN
-    results = mtcnn.detect_faces(frame)
-
-    # Draw on the frame
-    for result in results:
-        x, y, w, h = result['box']
-        face = frame[y:y+h, x:x+w]
-        sentiment = analyze_sentiment(face)
-        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 10)  # Thicker red box
-
-        # Calculate position for the text background and the text itself
-        text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
-        text_x = x
-        text_y = y - 10
-        background_tl = (text_x, text_y - text_size[1])
-        background_br = (text_x + text_size[0], text_y + 5)
-
-        # Draw black rectangle as background
-        cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
-        # Draw white text on top
-        cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, (255, 255, 255), 2)
-
-    return frame
-
-# Streamlit UI
-st.markdown(
-    """
-    <style>
-    .main {
-        background-color: #FFFFFF;
-    }
-    .reportview-container .main .block-container{
-        padding-top: 2rem;
-    }
-    h1 {
-        color: #E60012;
-        font-family: 'Arial Black', Gadget, sans-serif;
-    }
-    h2 {
-        color: #E60012;
-        font-family: 'Arial', sans-serif;
-    }
-    h3 {
-        color: #333333;
-        font-family: 'Arial', sans-serif;
-    }
-    .stButton button {
-        background-color: #E60012;
-        color: white;
-        border-radius: 5px;
-        font-size: 16px;
-    }
-    </style>
-    """,
-    unsafe_allow_html=True
-)
-
-st.title("Computer Vision Test Lab")
-st.subheader("Facial Sentiment")
-
-# Columns for input and output streams
-col1, col2 = st.columns(2)
-
-with col1:
-    st.header("Input Stream")
-    st.subheader("Webcam")
-    video_placeholder = st.empty()
-
-with col2:
-    st.header("Output Stream")
-    st.subheader("Analysis")
-    output_placeholder = st.empty()
-
-sentiment_placeholder = st.empty()
-
-def video_frame_callback(frame):
-    try:
-        with lock:
-            img = frame.to_ndarray(format="bgr24")
-            img_container["webcam"] = img
-            frame_with_boxes = detect_and_draw_faces(img)
-            img_container["analyzed"] = frame_with_boxes
-
-    except Exception as e:
-        st.error(f"Error processing frame: {e}")
-
-    return frame
-
-ctx = webrtc_streamer(key="webcam", video_frame_callback=video_frame_callback)
-
-while ctx.state.playing:
-    with lock:
-        print(img_container)
-        img = img_container["webcam"]
-        frame_with_boxes = img_container["analyzed"]
-
-    if img is None:
-        continue
-
-    video_placeholder.image(img, channels="BGR")
-    output_placeholder.image(frame_with_boxes, channels="BGR")