max-unfinity committed
Commit • c66f90e
1 Parent(s): 7589132
main code
Files changed:
- .gitignore +3 -0
- Dockerfile +10 -1
- app.py +101 -0
- infer.py +74 -0
- yolov8-test.ipynb +146 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+*.pt
+/images
+__pycache__
Dockerfile
CHANGED
@@ -1,3 +1,12 @@
 FROM nvcr.io/nvidia/pytorch:23.12-py3
 
-RUN pip install ultralytics
+RUN pip install ultralytics
+RUN pip install streamlit
+RUN pip install opencv-python==4.6.0.66
+RUN pip install Pillow==10.3.0
+
+RUN apt update && apt install fonts-dejavu
+
+EXPOSE 8501
+CMD streamlit run app.py \
+    --server.headless true
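The new image layers Streamlit, pinned opencv-python 4.6.0.66 and Pillow 10.3.0, and the DejaVu fonts (needed by the ImageFont.truetype call in app.py) on top of the NVIDIA PyTorch base image, then exposes Streamlit's default port 8501 and runs the app headless. One caveat worth checking: `apt install` without `-y` may abort a non-interactive build if it has to confirm extra packages. Below is a minimal sanity check, assumed rather than part of the commit, that can be run inside the built container.

# check_env.py - hypothetical helper, not part of this commit.
# Confirms the packages the Dockerfile installs are importable and
# whether torch sees a GPU (app.py falls back to CPU when it does not).
import torch
import cv2
import PIL
import streamlit
import ultralytics

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("opencv-python:", cv2.__version__)   # expected 4.6.x
print("Pillow:", PIL.__version__)          # expected 10.3.0
print("streamlit:", streamlit.__version__)
print("ultralytics:", ultralytics.__version__)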
app.py
ADDED
@@ -0,0 +1,101 @@
+import streamlit as st
+import numpy as np
+from PIL import Image, ImageDraw, ImageFont
+from ultralytics import YOLO
+import torch
+import infer
+
+
+@st.cache_resource()
+def load_model():
+    print('Loading model...')
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model_pose = YOLO('yolov8l-pose.pt')
+    model_pose.to(device)
+    return model_pose
+
+
+def draw_output(image_pil: Image.Image, keypoints: dict):
+    draw = ImageDraw.Draw(image_pil)
+    line_width = 10
+    font = ImageFont.truetype("DejaVuSerif-Bold.ttf", 70)
+
+    ear, eye = None, None
+    if keypoints["left_ear"] and keypoints["left_eye"]:
+        ear = keypoints["left_ear"]
+        eye = keypoints["left_eye"]
+    elif keypoints["right_ear"] and keypoints["right_eye"]:
+        ear = keypoints["right_ear"]
+        eye = keypoints["right_eye"]
+
+    # draw extended left and right eye lines
+    if ear and eye:
+        left_new_point = infer.extend_line(ear, eye, 3)
+        l1 = [ear, left_new_point]
+        draw.line(l1, fill='red', width=line_width)
+        # draw a horizontal line from ear forwards
+        ear = np.array(ear)
+        l1 = np.array(l1)
+        l1_vector = l1[1] - l1[0]
+        x_s = np.sign(l1_vector)[0]
+        length_l1 = np.linalg.norm(l1_vector)
+        p2 = ear + np.array([length_l1*x_s, 0])
+        ear = tuple(ear.tolist())
+        l = [ear, tuple(p2.tolist())]
+        draw.line(l, fill='gray', width=line_width//2)
+        # draw angle
+        angle = infer.calculate_angle_to_horizontal(l1_vector)
+        draw.text(ear, f'{angle:.2f}', fill='red', font=font)
+
+    # draw elbow angles
+    left_elbow_angle, right_elbow_angle = infer.get_elbow_angles(keypoints)
+    if left_elbow_angle:
+        draw.text(keypoints['left_elbow'], f'{left_elbow_angle:.2f}', fill='red', font=font)
+        # draw polyline for left arm
+        draw.line([keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist']], fill='blue', width=line_width)
+    if right_elbow_angle:
+        draw.text(keypoints['right_elbow'], f'{right_elbow_angle:.2f}', fill='red', font=font)
+        # draw polyline for right arm
+        draw.line([keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist']], fill='blue', width=line_width)
+
+    return image_pil
+
+
+st.title('Pose Estimation App')
+
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+st.caption(f'Using device: {device}')
+
+upload_tab, camera_tab = st.tabs(["Upload Photo", "Webcam Capture"])
+
+with upload_tab:
+    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
+
+with camera_tab:
+    img_file_buffer = st.camera_input("Take a picture")
+
+img = None
+if img_file_buffer is not None:
+    img = Image.open(img_file_buffer)
+if uploaded_file is not None:
+    img = Image.open(uploaded_file)
+
+if img is not None:
+    # predict
+    with st.spinner('Predicting...'):
+        model = load_model()
+        pred = model(img)[0]
+        keypoints = infer.get_keypoints(pred)
+    if keypoints is not None:
+        img = draw_output(img, keypoints)
+        st.image(img, caption='Predicted image', use_column_width=True)
+        lea, rea = infer.get_eye_angles(keypoints)
+        lba, rba = infer.get_elbow_angles(keypoints)
+        st.write('Angles:')
+        st.json({'left_eye_angle': lea, 'right_eye_angle': rea, 'left_elbow_angle': lba, 'right_elbow_angle': rba})
+        st.write('Raw keypoints:')
+        st.json(keypoints)
+    else:
+        st.error('No keypoints detected!')
+        st.image(img, caption='Original image', use_column_width=True)
+
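app.py wires the Streamlit UI: load_model() caches the yolov8l-pose model via st.cache_resource, and draw_output() overlays the ear-to-eye line, a horizontal reference line, the head-tilt angle, and the elbow angles on the uploaded or captured photo. draw_output only needs a PIL image and a keypoints dict of the shape infer.get_keypoints returns ({keypoint name: (x, y) or None}), so it can be exercised offline. The sketch below is assumed usage, not part of the commit; it runs from the repository root, imports app in Streamlit's bare mode (which only emits warnings), and relies on the DejaVu font the Dockerfile installs.

# offline_draw_demo.py - hypothetical usage sketch, not part of this commit.
# Assumes DejaVuSerif-Bold.ttf is resolvable (fonts-dejavu installed) and
# that importing app outside `streamlit run` is acceptable for a quick check.
from PIL import Image

import infer
from app import draw_output

# Start with every COCO keypoint missing, then fill in an ear/eye pair and a
# left arm so both drawing branches of draw_output are exercised.
keypoints = {name: None for name in infer.KEYPOINT_NAMES}
keypoints.update({
    'left_ear': (420.0, 310.0),
    'left_eye': (480.0, 295.0),
    'left_shoulder': (400.0, 500.0),
    'left_elbow': (430.0, 650.0),
    'left_wrist': (520.0, 720.0),
})

canvas = Image.new('RGB', (1000, 1000), 'white')
out = draw_output(canvas, keypoints)
out.save('draw_output_demo.png')
print(infer.get_eye_angles(keypoints), infer.get_elbow_angles(keypoints))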
infer.py
ADDED
@@ -0,0 +1,74 @@
+import numpy as np
+from ultralytics.engine.results import Results
+
+
+KEYPOINT_NAMES = ["nose","left_eye","right_eye","left_ear","right_ear","left_shoulder",
+                  "right_shoulder","left_elbow","right_elbow","left_wrist","right_wrist",
+                  "left_hip","right_hip","left_knee","right_knee","left_ankle","right_ankle"]
+
+
+def get_keypoints(result: Results):
+    keypoints = None
+    for i, box in enumerate(result.boxes):
+        if box.cls != 0.:  # Only consider the person class
+            continue
+        person_conf = box.conf.item()
+        k = result.keypoints.data[i]
+        x = k[:, 0].tolist()
+        y = k[:, 1].tolist()
+        score = k[:, 2]
+        visible = (score > 0.5).tolist()
+        # keypoints = {'x': x, 'y': y, 'visible': visible}
+        keypoints = {key_name: (x_, y_) if v_ else None for key_name,x_,y_,v_ in zip(KEYPOINT_NAMES, x, y, visible)}
+        break
+    return keypoints
+
+
+def calculate_angle(p1, p2, p3):
+    v1 = np.array([p1[0] - p2[0], p1[1] - p2[1]])
+    v2 = np.array([p3[0] - p2[0], p3[1] - p2[1]])
+    angle_rad = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
+    angle_deg = np.degrees(angle_rad)
+    return angle_deg
+
+
+def calculate_angle_to_horizontal(vector):
+    angle_rad = np.arctan2(vector[1], vector[0])
+    angle_deg = np.degrees(angle_rad)
+    # Adjust the angle to be within -90 to +90 degrees
+    if angle_deg > 90:
+        angle_deg = 180 - angle_deg
+    elif angle_deg < -90:
+        angle_deg = -180 - angle_deg
+    return -angle_deg
+
+
+def extend_line(start, end, extend_factor=3):
+    vector = np.array(end) - np.array(start)
+    length = np.linalg.norm(vector)
+    unit_vector = vector / np.linalg.norm(vector)
+    new_point = end + unit_vector * length * extend_factor
+    new_point = new_point.tolist()
+    return (new_point[0], new_point[1])
+
+
+def get_elbow_angles(keypoints: dict):
+    left_elbow_angle = None
+    right_elbow_angle = None
+    if keypoints['left_shoulder'] and keypoints['left_elbow'] and keypoints['left_wrist']:
+        left_elbow_angle = calculate_angle(keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist'])
+    if keypoints['right_shoulder'] and keypoints['right_elbow'] and keypoints['right_wrist']:
+        right_elbow_angle = calculate_angle(keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist'])
+    return left_elbow_angle, right_elbow_angle
+
+
+def get_eye_angles(keypoints: dict):
+    left_eye_angle = None
+    right_eye_angle = None
+    if keypoints['left_ear'] and keypoints['left_eye']:
+        left_vector = (keypoints['left_eye'][0] - keypoints['left_ear'][0], keypoints['left_eye'][1] - keypoints['left_ear'][1])
+        left_eye_angle = calculate_angle_to_horizontal(left_vector)
+    if keypoints['right_ear'] and keypoints['right_eye']:
+        right_vector = (keypoints['right_eye'][0] - keypoints['right_ear'][0], keypoints['right_eye'][1] - keypoints['right_ear'][1])
+        right_eye_angle = calculate_angle_to_horizontal(right_vector)
+    return left_eye_angle, right_eye_angle
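infer.py holds the geometry: calculate_angle returns the interior angle at the middle point p2 (used for the elbows), calculate_angle_to_horizontal negates the arctan2 result because image y grows downward (so a forward-and-up ear-to-eye vector reads as a positive tilt), and extend_line stretches the ear-to-eye segment for drawing. A few worked values as a sanity check, assumed rather than part of the commit:

# angle_sanity.py - hypothetical check, not part of this commit.
import infer

# Interior angle at the middle point: a right angle here.
print(infer.calculate_angle((0, 0), (1, 0), (1, 1)))        # 90.0

# Straight arm: shoulder, elbow, wrist collinear -> 180 degrees.
print(infer.calculate_angle((0, 0), (1, 0), (2, 0)))        # 180.0

# Image y points down, so (1, -1) is "forward and up" -> +45 degrees.
print(infer.calculate_angle_to_horizontal((1, -1)))         # 45.0
print(infer.calculate_angle_to_horizontal((1, 1)))          # -45.0

# extend_line keeps the direction and scales the segment past its end.
print(infer.extend_line((0, 0), (1, 0), extend_factor=3))   # (4.0, 0.0)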
yolov8-test.ipynb
ADDED
@@ -0,0 +1,146 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ultralytics import YOLO\n",
+    "import torch\n",
+    "from PIL import Image, ImageDraw, ImageFont\n",
+    "import numpy as np\n",
+    "import infer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from importlib import reload\n",
+    "reload(infer)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
+    "model_pose = YOLO('yolov8l-pose.pt')\n",
+    "model_pose.to(device)\n",
+    "\n",
+    "model_det = YOLO('yolov8m.pt')\n",
+    "model_det.to(device);"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = \"image.jpg\"\n",
+    "results = model_pose(url)\n",
+    "results_det = model_det(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def draw_output(image_pil: Image.Image, keypoints: dict): \n",
+    "    draw = ImageDraw.Draw(image_pil)\n",
+    "    line_width = 10\n",
+    "    font = ImageFont.truetype(\"DejaVuSerif-Bold.ttf\", 70)\n",
+    "    \n",
+    "    ear, eye = None, None\n",
+    "    if keypoints[\"left_ear\"] and keypoints[\"left_eye\"]:\n",
+    "        ear = keypoints[\"left_ear\"]\n",
+    "        eye = keypoints[\"left_eye\"]\n",
+    "    elif keypoints[\"right_ear\"] and keypoints[\"right_eye\"]:\n",
+    "        ear = keypoints[\"right_ear\"]\n",
+    "        eye = keypoints[\"right_eye\"]\n",
+    "    \n",
+    "    # draw extended left and right eye lines\n",
+    "    if ear and eye:\n",
+    "        left_new_point = infer.extend_line(ear, eye, 3)\n",
+    "        l1 = [ear, left_new_point]\n",
+    "        draw.line(l1, fill='red', width=line_width)\n",
+    "        # draw a horizontal line from ear forwards\n",
+    "        ear = np.array(ear)\n",
+    "        l1 = np.array(l1)\n",
+    "        l1_vector = l1[1] - l1[0]\n",
+    "        x_s = np.sign(l1_vector)[0]\n",
+    "        length_l1 = np.linalg.norm(l1_vector)\n",
+    "        p2 = ear + np.array([length_l1*x_s, 0])\n",
+    "        ear = tuple(ear.tolist())\n",
+    "        l = [ear, tuple(p2.tolist())]\n",
+    "        draw.line(l, fill='gray', width=line_width//2)\n",
+    "        # draw angle\n",
+    "        angle = infer.calculate_angle_to_horizontal(l1_vector)\n",
+    "        draw.text(ear, f'{angle:.2f}', fill='red', font=font)\n",
+    "        print(infer.get_eye_angles(keypoints))\n",
+    "\n",
+    "\n",
+    "    # draw elbow angles\n",
+    "    left_elbow_angle, right_elbow_angle = infer.get_elbow_angles(keypoints)\n",
+    "    if left_elbow_angle:\n",
+    "        draw.text(keypoints['left_elbow'], f'{left_elbow_angle:.2f}', fill='red', font=font)\n",
+    "        # draw polyline for left arm\n",
+    "        draw.line([keypoints['left_shoulder'], keypoints['left_elbow'], keypoints['left_wrist']], fill='blue', width=line_width)\n",
+    "    if right_elbow_angle:\n",
+    "        draw.text(keypoints['right_elbow'], f'{right_elbow_angle:.2f}', fill='red', font=font)\n",
+    "        # draw polyline for right arm\n",
+    "        draw.line([keypoints['right_shoulder'], keypoints['right_elbow'], keypoints['right_wrist']], fill='blue', width=line_width)\n",
+    "\n",
+    "    return image_pil"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "keypoints = infer.get_keypoints(results[0])\n",
+    "img = Image.open(url)\n",
+    "img = draw_output(img, keypoints)\n",
+    "img.resize((800, 800))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+   }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}