antoniospoletojr committed on
Commit
e2508c0
1 Parent(s): a12e461

first commit

Files changed (7)
  1. app.py +110 -0
  2. mean.pt +3 -0
  3. model.pt +3 -0
  4. model.py +41 -0
  5. requirements.txt +9 -0
  6. std.pt +3 -0
  7. utils.py +159 -0
app.py ADDED
@@ -0,0 +1,110 @@
+ import gradio as gr
+ import numpy as np
+ import cv2
+ import torch
+ from facenet_pytorch import MTCNN
+ from model import HPEnet
+ from torchvision import transforms
+ from scipy.spatial.transform import Rotation as R
+ from PIL import Image
+ from utils import draw_2D_axes
+
+
+ def detect_faces(image):
+     # Detect faces
+     boxes, _ = mtcnn.detect(image)
+     boxes_centroids = []
+     sizes = []
+     faces = []
+
+     # If no boxes have been detected, return
+     if boxes is None:
+         return None, None, None
+
+     # Add a margin to each box, compute the centroids, and crop the face image
+     for i in range(len(boxes)):
+         # Add the margin while clamping to the image bounds
+         margin = 50
+         boxes[i][0] = max(0, boxes[i][0] - margin)
+         boxes[i][1] = max(0, boxes[i][1] - margin)
+         boxes[i][2] = min(image.width, boxes[i][2] + margin)
+         boxes[i][3] = min(image.height, boxes[i][3] + margin)
+
+         # Compute centroid and size
+         boxes_centroids.append([int((boxes[i][0] + boxes[i][2]) / 2), int((boxes[i][1] + boxes[i][3]) / 2)])
+         sizes.append(boxes[i][2] - boxes[i][0])
+         # Crop the face using the box
+         faces.append(image.crop(boxes[i]))
+
+     return faces, boxes_centroids, sizes
+
+ def process(frame):
+     # Convert from OpenCV BGR to a PIL RGB image
+     image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+     image = Image.fromarray(image)
+
+     # Detect faces
+     faces, centroids, sizes = detect_faces(image)
+
+     if faces is None:
+         return frame
+
+     for idx, face in enumerate(faces):
+
+         # Preprocess the image
+         transform = transforms.Compose([
+             transforms.PILToTensor(),
+             transforms.Resize((200, 200)),
+         ])
+         face_tensor = transform(face)
+         face_tensor = face_tensor.permute(1, 2, 0)
+
+         # Standardize the tensor
+         face_tensor = (face_tensor - mean) / std
+         face_tensor = face_tensor.permute(2, 0, 1)
+         face_tensor = face_tensor.type(torch.float32)
+
+         # Run the inference
+         with torch.inference_mode():
+             face_tensor = face_tensor.unsqueeze(0).to(device)
+             r1, r2, r3, _ = model(face_tensor)
+
+         # Build a numpy matrix out of r1, r2, r3 (these vectors are the columns of the rotation matrix)
+         r1 = r1.squeeze().cpu().numpy()
+         r2 = r2.squeeze().cpu().numpy()
+         r3 = r3.squeeze().cpu().numpy()
+
+         rotation_matrix = np.array([r1, r2, r3])
+
+         r = R.from_matrix(rotation_matrix)
+
+         pitch, yaw, roll = r.as_euler('zyx', degrees=True)
+
+         center = centroids[idx]
+         size = sizes[idx] * 0.5
+
+         frame = draw_2D_axes(frame, yaw, roll, pitch, center[0], center[1], size)
+
+     return frame
+
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model = HPEnet().to(device)
+ # Load the model weights from the checkpoint
+ model.load_state_dict(torch.load('model.pt', map_location=torch.device('cpu')))
+ model.to(device)
+ model.eval()
+ mtcnn = MTCNN(keep_all=True, post_process=False, device='cpu')
+ mean = torch.load('mean.pt', map_location='cpu')
+ std = torch.load('std.pt', map_location='cpu')
+
+ demo = gr.Interface(
+     process,
+     gr.Image(sources="webcam", streaming=True),
+     "image",
+     live=True,
+     allow_flagging="never",
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
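For reference, the rotation-to-Euler step in process() can be exercised in isolation. A minimal sketch, using the identity rotation as a stand-in for the model's three predicted vectors:

    import numpy as np
    from scipy.spatial.transform import Rotation as R

    # Stand-ins for the model's predicted vectors r1, r2, r3
    r1, r2, r3 = np.eye(3)

    rotation_matrix = np.array([r1, r2, r3])
    pitch, yaw, roll = R.from_matrix(rotation_matrix).as_euler('zyx', degrees=True)
    print(pitch, yaw, roll)  # 0.0 0.0 0.0 for the identity rotation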
mean.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:173d644e647a24e9d317b78ea47774a3bc60257952fc5f8f3a5951af56b80b80
+ size 961101
model.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:997e75103549eab362cec3939c35c9ebe1cba720a18d166424d48425e41af20d
+ size 143750654
model.py ADDED
@@ -0,0 +1,41 @@
+ import torchvision
+ import torch.nn as nn
+ import torch
+ import math
+
+
+ class HPEnet(nn.Module):
+     def __init__(self, roll_bins=18, yaw_bins=29, pitch_bins=21):
+         super(HPEnet, self).__init__()
+         print("Loading the model...")
+
+         self.resnet = torchvision.models.resnet50(weights="ResNet50_Weights.DEFAULT")
+         self.resnet.fc = nn.Linear(2048, 2048)
+         self.fc = nn.Linear(2048, 2048)
+
+         # Classification layer
+         self.fc_class = nn.Linear(2048, 1921)
+
+         # Regression layers
+         self.fc_r1 = nn.Linear(2048, 3)
+         self.fc_r2 = nn.Linear(2048, 3)
+         self.fc_r3 = nn.Linear(2048, 3)
+
+     def forward(self, x):
+         # Backbone
+         x = self.resnet(x)
+
+         # Dense layer
+         x = torch.nn.functional.relu(x)
+         x = self.fc(x)
+
+         # Regression layers: the three columns of the rotation matrix
+         r1 = self.fc_r1(x)
+         r2 = self.fc_r2(x)
+         r3 = self.fc_r3(x)
+
+         # Classification layer
+         x = torch.nn.functional.relu(x)
+         x = self.fc_class(x)
+
+         return r1, r2, r3, x
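A quick shape check of the network's outputs; a minimal sketch (the 200x200 input matches the resize applied in app.py, and instantiating HPEnet downloads the pretrained ResNet-50 weights):

    import torch
    from model import HPEnet

    model = HPEnet()
    model.eval()

    # One dummy 200x200 RGB crop, shaped like the preprocessed faces in app.py
    x = torch.randn(1, 3, 200, 200)
    with torch.inference_mode():
        r1, r2, r3, logits = model(x)

    print(r1.shape, r2.shape, r3.shape, logits.shape)
    # torch.Size([1, 3]) for each rotation-matrix column, torch.Size([1, 1921]) for the class logits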
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ torch
+ torchvision
+ gradio
+ opencv-python
+ scipy
+ facenet_pytorch
+ pillow
+ plotly
+ numpy
std.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e1e3b7a9db0ac613f8c5d4deb0b73151d08aa485459096cdb39aa2e294f5fea
+ size 961096
utils.py ADDED
@@ -0,0 +1,159 @@
+ import numpy as np
+ import cv2
+ import plotly.graph_objects as go
+ from math import cos, sin
+ from scipy.spatial.transform import Rotation as R
+
+
+ def plot_3D_rotation(rotation_matrix):
+     fig = go.Figure()
+
+     # Original axis orientation
+     axes_points = np.array([
+         [1, 0, 0, 0],
+         [0, 1, 0, 0],
+         [0, 0, 1, 0]
+     ], dtype=np.float64)
+
+     # Plot original axes
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 0]],
+         y=[0, axes_points[1, 0]],
+         z=[0, axes_points[2, 0]],
+         mode='lines+text',
+         line=dict(color='blue', width=6),
+         name='Canonical X-axis',
+         text=['', 'X axis'],
+         textposition='middle center',
+     ))
+
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 1]],
+         y=[0, axes_points[1, 1]],
+         z=[0, axes_points[2, 1]],
+         mode='lines+text',
+         line=dict(color='blue', width=6),
+         name='Canonical Z-axis',
+         text=['', 'Z axis'],
+         textposition='middle center',
+     ))
+
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 2]],
+         y=[0, axes_points[1, 2]],
+         z=[0, axes_points[2, 2]],
+         mode='lines+text',
+         line=dict(color='blue', width=6),
+         name='Canonical Y-axis',
+         text=['', 'Y axis'],
+         textposition='middle center',
+     ))
+
+     # Apply rotation
+     axes_points = rotation_matrix @ axes_points
+
+     # Plot rotated axes
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 0]],
+         y=[0, axes_points[1, 0]],
+         z=[0, axes_points[2, 0]],
+         mode='lines+text',
+         line=dict(color='red', width=6),
+         name="Rotated X'-axis",
+         text=['', 'Rotated X axis'],
+         textposition='middle center',
+     ))
+
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 1]],
+         y=[0, axes_points[1, 1]],
+         z=[0, axes_points[2, 1]],
+         mode='lines+text',
+         line=dict(color='red', width=6),
+         name="Rotated Z'-axis",
+         text=['', 'Rotated Z axis'],
+         textposition='middle center',
+     ))
+
+     fig.add_trace(go.Scatter3d(
+         x=[0, axes_points[0, 2]],
+         y=[0, axes_points[1, 2]],
+         z=[0, axes_points[2, 2]],
+         mode='lines+text',
+         line=dict(color='red', width=6),
+         name="Rotated Y'-axis",
+         text=['', 'Rotated Y axis'],
+         textposition='middle center',
+     ))
+
+     # Retrieve pitch, yaw, roll from the rotation matrix
+     r = R.from_matrix(rotation_matrix)
+     pitch, yaw, roll = r.as_euler('xzy', degrees=True)
+
+     # Set layout
+     fig.update_layout(
+         scene=dict(
+             xaxis=dict(title='X-axis', range=[-1.2, 1.2]),
+             yaxis=dict(title='Z-axis', range=[-1.2, 1.2]),
+             zaxis=dict(title='Y-axis', range=[-1.2, 1.2]),
+             xaxis_tickvals=np.arange(-1.2, 1.2, 0.6),
+             yaxis_tickvals=np.arange(-1.2, 1.2, 0.5),
+             zaxis_tickvals=np.arange(-1.2, 1.2, 0.5),
+             aspectmode='cube',
+             aspectratio=dict(x=1, y=1, z=1),
+         ),
+         margin=dict(l=0, r=0, t=0, b=30),
+     )
+     # Add the Euler-angle annotation
+     fig.add_annotation(dict(font=dict(color='black', size=15),
+                             x=-30,
+                             y=50,
+                             showarrow=False,
+                             text=f"Pitch: {int(pitch)} - Yaw: {int(yaw)} - Roll: {int(roll)}",
+                             textangle=0,
+                             xanchor='left',
+                             xref="paper",
+                             yref="paper"))
+     return fig
+
+
+ def draw_2D_axes(img, roll, pitch, yaw, tdx=None, tdy=None, size=150.):
+     # Input is a cv2 image (numpy array).
+     # roll, pitch, yaw are in degrees; (tdx, tdy) is the position of the
+     # face centre in the image, and size scales the drawn cube.
+
+     p = pitch * np.pi / 180
+     y = yaw * np.pi / 180
+     r = -roll * np.pi / 180
+     if tdx is not None and tdy is not None:
+         face_x = tdx - 0.50 * size
+         face_y = tdy - 0.50 * size
+     else:
+         height, width = img.shape[:2]
+         face_x = width / 2 - 0.5 * size
+         face_y = height / 2 - 0.5 * size
+
+     x1 = size * (cos(y) * cos(r)) + face_x
+     y1 = size * (cos(p) * sin(r) + cos(r) * sin(p) * sin(y)) + face_y
+     x2 = size * (-cos(y) * sin(r)) + face_x
+     y2 = size * (cos(p) * cos(r) - sin(p) * sin(y) * sin(r)) + face_y
+     x3 = size * (sin(y)) + face_x
+     y3 = size * (-cos(y) * sin(p)) + face_y
+
+     # Draw base in red
+     cv2.line(img, (int(face_x), int(face_y)), (int(x1), int(y1)), (0, 0, 255), 3)
+     cv2.line(img, (int(face_x), int(face_y)), (int(x2), int(y2)), (0, 0, 255), 3)
+     cv2.line(img, (int(x2), int(y2)), (int(x2 + x1 - face_x), int(y2 + y1 - face_y)), (0, 0, 255), 3)
+     cv2.line(img, (int(x1), int(y1)), (int(x1 + x2 - face_x), int(y1 + y2 - face_y)), (0, 0, 255), 3)
+     # Draw pillars in blue
+     cv2.line(img, (int(face_x), int(face_y)), (int(x3), int(y3)), (255, 0, 0), 2)
+     cv2.line(img, (int(x1), int(y1)), (int(x1 + x3 - face_x), int(y1 + y3 - face_y)), (255, 0, 0), 2)
+     cv2.line(img, (int(x2), int(y2)), (int(x2 + x3 - face_x), int(y2 + y3 - face_y)), (255, 0, 0), 2)
+     cv2.line(img, (int(x2 + x1 - face_x), int(y2 + y1 - face_y)), (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (255, 0, 0), 2)
+     # Draw top in green
+     cv2.line(img, (int(x3 + x1 - face_x), int(y3 + y1 - face_y)), (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (0, 255, 0), 2)
+     cv2.line(img, (int(x2 + x3 - face_x), int(y2 + y3 - face_y)), (int(x3 + x1 + x2 - 2 * face_x), int(y3 + y2 + y1 - 2 * face_y)), (0, 255, 0), 2)
+     cv2.line(img, (int(x3), int(y3)), (int(x3 + x1 - face_x), int(y3 + y1 - face_y)), (0, 255, 0), 2)
+     cv2.line(img, (int(x3), int(y3)), (int(x3 + x2 - face_x), int(y3 + y2 - face_y)), (0, 255, 0), 2)
+
+     return img
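draw_2D_axes can be tried without a face detector; a minimal sketch that renders the pose cube on a blank canvas (the angle values are arbitrary):

    import numpy as np
    import cv2
    from utils import draw_2D_axes

    # Blank 640x480 BGR canvas with the cube centred at (320, 240)
    canvas = np.zeros((480, 640, 3), dtype=np.uint8)
    out = draw_2D_axes(canvas, roll=15, pitch=-10, yaw=30, tdx=320, tdy=240, size=120.)
    cv2.imwrite('axes_demo.png', out)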