init
Files changed:

- .gitignore +162 -0
- Dockerfile +1 -6
- README.md +0 -1
- app.py +25 -14
- arg_parser.py +72 -0
- demo.py +263 -0
- model/TEHNet.py +208 -0
- model/__init__.py +1 -0
- model/model.py +64 -0
- model/pointnet2_utils.py +315 -0
- model/utils.py +42 -0
- record.py +20 -0
- requirements.txt +5 -0
- settings.py +45 -0
- test.py +93 -0
- vis.py +13 -0
.gitignore
ADDED
@@ -0,0 +1,162 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
src/Ev2Hands/outputs
src/HandSimulator/logs
Dockerfile
CHANGED
@@ -15,9 +15,4 @@ RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 RUN cd esim_py && pip install .
 
-
-EXPOSE 8501
-
-# CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
-# streamlit run app.py
-CMD ["streamlit", "run", "app.py", "0.0.0.0", "--port", "7860"]
+CMD ["python3", "app.py"]
README.md
CHANGED
@@ -6,7 +6,6 @@ colorTo: indigo
 sdk: docker
 pinned: false
 license: cc-by-4.0
-app_port: 8501
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py
CHANGED
@@ -1,14 +1,25 @@
-import
-import
-
-
-
-
-
-
-
-
-
-
-
-
+import gradio as gr
+import requests
+
+
+
+import gradio as gr
+import os
+
+
+def video_identity(video):
+    print(video)
+    return video
+
+
+demo = gr.Interface(video_identity,
+                    gr.Video(),
+                    "playable_video",
+                    examples=[
+                        os.path.join(os.path.dirname(__file__),
+                                     "example/video.mp4")],
+                    cache_examples=True)
+
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
+
arg_parser.py
ADDED
@@ -0,0 +1,72 @@
import argparse
import os

def demo():
    parser = argparse.ArgumentParser(description='Demo for Ev2Hands')

    parser.add_argument('--batch_size', dest='batch_size', required=False,
                        help='Set the batch_size (default: 128)', default='32')

    parser.add_argument('--checkpoint_path', dest='checkpoint', required=False,
                        help='path of checkpoint_path', default='./savedmodels/best_model_state_dict.pth')

    args = parser.parse_args()

    os.environ['CHECKPOINT_PATH'] = args.checkpoint
    os.environ['BATCH_SIZE'] = args.batch_size

    return args


def evaluate():
    parser = argparse.ArgumentParser(description='Evaluation of Ev2Hands')

    parser.add_argument('--batch_size', dest='batch_size', required=False,
                        help='Set the batch_size (default: 128)', default='128')

    parser.add_argument('--checkpoint_path', dest='checkpoint', required=False,
                        help='path of checkpoint',
                        default='./savedmodels/best_model_state_dict.pth')

    args = parser.parse_args()

    os.environ['CHECKPOINT_PATH'] = args.checkpoint
    os.environ['BATCH_SIZE'] = args.batch_size

    return args


def train():
    parser = argparse.ArgumentParser(description='Trainer of Ev2Hands')

    parser.add_argument('--batch_size', dest='batch_size', required=False,
                        help='Set the batch_size (default: 8)', default='8')

    parser.add_argument('--checkpoint_path', dest='checkpoint', required=False,
                        help='path of checkpoint', default='')

    args = parser.parse_args()

    os.environ['CHECKPOINT_PATH'] = args.checkpoint
    os.environ['BATCH_SIZE'] = args.batch_size

    return args


def finetune():
    parser = argparse.ArgumentParser(description='FineTuner of Ev2Hands for real data')

    parser.add_argument('--batch_size', dest='batch_size', required=False,
                        help='Set the batch_size (default: 8)', default='8')

    parser.add_argument('--checkpoint_path', dest='checkpoint', required=False,
                        help='path of checkpoint',
                        default='./savedmodels/best_model_state_dict.pth')

    args = parser.parse_args()

    os.environ['CHECKPOINT_PATH'] = args.checkpoint
    os.environ['BATCH_SIZE'] = args.batch_size

    return args
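Note: each parser above passes its values to the rest of the code through environment variables rather than a config object, so both values stay strings until the consumer casts them. A minimal sketch of that hand-off, assuming arg_parser.py is on the import path (the file name sketch_args.py is hypothetical; the flags and variable names come from arg_parser.py above):

    # sketch_args.py -- how demo.py picks up the parsed values
    import os
    import arg_parser

    args = arg_parser.demo()                    # parses --batch_size / --checkpoint_path
    batch_size = int(os.environ['BATCH_SIZE'])  # read back and cast, as demo.py does
    checkpoint = os.environ['CHECKPOINT_PATH']
    print(batch_size, checkpoint)

Run, for example, as `python sketch_args.py --batch_size 16 --checkpoint_path ./savedmodels/best_model_state_dict.pth`.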
demo.py
ADDED
@@ -0,0 +1,263 @@
import sys
import os
os.environ['ERPC'] = '1'

import esim_py

import torch
import cv2
import time
import pyrender
import numpy as np
import trimesh

import arg_parser

from model import TEHNetWrapper
from settings import OUTPUT_HEIGHT, OUTPUT_WIDTH, MAIN_CAMERA, REAL_TEST_DATA_PATH


def pc_normalize(pc):
    pc[:, 0] /= OUTPUT_WIDTH
    pc[:, 1] /= OUTPUT_HEIGHT
    pc[:, :2] = 2 * pc[:, :2] - 1

    ts = pc[:, 2:]

    t_max = ts.max(0).values
    t_min = ts.min(0).values

    ts = (2 * ((ts - t_min) / (t_max - t_min))) - 1

    pc[:, 2:] = ts

    return pc


def process_events(events):
    n_events = 2048

    events[:, 2] -= events[0, 2]  # normalize ts

    event_grid = np.zeros((OUTPUT_HEIGHT, OUTPUT_WIDTH, 3), dtype=np.float32)
    count_grid = np.zeros((OUTPUT_HEIGHT, OUTPUT_WIDTH), dtype=np.float32)

    x, y, t, p = events.T
    x, y = x.astype(dtype=np.int32), y.astype(dtype=np.int32)

    np.add.at(event_grid, (y, x, 0), t)
    np.add.at(event_grid, (y, x, 1), p == 1)
    np.add.at(event_grid, (y, x, 2), p != 1)

    np.add.at(count_grid, (y, x), 1)

    yi, xi = np.nonzero(count_grid)
    t_avg = event_grid[yi, xi, 0] / count_grid[yi, xi]
    p_evn = event_grid[yi, xi, 1]
    n_evn = event_grid[yi, xi, 2]

    events = np.hstack([xi[:, None], yi[:, None], t_avg[:, None], p_evn[:, None], n_evn[:, None]])

    sampled_indices = np.random.choice(events.shape[0], n_events)
    events = events[sampled_indices]

    events = torch.tensor(events, dtype=torch.float32)

    coordinates = np.zeros((events.shape[0], 2))
    event_frame = np.zeros((OUTPUT_HEIGHT, OUTPUT_WIDTH, 3), dtype=np.uint8)
    for idx, (x, y, t_avg, p_evn, n_evn) in enumerate(events):
        y, x = y.int(), x.int()

        coordinates[idx] = (y, x)
        event_frame[y, x, 0] = (p_evn / (p_evn + n_evn)) * 255
        event_frame[y, x, -1] = (n_evn / (p_evn + n_evn)) * 255

    events[:, :3] = pc_normalize(events[:, :3])

    hand_data = {
        'event_frame': torch.tensor(event_frame, dtype=torch.uint8),
        'events': events.permute(1, 0).unsqueeze(0),
        'coordinates': torch.tensor(coordinates, dtype=torch.float32)
    }

    return hand_data


def demo(net, device, data):
    net.eval()

    events = data['events']
    events = events.to(device=device, dtype=torch.float32)

    start_time = time.time()
    with torch.no_grad():
        outputs = net(events)

    end_time = time.time()

    N = events.shape[0]
    print(end_time - start_time)

    outputs['class_logits'] = outputs['class_logits'].softmax(1).argmax(1).int().cpu()

    frames = list()
    for idx in range(N):
        hands = dict()

        hands['left'] = {
            'vertices': outputs['left']['vertices'][idx].cpu(),
            'j3d': outputs['left']['j3d'][idx].cpu(),
        }

        hands['right'] = {
            'vertices': outputs['right']['vertices'][idx].cpu(),
            'j3d': outputs['right']['j3d'][idx].cpu(),
        }

        coordinates = data['coordinates']

        seg_mask = np.zeros((OUTPUT_HEIGHT, OUTPUT_WIDTH, 3), dtype=np.uint8)
        for edx, (y, x) in enumerate(coordinates):
            y, x = y.int(), x.int()

            cid = outputs['class_logits'][idx][edx]

            if cid == 3:
                seg_mask[y, x] = 255
            else:
                seg_mask[y, x, cid] = 255

        hands['seg_mask'] = seg_mask

        frames.append(hands)

    return frames


def main():
    arg_parser.demo()
    os.makedirs('outputs', exist_ok=True)

    device = torch.device('cpu')

    net = TEHNetWrapper(device=device)

    save_path = os.environ['CHECKPOINT_PATH']
    batch_size = int(os.environ['BATCH_SIZE'])

    checkpoint = torch.load(save_path, map_location=device)
    net.load_state_dict(checkpoint['state_dict'], strict=True)

    renderer = pyrender.OffscreenRenderer(viewport_width=OUTPUT_WIDTH, viewport_height=OUTPUT_HEIGHT)

    scene = pyrender.Scene(ambient_light=(0.3, 0.3, 0.3))
    light = pyrender.DirectionalLight(color=[1.0, 1.0, 1.0], intensity=0.8)
    light_pose = np.eye(4)
    light_pose[:3, 3] = np.array([0, -1, 1])
    scene.add(light, pose=light_pose)
    light_pose[:3, 3] = np.array([0, 1, 1])
    scene.add(light, pose=light_pose)
    light_pose[:3, 3] = np.array([1, 1, 2])
    scene.add(light, pose=light_pose)

    rot = trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0])

    mano_hands = net.hands

    # camera = cv2.VideoCapture(0)
    input_video_stream = cv2.VideoCapture('video.mp4')

    video_fps = 25
    video = cv2.VideoWriter('outputs/video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), video_fps, (3 * OUTPUT_WIDTH, OUTPUT_HEIGHT))

    POS_THRESHOLD = 0.5
    NEG_THRESHOLD = 0.5
    REF_PERIOD = 0.000

    esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)

    fps = cv2.CAP_PROP_FPS
    ts_s = 1 / fps
    ts_ns = ts_s * 1e9  # convert s to ns

    is_init = False
    idx = 0
    while True:
        _, frame_bgr = input_video_stream.read()
        frame_bgr = cv2.resize(frame_bgr, (OUTPUT_WIDTH, OUTPUT_HEIGHT))
        frame_gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
        frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
        height, width = frame_log.shape[:2]

        current_ts_ns = idx * ts_ns

        if not is_init:
            esim.init(frame_log, current_ts_ns)
            is_init = True
            idx += 1

            continue
        idx += 1

        events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
        data = process_events(events)

        event_frame = data['event_frame'].cpu().numpy().astype(dtype=np.uint8)

        cv2.imwrite(f"outputs/event_frame_{idx}.png", event_frame)

        print(idx, event_frame.shape)

        frame = demo(net=net, device=device, data=data)[0]
        seg_mask = frame['seg_mask']

        pred_meshes = list()
        for hand_type in ['left', 'right']:
            faces = mano_hands[hand_type].faces

            pred_mesh = trimesh.Trimesh(frame[hand_type]['vertices'].cpu().numpy() * 1000, faces)
            pred_mesh.visual.vertex_colors = [255, 0, 0]
            pred_meshes.append(pred_mesh)

        pred_meshes = trimesh.util.concatenate(pred_meshes)
        pred_meshes.apply_transform(rot)

        camera = MAIN_CAMERA

        nc = pyrender.Node(camera=camera, matrix=np.eye(4))
        scene.add_node(nc)

        mesh_node = pyrender.Node(mesh=pyrender.Mesh.from_trimesh(pred_meshes))
        scene.add_node(mesh_node)
        pred_rgb, depth = renderer.render(scene)
        scene.remove_node(mesh_node)
        scene.remove_node(nc)

        pred_rgb = cv2.cvtColor(pred_rgb, cv2.COLOR_RGB2BGR)
        pred_rgb[pred_rgb == 255] = 0

        img_stack = np.hstack([event_frame, seg_mask, pred_rgb])
        video.write(img_stack)

        cv2.imshow('image', img_stack)
        c = cv2.waitKey(1)

        if c == ord('q'):
            video.release()
            exit(0)

    video.release()


if __name__ == '__main__':
    main()
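Note: process_events above collapses the raw (x, y, t, polarity) stream into one entry per active pixel (mean timestamp, positive count, negative count), samples 2048 of those entries, and returns them as a (1, 5, 2048) tensor plus an event frame for visualisation. A hedged smoke test with synthetic events (the file name sketch_events.py and the random data are illustrative; importing demo also requires esim_py, pyrender and the model package to be installed):

    # sketch_events.py -- synthetic check of process_events shapes
    import numpy as np
    from demo import process_events
    from settings import OUTPUT_WIDTH, OUTPUT_HEIGHT

    rng = np.random.default_rng(0)
    n = 5000
    events = np.stack([
        rng.integers(0, OUTPUT_WIDTH, n),   # x
        rng.integers(0, OUTPUT_HEIGHT, n),  # y
        np.sort(rng.random(n)),             # t, ascending
        rng.integers(0, 2, n),              # polarity (assumed {0, 1} here)
    ], axis=1).astype(np.float32)

    data = process_events(events)
    print(data['events'].shape)       # (1, 5, 2048): x, y, mean t, #positive, #negative per sampled pixel
    print(data['event_frame'].shape)  # (OUTPUT_HEIGHT, OUTPUT_WIDTH, 3)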
model/TEHNet.py
ADDED
@@ -0,0 +1,208 @@
import numpy as np
import torch.nn as nn
import torch
import os
import torch.nn.functional as F
from .pointnet2_utils import PointNetSetAbstractionMsg, PointNetSetAbstraction, PointNetFeaturePropagation


class AttentionBlock(nn.Module):
    def __init__(self):
        super(AttentionBlock, self).__init__()

    def forward(self, key, value, query):
        query = query.permute(0, 2, 1)
        N, KC = key.shape[:2]
        key = key.view(N, KC, -1)

        N, KC = value.shape[:2]
        value = value.view(N, KC, -1)

        sim_map = torch.bmm(key, query)
        sim_map = (KC ** -.5) * sim_map
        sim_map = F.softmax(sim_map, dim=1)

        context = torch.bmm(sim_map, value)

        return context


class MANORegressor(nn.Module):
    def __init__(self, n_inp_features=4, n_pose_params=6, n_shape_params=10):
        super(MANORegressor, self).__init__()

        normal_channel = True

        if normal_channel:
            additional_channel = n_inp_features
        else:
            additional_channel = 0

        self.normal_channel = normal_channel

        self.sa1 = PointNetSetAbstractionMsg(128, [0.4, 0.8], [64, 128], additional_channel, [[128, 128, 256], [128, 196, 256]])
        self.sa2 = PointNetSetAbstraction(npoint=None, radius=None, nsample=None, in_channel=512 + 3, mlp=[256, 512], group_all=True)

        self.n_pose_params = n_pose_params
        self.n_mano_params = n_pose_params + n_shape_params

        self.mano_regressor = nn.Sequential(
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.3),
            nn.Linear(1024, 3 + self.n_mano_params + 3),
        )

    def J3dtoJ2d(self, j3d, scale):
        B, N = j3d.shape[:2]
        device = j3d.device

        j2d = torch.zeros(B, N, 2, device=device)
        j2d[:, :, 0] = scale[:, :, 0] * j3d[:, :, 0]
        j2d[:, :, 1] = scale[:, :, 1] * j3d[:, :, 1]

        return j2d

    def forward(self, xyz, features, mano_hand, previous_mano_params=None):
        device = xyz.device
        batch_size = xyz.shape[0]

        l0_xyz = xyz
        l0_points = features

        l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)

        l2_xyz = l2_xyz.squeeze(-1)
        l2_points = l2_points.squeeze(-1)

        if previous_mano_params is None:
            previous_mano_params = torch.zeros(self.n_mano_params).unsqueeze(0).expand(batch_size, -1).to(device)
            previous_rot_trans_params = torch.zeros(6).unsqueeze(0).expand(batch_size, -1).to(device)

        mano_params = self.mano_regressor(l2_points)

        global_orient = mano_params[:, :3]
        hand_pose = mano_params[:, 3:3+self.n_pose_params]
        betas = mano_params[:, 3+self.n_pose_params:-3]
        transl = mano_params[:, -3:]

        device = mano_hand.shapedirs.device

        mano_args = {
            'global_orient': global_orient.to(device),
            'hand_pose': hand_pose.to(device),
            'betas': betas.to(device),
            'transl': transl.to(device),
        }

        mano_outs = dict()

        output = mano_hand(**mano_args)
        mano_outs['vertices'] = output.vertices
        mano_outs['j3d'] = output.joints

        mano_outs.update(mano_args)

        if not self.training:
            mano_outs['faces'] = np.tile(mano_hand.faces, (batch_size, 1, 1))

        return mano_outs


class TEHNet(nn.Module):
    def __init__(self, n_pose_params, num_classes=4):
        super(TEHNet, self).__init__()

        normal_channel = True

        if normal_channel:
            additional_channel = 1 + int(os.getenv('ERPC', 0))
        else:
            additional_channel = 0

        self.normal_channel = normal_channel
        self.sa1 = PointNetSetAbstractionMsg(512, [0.1, 0.2, 0.4], [32, 64, 128], 3+additional_channel, [[32, 32, 64], [64, 64, 128], [64, 96, 128]])
        self.sa2 = PointNetSetAbstractionMsg(128, [0.4, 0.8], [64, 128], 128+128+64, [[128, 128, 256], [128, 196, 256]])
        self.sa3 = PointNetSetAbstraction(npoint=None, radius=None, nsample=None, in_channel=512 + 3, mlp=[256, 512, 1024], group_all=True)
        self.fp3 = PointNetFeaturePropagation(in_channel=1536, mlp=[256, 256])
        self.fp2 = PointNetFeaturePropagation(in_channel=576, mlp=[256, 128])

        self.fp1 = PointNetFeaturePropagation(128, [128, 128, 256])

        self.classifier = nn.Sequential(
            nn.Conv1d(256, 256, 1),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.Conv1d(256, num_classes, 1)
        )

        self.attention_block = AttentionBlock()

        self.left_mano_regressor = MANORegressor(n_pose_params=n_pose_params)
        self.right_mano_regressor = MANORegressor(n_pose_params=n_pose_params)

        self.mhlnes = int(os.getenv('MHLNES', 0))

        self.left_query_conv = nn.Sequential(
            nn.Conv1d(256, 256, 3, 1, 3//2),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.1),
            nn.Conv1d(256, 256, 3, 1, 3//2),
            nn.BatchNorm1d(256),
        )

        self.right_query_conv = nn.Sequential(
            nn.Conv1d(256, 256, 3, 1, 3//2),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Dropout(0.1),
            nn.Conv1d(256, 256, 3, 1, 3//2),
            nn.BatchNorm1d(256),
        )

    def forward(self, xyz, mano_hands):
        device = xyz.device

        # Set Abstraction layers
        l0_points = xyz

        l0_xyz = xyz[:, :3, :]

        if self.mhlnes:
            l0_xyz[:, -1, :] = xyz[:, 3:, :].mean(1)

        l1_xyz, l1_points = self.sa1(l0_xyz, l0_points)
        l2_xyz, l2_points = self.sa2(l1_xyz, l1_points)
        l3_xyz, l3_points = self.sa3(l2_xyz, l2_points)

        # Feature Propagation layers
        l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
        l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
        l0_points = self.fp1(l0_xyz, l1_xyz, None, l1_points)

        seg_out = self.classifier(l0_points)
        feat_fuse = l0_points

        left_hand_features = self.attention_block(seg_out, feat_fuse, self.left_query_conv(feat_fuse))
        right_hand_features = self.attention_block(seg_out, feat_fuse, self.right_query_conv(feat_fuse))

        left = self.left_mano_regressor(l0_xyz, left_hand_features, mano_hands['left'])
        right = self.right_mano_regressor(l0_xyz, right_hand_features, mano_hands['right'])

        return {'class_logits': seg_out, 'left': left, 'right': right}


def main():

    net = TEHNet(n_pose_params=6)
    points = torch.rand(4, 4, 128)
    net(points)


if __name__ == '__main__':
    main()
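Note: in TEHNet.forward, the AttentionBlock is called with the per-point segmentation logits as keys, the fused point features as values, and the output of a per-hand query convolution as queries; the result is a num_classes-channel per-point feature that conditions each MANORegressor. A standalone shape sketch that mirrors AttentionBlock.forward without any project imports (random tensors stand in for real activations):

    # Mirrors AttentionBlock.forward; shapes match how TEHNet.forward calls it.
    import torch
    import torch.nn.functional as F

    B, num_classes, C, N = 2, 4, 256, 2048
    key = torch.rand(B, num_classes, N)    # segmentation logits per point
    value = torch.rand(B, C, N)            # fused per-point features
    query = torch.rand(B, C, N)            # output of left_query_conv / right_query_conv

    q = query.permute(0, 2, 1)                       # (B, N, C)
    sim = torch.bmm(key, q) * (C ** -0.5)            # (B, num_classes, C), scaled by value channels as in the code
    sim = F.softmax(sim, dim=1)
    context = torch.bmm(sim, value)                  # (B, num_classes, N)
    print(context.shape)                             # torch.Size([2, 4, 2048])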
model/__init__.py
ADDED
@@ -0,0 +1 @@
from .model import TEHNetWrapper
model/model.py
ADDED
@@ -0,0 +1,64 @@
import torch
import torch.nn as nn
import trimesh
import numpy as np
from .TEHNet import TEHNet
from .utils import create_mano_layers
from settings import MANO_PATH, MANO_CMPS


class TEHNetWrapper():
    def state_dict(self):
        return self.net.state_dict()

    def load_state_dict(self, params, *args, **kwargs):
        modified_params = dict()

        for k, v in params.items():
            if k.startswith('module.'):
                k = k[len('module.'):]

            modified_params[k] = v

        self.net.load_state_dict(modified_params, *args, **kwargs)

    def parameters(self):
        return self.net.parameters()

    def train(self):
        self.training = True
        return self.net.train()

    def eval(self):
        self.training = False
        return self.net.eval()

    def P3dtoP2d(self, j3d, scale, translation):
        B, N = j3d.shape[:2]

        homogeneous_j3d = torch.cat([j3d, torch.ones(B, N, 1, device=j3d.device)], 2)
        homogeneous_j3d = homogeneous_j3d @ self.rot.detach()

        translation = translation.unsqueeze(1)
        scale = scale.unsqueeze(1)

        j2d = torch.zeros(B, N, 2, device=j3d.device)
        j2d[:, :, 0] = translation[:, :, 0] + scale[:, :, 0] * homogeneous_j3d[:, :, 0]
        j2d[:, :, 1] = translation[:, :, 1] + scale[:, :, 1] * homogeneous_j3d[:, :, 1]

        return j2d

    def __init__(self, device):
        net = TEHNet(n_pose_params=MANO_CMPS).to(device)

        self.net = net
        self.training = False

        self.hands = create_mano_layers(MANO_PATH, device, MANO_CMPS)

        self.rot = torch.tensor(trimesh.transformations.rotation_matrix(np.radians(180), [1, 0, 0]), device=device).float()

    def __call__(self, inp):
        outputs = self.net(inp, self.hands)

        return outputs
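Note: a hedged end-to-end smoke test of the wrapper, along the lines of how demo.py drives it. It assumes manopth is installed and the MANO model files are present under MANO_PATH (data/models), and it sets ERPC=1 before importing so TEHNet expects the same 5-channel event input that demo.process_events produces; the file name sketch_wrapper.py is illustrative:

    # sketch_wrapper.py -- untrained forward pass, shape check only
    import os
    os.environ['ERPC'] = '1'   # demo.py sets this before importing the model

    import torch
    from model import TEHNetWrapper

    device = torch.device('cpu')
    net = TEHNetWrapper(device=device)
    net.eval()

    events = torch.rand(1, 5, 2048)   # one frame: x, y, t, #pos, #neg per sampled pixel

    with torch.no_grad():
        out = net(events)

    print(out['class_logits'].shape)      # per-event scores over the 4 classes
    print(out['left']['vertices'].shape)  # predicted MANO vertices for the left hand
    print(out['left']['j3d'].shape)       # predicted 3D joints for the left hand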
model/pointnet2_utils.py
ADDED
@@ -0,0 +1,315 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from time import time
import numpy as np

def timeit(tag, t):
    print("{}: {}s".format(tag, time() - t))
    return time()

def pc_normalize(pc):
    l = pc.shape[0]
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    pc = pc / m
    return pc

def square_distance(src, dst):
    """
    Calculate Euclid distance between each two points.

    src^T * dst = xn * xm + yn * ym + zn * zm;
    sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn;
    sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm;
    dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
         = sum(src**2,dim=-1)+sum(dst**2,dim=-1)-2*src^T*dst

    Input:
        src: source points, [B, N, C]
        dst: target points, [B, M, C]
    Output:
        dist: per-point square distance, [B, N, M]
    """
    B, N, _ = src.shape
    _, M, _ = dst.shape
    dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
    dist += torch.sum(src ** 2, -1).view(B, N, 1)
    dist += torch.sum(dst ** 2, -1).view(B, 1, M)
    return dist


def index_points(points, idx):
    """
    Input:
        points: input points data, [B, N, C]
        idx: sample index data, [B, S]
    Return:
        new_points: indexed points data, [B, S, C]
    """
    device = points.device
    B = points.shape[0]
    view_shape = list(idx.shape)
    view_shape[1:] = [1] * (len(view_shape) - 1)
    repeat_shape = list(idx.shape)
    repeat_shape[0] = 1
    batch_indices = torch.arange(B, dtype=torch.long).view(view_shape).repeat(repeat_shape)
    new_points = points[batch_indices, idx, :]
    return new_points


def farthest_point_sample(xyz, npoint):
    """
    Input:
        xyz: pointcloud data, [B, N, 3]
        npoint: number of samples
    Return:
        centroids: sampled pointcloud index, [B, npoint]
    """
    device = xyz.device
    B, N, C = xyz.shape
    centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
    distance = torch.ones(B, N).to(device) * 1e10
    farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
    batch_indices = torch.arange(B, dtype=torch.long).to(device)
    for i in range(npoint):
        centroids[:, i] = farthest
        centroid = xyz[batch_indices, farthest, :].view(B, 1, 3)
        dist = torch.sum((xyz - centroid) ** 2, -1)
        mask = dist < distance
        distance[mask] = dist[mask]
        farthest = torch.max(distance, -1)[1]
    return centroids


def query_ball_point(radius, nsample, xyz, new_xyz):
    """
    Input:
        radius: local region radius
        nsample: max sample number in local region
        xyz: all points, [B, N, 3]
        new_xyz: query points, [B, S, 3]
    Return:
        group_idx: grouped points index, [B, S, nsample]
    """
    device = xyz.device
    B, N, C = xyz.shape
    _, S, _ = new_xyz.shape
    group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1])
    sqrdists = square_distance(new_xyz, xyz)
    group_idx[sqrdists > radius ** 2] = N
    group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
    group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
    mask = group_idx == N
    group_idx[mask] = group_first[mask]
    return group_idx


def sample_and_group(npoint, radius, nsample, xyz, points, returnfps=False):
    """
    Input:
        npoint:
        radius:
        nsample:
        xyz: input points position data, [B, N, 3]
        points: input points data, [B, N, D]
    Return:
        new_xyz: sampled points position data, [B, npoint, nsample, 3]
        new_points: sampled points data, [B, npoint, nsample, 3+D]
    """
    B, N, C = xyz.shape
    S = npoint
    fps_idx = farthest_point_sample(xyz, npoint)  # [B, npoint, C]
    new_xyz = index_points(xyz, fps_idx)
    idx = query_ball_point(radius, nsample, xyz, new_xyz)
    grouped_xyz = index_points(xyz, idx)  # [B, npoint, nsample, C]
    grouped_xyz_norm = grouped_xyz - new_xyz.view(B, S, 1, C)

    if points is not None:
        grouped_points = index_points(points, idx)
        new_points = torch.cat([grouped_xyz_norm, grouped_points], dim=-1)  # [B, npoint, nsample, C+D]
    else:
        new_points = grouped_xyz_norm
    if returnfps:
        return new_xyz, new_points, grouped_xyz, fps_idx
    else:
        return new_xyz, new_points


def sample_and_group_all(xyz, points):
    """
    Input:
        xyz: input points position data, [B, N, 3]
        points: input points data, [B, N, D]
    Return:
        new_xyz: sampled points position data, [B, 1, 3]
        new_points: sampled points data, [B, 1, N, 3+D]
    """
    device = xyz.device
    B, N, C = xyz.shape
    new_xyz = torch.zeros(B, 1, C).to(device)
    grouped_xyz = xyz.view(B, 1, N, C)
    if points is not None:
        new_points = torch.cat([grouped_xyz, points.view(B, 1, N, -1)], dim=-1)
    else:
        new_points = grouped_xyz
    return new_xyz, new_points


class PointNetSetAbstraction(nn.Module):
    def __init__(self, npoint, radius, nsample, in_channel, mlp, group_all):
        super(PointNetSetAbstraction, self).__init__()
        self.npoint = npoint
        self.radius = radius
        self.nsample = nsample
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv2d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm2d(out_channel))
            last_channel = out_channel
        self.group_all = group_all

    def forward(self, xyz, points):
        """
        Input:
            xyz: input points position data, [B, C, N]
            points: input points data, [B, D, N]
        Return:
            new_xyz: sampled points position data, [B, C, S]
            new_points_concat: sample points feature data, [B, D', S]
        """
        xyz = xyz.permute(0, 2, 1).contiguous()
        if points is not None:
            points = points.permute(0, 2, 1).contiguous()

        if self.group_all:
            new_xyz, new_points = sample_and_group_all(xyz, points)
        else:
            new_xyz, new_points = sample_and_group(self.npoint, self.radius, self.nsample, xyz, points)
        # new_xyz: sampled points position data, [B, npoint, C]
        # new_points: sampled points data, [B, npoint, nsample, C+D]
        new_points = new_points.permute(0, 3, 2, 1).contiguous()  # [B, C+D, nsample, npoint]
        for i, conv in enumerate(self.mlp_convs):
            bn = self.mlp_bns[i]
            new_points = F.relu(bn(conv(new_points)))

        new_points = torch.max(new_points, 2)[0]
        new_xyz = new_xyz.permute(0, 2, 1).contiguous()
        return new_xyz, new_points


class PointNetSetAbstractionMsg(nn.Module):
    def __init__(self, npoint, radius_list, nsample_list, in_channel, mlp_list):
        super(PointNetSetAbstractionMsg, self).__init__()
        self.npoint = npoint
        self.radius_list = radius_list
        self.nsample_list = nsample_list
        self.conv_blocks = nn.ModuleList()
        self.bn_blocks = nn.ModuleList()
        for i in range(len(mlp_list)):
            convs = nn.ModuleList()
            bns = nn.ModuleList()
            last_channel = in_channel + 3
            for out_channel in mlp_list[i]:
                convs.append(nn.Conv2d(last_channel, out_channel, 1))
                bns.append(nn.BatchNorm2d(out_channel))
                last_channel = out_channel
            self.conv_blocks.append(convs)
            self.bn_blocks.append(bns)

    def forward(self, xyz, points):
        """
        Input:
            xyz: input points position data, [B, C, N]
            points: input points data, [B, D, N]
        Return:
            new_xyz: sampled points position data, [B, C, S]
            new_points_concat: sample points feature data, [B, D', S]
        """
        xyz = xyz.permute(0, 2, 1).contiguous()
        if points is not None:
            points = points.permute(0, 2, 1).contiguous()

        B, N, C = xyz.shape
        S = self.npoint
        new_xyz = index_points(xyz, farthest_point_sample(xyz, S))
        new_points_list = []
        for i, radius in enumerate(self.radius_list):
            K = self.nsample_list[i]
            group_idx = query_ball_point(radius, K, xyz, new_xyz)
            grouped_xyz = index_points(xyz, group_idx)
            grouped_xyz -= new_xyz.view(B, S, 1, C)
            if points is not None:
                grouped_points = index_points(points, group_idx)
                grouped_points = torch.cat([grouped_points, grouped_xyz], dim=-1)
            else:
                grouped_points = grouped_xyz

            grouped_points = grouped_points.permute(0, 3, 2, 1).contiguous()  # [B, D, K, S]
            for j in range(len(self.conv_blocks[i])):
                conv = self.conv_blocks[i][j]
                bn = self.bn_blocks[i][j]
                grouped_points = F.relu(bn(conv(grouped_points)))
            new_points = torch.max(grouped_points, 2)[0]  # [B, D', S]
            new_points_list.append(new_points)

        new_xyz = new_xyz.permute(0, 2, 1).contiguous()
        new_points_concat = torch.cat(new_points_list, dim=1)
        return new_xyz, new_points_concat


class PointNetFeaturePropagation(nn.Module):
    def __init__(self, in_channel, mlp):
        super(PointNetFeaturePropagation, self).__init__()
        self.mlp_convs = nn.ModuleList()
        self.mlp_bns = nn.ModuleList()
        last_channel = in_channel
        for out_channel in mlp:
            self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
            self.mlp_bns.append(nn.BatchNorm1d(out_channel))
            last_channel = out_channel

    def forward(self, xyz1, xyz2, points1, points2):
        """
        Input:
            xyz1: input points position data, [B, C, N]
            xyz2: sampled input points position data, [B, C, S]
            points1: input points data, [B, D, N]
            points2: input points data, [B, D, S]
        Return:
            new_points: upsampled points data, [B, D', N]
        """
        xyz1 = xyz1.permute(0, 2, 1).contiguous()
        xyz2 = xyz2.permute(0, 2, 1).contiguous()

        points2 = points2.permute(0, 2, 1).contiguous()
        B, N, C = xyz1.shape
        _, S, _ = xyz2.shape

        if S == 1:
            interpolated_points = points2.repeat(1, N, 1)
        else:
            dists = square_distance(xyz1, xyz2)
            dists, idx = dists.sort(dim=-1)
            dists, idx = dists[:, :, :3], idx[:, :, :3]  # [B, N, 3]

            dist_recip = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_points = torch.sum(index_points(points2, idx) * weight.view(B, N, 3, 1), dim=2)

        if points1 is not None:
            points1 = points1.permute(0, 2, 1).contiguous()
            new_points = torch.cat([points1, interpolated_points], dim=-1)
        else:
            new_points = interpolated_points

        new_points = new_points.permute(0, 2, 1).contiguous()
        for i, conv in enumerate(self.mlp_convs):
            bn = self.mlp_bns[i]
            new_points = F.relu(bn(conv(new_points)))
        return new_points
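Note: a short shape walk-through of the sampling/grouping helpers above, assuming it is run with pointnet2_utils.py importable directly (e.g. from inside the model/ directory, to avoid pulling the MANO dependencies through model/__init__.py); the sizes are arbitrary:

    # sketch_pointnet_utils.py
    import torch
    from pointnet2_utils import farthest_point_sample, query_ball_point, index_points

    B, N, S, K = 2, 1024, 128, 32
    xyz = torch.rand(B, N, 3)                           # point positions, [B, N, 3]

    fps_idx = farthest_point_sample(xyz, S)             # [B, S] indices of well-spread points
    new_xyz = index_points(xyz, fps_idx)                # [B, S, 3] sampled centroids
    group_idx = query_ball_point(0.2, K, xyz, new_xyz)  # [B, S, K] neighbour indices within radius 0.2
    neighbours = index_points(xyz, group_idx)           # [B, S, K, 3] grouped neighbourhoods
    print(fps_idx.shape, new_xyz.shape, neighbours.shape)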
model/utils.py
ADDED
@@ -0,0 +1,42 @@
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.utils.data
from torch.autograd import Variable
import numpy as np
import torch.nn.functional as F

from manopth.manolayer import ManoLayer


def create_mano_layers(mano_path, device, n_cmps):
    class Output:
        def __init__(self, vertices, joints):
            self.vertices = vertices
            self.joints = joints

    class SmplxAdapter:
        def __init__(self, side):
            self.m = ManoLayer(mano_root=f'{mano_path}/mano', use_pca=True, ncomps=n_cmps, side=side, flat_hand_mean=False, robust_rot=True).to(device)
            self.faces = self.m.th_faces.cpu().numpy()
            self.shapedirs = self.m.th_shapedirs

        def __call__(self, global_orient, hand_pose, betas, transl):
            vertices, joints = self.m(torch.cat([global_orient, hand_pose], 1), betas, transl)

            vertices /= 1000
            joints /= 1000

            return Output(vertices, joints)

    mano_layer = {
        'left': SmplxAdapter(side='left'),
        'right': SmplxAdapter(side='right')
    }

    if torch.sum(torch.abs(mano_layer['left'].m.th_shapedirs[:, 0, :] - mano_layer['right'].m.th_shapedirs[:, 0, :])) < 1:
        print('Fix th_shapedirs bug of MANO')
        mano_layer['left'].m.th_shapedirs[:, 0, :] *= -1

    return mano_layer
record.py
ADDED
@@ -0,0 +1,20 @@
import cv2

camera = cv2.VideoCapture(0)
fps = cv2.CAP_PROP_FPS

video = cv2.VideoWriter('video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), 30, (640, 480))

while True:
    _, frame = camera.read()

    video.write(frame)

    cv2.imshow("Frame", frame)
    c = cv2.waitKey(1)

    if c == ord('q'):
        break

video.release()
camera.release()
requirements.txt
ADDED
@@ -0,0 +1,5 @@
opencv-python
git+https://github.com/hassony2/manopth
pyrender
git+https://github.com/mattloper/chumpy.git
gradio
settings.py
ADDED
@@ -0,0 +1,45 @@
import os
if os.name != 'nt': os.environ["PYOPENGL_PLATFORM"] = "egl"

import pyrender
import numpy as np
import os
import platform


ESIM_REFRACTORY_PERIOD_NS = 0
ESIM_POSITIVE_THRESHOLD = 0.4
ESIM_NEGATIVE_THRESHOLD = 0.4

RENDER_SMPLX = False

AUGMENTATED_SEQUENCE = True
NUMBER_OF_AUGMENTATED_SEQUENCES = 10

SIMULATOR_FPS = 1000  # fps for event generation using ESIM
INTERPOLATION_FPS = 30
OUTPUT_WIDTH = 346
OUTPUT_HEIGHT = 260
LNES_WINDOW_MS = 5


INTERHAND_ROOT_PATH = '/CT/datasets01/static00/InterHand2.6m/InterHand2.6M_5fps_batch1'
ROOT_TRAIN_DATA_PATH = '/CT/datasets07/nobackup/Ev2Hands/Ev2Hands-S'

REAL_TRAIN_DATA_PATH = '/CT/datasets07/nobackup/Ev2Hands/Ev2Hands-R/train_data'
REAL_TEST_DATA_PATH = '/CT/datasets07/nobackup/Ev2Hands/Ev2Hands-R/test_data'

DATA_PATH = '../data'
MANO_PATH = 'data/models'


GENERATION_MODE = str(os.getenv('GENERATION_MODE', 'train'))

MANO_CMPS = 6

SEGMENTAION_COLOR = {'left': [0, 1, 0], 'right': [0, 0, 1]}

MAIN_CAMERA = pyrender.PerspectiveCamera(yfov=np.deg2rad(30), aspectRatio=OUTPUT_WIDTH / OUTPUT_HEIGHT)
PROJECTION_MATRIX = MAIN_CAMERA.get_projection_matrix(OUTPUT_WIDTH, OUTPUT_HEIGHT)

HAND_COLOR = [198/255, 134/255, 66/255]
test.py
ADDED
@@ -0,0 +1,93 @@
import numpy as np
import cv2
import esim_py


# camera = cv2.VideoCapture(0)
camera = cv2.VideoCapture('video.mp4')

POS_THRESHOLD = 0.5
NEG_THRESHOLD = 0.5
REF_PERIOD = 0.000

esim = esim_py.EventSimulator(POS_THRESHOLD, NEG_THRESHOLD, REF_PERIOD, 1e-4, True)

# # generate events from list of images and timestamps
# events_list_of_images = esim.generateFromStampedImageSequence(
#     list_of_image_files,  # list of absolute paths to images
#     list_of_timestamps    # list of timestamps in ascending order
# )

fps = cv2.CAP_PROP_FPS
ts_s = 1 / fps
ts_ns = ts_s * 1e9  # convert s to ns

is_init = False
idx = 0
while True:
    _, frame_bgr = camera.read()
    frame_gray = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2GRAY)
    frame_log = np.log(frame_gray.astype("float32") / 255 + 1e-4)
    height, width = frame_log.shape[:2]

    current_ts_ns = idx * ts_ns

    if not is_init:
        esim.init(frame_log, current_ts_ns)
        is_init = True
        idx += 1

        continue

    events = esim.generateEventFromCVImage(frame_log, current_ts_ns)
    x, y, t, p = events.T
    t = t.astype(dtype=np.float32) * 1e-6  # convert ns to milliseconds

    last_timestamp = t[-1]

    event_frame = np.zeros((height, width, 3), dtype=np.float32)

    x = x.astype(dtype=np.int32)
    y = y.astype(dtype=np.int32)
    p = p.astype(dtype=np.int32)

    print(idx, events.shape)

    if last_timestamp <= 0:
        continue

    event_frame[y, x, 1 - p] = (last_timestamp - t) / (last_timestamp - t[0])

    event_frame *= 255
    event_frame = event_frame.astype(dtype=np.uint8)

    stack = np.hstack([frame_bgr, event_frame])
    cv2.imwrite(f"outputs/stack_{idx}.png", stack)

    # cv2.imwrite("frame.png", frame_bgr)

    # input(idx)
    # # t, x, y, p = event
    # # x, y = x.astype(dtype=np.int32), y.astype(dtype=np.int32)

    # # events = np.hstack([x[..., None], y[..., None], t[..., None], p[..., None]])
    # # event_labels = segmentation[y, x].astype(dtype=np.uint8)

    # # write_frame = False
    # # show_frame = False

    # # if write_frame or show_frame:
    # #     ts, xs, ys, ps = event
    # #     h, w = frame_color.shape[:2]
    # #     event_bgr = np.zeros((h, w, 3), dtype=np.uint8)
    # #     for x, y, p in zip(xs, ys, ps):
    # #         event_bgr[y, x, 0 if p == -1 else 2] = 255

    # #     image_path = image_paths[frame_keys[frame_index]]
    # #     rgb_image = cv2.imread(image_path)


    # cv2.imshow("Frame", frame)
    # cv2.waitKey(1)

    idx += 1
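Note: in both test.py and demo.py, `fps = cv2.CAP_PROP_FPS` stores the OpenCV property identifier (a small integer constant), not the actual frame rate, so `ts_s = 1 / fps` is not the true frame period. A hedged sketch of reading the frame rate from the capture instead (the 25 fps fallback is an assumption, not taken from the repository):

    import cv2

    camera = cv2.VideoCapture('video.mp4')
    fps = camera.get(cv2.CAP_PROP_FPS) or 25.0  # actual frame rate, with a fallback if unreported
    ts_s = 1 / fps
    ts_ns = ts_s * 1e9                          # frame period in nanoseconds
    print(fps, ts_ns)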
vis.py
ADDED
@@ -0,0 +1,13 @@
import cv2
import os

files = os.listdir("outputs")
n_files = len(files)

for i in range(1, n_files):
    frame = cv2.imread(f"outputs/stack_{i}.png")
    cv2.imshow("Frame", frame)
    c = cv2.waitKey(1)

    if c == ord('q'):
        break