# Streamlit app: Pose-format explorer (hosted as a Hugging Face Space).
import json
from collections import defaultdict
from itertools import islice
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import mediapipe as mp
import numpy as np
import streamlit as st
from PIL import Image
from pose_format import Pose
from pose_format.pose_visualizer import PoseVisualizer
from pose_format.utils.generic import pose_hide_legs, reduce_holistic
from pyzstd import decompress
from streamlit.runtime.uploaded_file_manager import UploadedFile
mp_holistic = mp.solutions.holistic

# FACEMESH_CONTOURS is a set of (start, end) index pairs; flatten it to the
# unique point indices, numerically sorted, as strings (pose-format names
# face points by their stringified index).
FACEMESH_CONTOURS_POINTS = [
    str(index)
    for index in sorted({point for edge in mp_holistic.FACEMESH_CONTOURS for point in edge})
]

# Component-selection strategies offered in the UI.
COMPONENT_SELECTION_METHODS = ["manual", "signclip", "youtube-asl", "reduce_holistic"]
def download_json(data):
    """Serialize *data* to JSON and return it as UTF-8 bytes.

    Suitable for handing straight to ``st.download_button``.
    """
    return json.dumps(data).encode("utf-8")
def get_points_dict_and_components_with_index_list(
    pose: Pose, landmark_indices: List[int], components_to_include: Optional[List[str]]
) -> Tuple[List[str], Dict[str, List[str]]]:
    """Map a flat list of global landmark indices back to component/point names.

    Useful when landmarks are listed only by index, e.g. in a research paper
    like YouTube-ASL. Whole components can additionally be forced in via
    ``components_to_include``. For example, to get the two hands and the nose:

        c_names, points_dict = get_points_dict_and_components_with_index_list(
            pose,
            landmark_indices=[0],  # "NOSE" within the POSE_LANDMARKS component
            components_to_include=["LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"],
        )
        filtered_pose = pose.get_components(c_names, points_dict)

    Args:
        pose: the Pose whose header is searched.
        landmark_indices: global point indices (as returned by
            ``pose.header.get_point_index``) to keep.
        components_to_include: optional component names to include wholesale.

    Returns:
        ``(component_names, points_dict)`` suitable for ``pose.get_components``.
        Note that ``component_names`` order is not preserved (deduplicated via
        a set), and a component absent from ``points_dict`` keeps all points.
    """
    wanted_indices = set(landmark_indices)  # O(1) membership per point, not O(n)
    components_to_get = []
    points_dict = defaultdict(list)
    for c in pose.header.components:
        for point_name in c.points:
            if pose.header.get_point_index(c.name, point_name) in wanted_indices:
                components_to_get.append(c.name)
                points_dict[c.name].append(point_name)
    if components_to_include:
        components_to_get.extend(components_to_include)
    # A component name is appended once per matching point above; deduplicate.
    components_to_get = list(set(components_to_get))
    return components_to_get, points_dict
# Caching intentionally left disabled for now:
# @st.cache_data(hash_funcs={UploadedFile: lambda p: str(p.name)})
def load_pose(uploaded_file: UploadedFile) -> Pose:
    """Read a Pose from an uploaded .pose file (optionally zstd-compressed .zst)."""
    raw = uploaded_file.read()
    if uploaded_file.name.endswith(".zst"):
        raw = decompress(raw)
    return Pose.read(raw)
def get_pose_frames(pose: Pose, transparency: bool = False):
    """Render every frame of *pose*.

    Returns a ``(frames, images)`` pair: the raw BGR frames from
    PoseVisualizer, and the same frames converted to PIL Images
    (RGBA when *transparency* is requested, RGB otherwise).
    """
    visualizer = PoseVisualizer(pose)
    frames = list(visualizer.draw())
    conversion = visualizer.cv2.COLOR_BGR2RGBA if transparency else visualizer.cv2.COLOR_BGR2RGB
    images = [
        Image.fromarray(visualizer.cv2.cvtColor(frame, conversion))
        for frame in frames
    ]
    return frames, images
def get_pose_gif(
    pose: Pose,
    step: int = 1,
    start_frame: Optional[int] = None,
    end_frame: Optional[int] = None,
    fps: Optional[float] = None,
):
    """Render *pose* to an animated GIF and return its bytes.

    Keeps every *step*-th frame within ``[start_frame, end_frame)``.

    NOTE(review): when *fps* is given, ``pose.body.fps`` is mutated in place,
    so the caller's Pose keeps the new fps afterwards — confirm this is
    intended before relying on it.
    """
    if fps is not None:
        pose.body.fps = fps
    visualizer = PoseVisualizer(pose)
    # draw() is iterated lazily; islice avoids materializing frames outside
    # the requested range. Equivalent to list(draw())[start:end:step] because
    # the UI sliders only ever supply non-negative indices.
    frames = list(islice(visualizer.draw(), start_frame, end_frame, step))
    return visualizer.save_gif(None, frames=frames)
st.write("# Pose-format explorer")

# Intro copy: one st.write call per paragraph/bullet, in order.
for _paragraph in (
    "`pose-format` is a toolkit/library for 'handling, manipulation, and visualization of poses'. See [The documentation](https://pose-format.readthedocs.io/en/latest/)",
    "I made this app to help me visualize and understand the format, including different 'components' and 'points', and what they are named.",
    "If you need a .pose file, here's a few:",
    "* One of [me doing a self-introduction](https://drive.google.com/file/d/1_L5sYVhONDBABuTmQUvjsl94LbFqzEyP/view?usp=sharing)",
    "* One of [me signing ASL 'HOUSE'](https://drive.google.com/file/d/1uggYqLyTA4XdDWaWsS9w5hKaPwW86IF_/view?usp=sharing)",
    "* ... or [the same file, but with the 10 extra landmarks](https://drive.google.com/file/d/1XHkfn24PIas1a3XUUXYXTX2DvYeUDuCI/view?usp=drive_link) from mediapipe holistic's [`refine_face_landmarks` option](https://github.com/sign-language-processing/pose/?tab=readme-ov-file#2-estimating-pose-from-video)",
):
    st.write(_paragraph)

uploaded_file = st.file_uploader("Upload a .pose file", type=[".pose", ".pose.zst"])
if uploaded_file is not None:
    with st.spinner(f"Loading {uploaded_file.name}"):
        pose = load_pose(uploaded_file)
        frames, images = get_pose_frames(pose=pose)
        st.success("Done loading!")

    st.write("### File Info")
    with st.expander(f"Show full Pose-format header from {uploaded_file.name}"):
        st.write(pose.header)

    st.write("### Selection")
    component_selection = st.radio(
        "How to select components?", options=COMPONENT_SELECTION_METHODS
    )
    component_names = [c.name for c in pose.header.components]
    chosen_component_names = []
    points_dict = {}
    HIDE_LEGS = False  # currently always off; kept as a manual toggle

    if component_selection == "manual":
        chosen_component_names = st.pills(
            "Select components to visualize",
            options=component_names,
            default=component_names,
            selection_mode="multi",
        )
        for component in pose.header.components:
            if component.name in chosen_component_names:
                with st.expander(f"Points for {component.name}"):
                    selected_points = st.multiselect(
                        f"Select points for component {component.name}:",
                        options=component.points,
                        default=component.points,
                    )
                    # Only record an entry when a strict subset is chosen;
                    # get_components treats a missing key as "all points".
                    if selected_points != component.points:
                        points_dict[component.name] = selected_points
    elif component_selection == "signclip":
        st.write("Selected landmarks used for [SignCLIP](https://arxiv.org/abs/2407.01264).")
        chosen_component_names = [
            "POSE_LANDMARKS",
            "FACE_LANDMARKS",
            "LEFT_HAND_LANDMARKS",
            "RIGHT_HAND_LANDMARKS",
        ]
        points_dict = {"FACE_LANDMARKS": FACEMESH_CONTOURS_POINTS}
    elif component_selection == "reduce_holistic":
        st.write("Using [pose_format.utils.generic.reduce_holistic](https://github.com/sign-language-processing/pose/blob/master/src/python/pose_format/utils/generic.py#L286)")
    elif component_selection == "youtube-asl":
        st.write("Selected landmarks used for [YouTube-ASL](https://arxiv.org/pdf/2306.15162).")
        # Per the YouTube-ASL paper (https://arxiv.org/pdf/2306.15162):
        # * both full 21-point hands;
        # * 6 pose points (shoulders, elbows, hips), i.e. indices 11, 12, 13, 14, 23, 24;
        # * 37 face points (eyes, eyebrows, lips, face outline), indices
        #   0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93, 133,
        #   151, 152, 159, 172, 178, 181, 263, 269, 276, 282, 285, 291, 294,
        #   311, 323, 362, 386, 397, 468, 473.
        # The indices below are relative to each component, and the last two
        # face points assume the file was estimated with refine_face_landmarks.
        chosen_component_names = [
            "POSE_LANDMARKS",
            "FACE_LANDMARKS",
            "LEFT_HAND_LANDMARKS",
            "RIGHT_HAND_LANDMARKS",
        ]
        points_dict = {
            "POSE_LANDMARKS": [
                "LEFT_SHOULDER",
                "RIGHT_SHOULDER",
                "LEFT_HIP",
                "RIGHT_HIP",
                "LEFT_ELBOW",
                "RIGHT_ELBOW",
            ],
            "FACE_LANDMARKS": [
                "0", "4", "13", "14", "17", "33", "37", "39", "46", "52",
                "55", "61", "64", "81", "82", "93", "133", "151", "152", "159",
                "172", "178", "181", "263", "269", "276", "282", "285", "291",
                "294", "311", "323", "362", "386", "397",
            ],
        }
        # The iris-center points only exist when refine_face_landmarks was on;
        # probe the header and add them only if present.
        additional_face_points = ["468", "473"]
        for additional_point in additional_face_points:
            try:
                pose.header.get_point_index("FACE_LANDMARKS", additional_point)
                points_dict["FACE_LANDMARKS"].append(additional_point)
            except ValueError:
                pass  # point not present in this file

    # Filter section
    st.write("### Filter .pose File")
    filtered = st.button("Apply Filter!")
    if filtered:
        st.write(f"Filtering strategy: {component_selection}")
        if component_selection == "reduce_holistic":
            pose = reduce_holistic(pose)
            st.write("Used pose_format.reduce_holistic")
        else:
            pose = pose.get_components(
                components=chosen_component_names,
                points=points_dict if points_dict else None,
            )
            with st.expander("Show component list and points dict used for get_components"):
                st.write("##### Component names")
                st.write(chosen_component_names)
                st.write("##### Points dict")
                st.write(points_dict)
            with st.expander("How to replicate in pose-format"):
                st.write("##### Usage:")
                st.write("How to achieve the same result with pose-format library")
                usage_string = f"components={chosen_component_names}\npoints_dict={points_dict}\npose = pose.get_components(components=components, points=points_dict)"
                st.code(usage_string)
        if HIDE_LEGS:
            pose = pose_hide_legs(pose, remove=True)
        # Persist across Streamlit reruns so the filtered pose survives
        # interactions with other widgets.
        st.session_state.filtered_pose = pose

    filtered_pose = st.session_state.get("filtered_pose", pose)
    if filtered_pose:
        st.write("#### Filtered .pose file")
        st.write(f"Pose data shape: {filtered_pose.body.data.shape}")
        with st.expander("Show header"):
            st.write(filtered_pose.header)
        with st.expander("Show body"):
            st.write(filtered_pose.body)
        pose_file_out = Path(uploaded_file.name).with_suffix(".pose")
        # BUGFIX: write the *filtered* pose for download. Previously this
        # wrote `pose`, which on a rerun where the filter button was not
        # pressed is the freshly-loaded, unfiltered pose — the download then
        # silently differed from what the UI displayed.
        with pose_file_out.open("wb") as f:
            filtered_pose.write(f)
        with pose_file_out.open("rb") as f:
            st.download_button(
                "Download Filtered Pose", f, file_name=pose_file_out.name
            )

    st.write("### Visualization")
    step = st.select_slider(
        "Step value to select every nth image",
        # max(2, ...) keeps at least one option ([1]) for single-frame files,
        # where range(1, 1) would otherwise yield no options and crash.
        list(range(1, max(2, len(frames)))),
        value=1,
    )
    fps = st.slider(
        "FPS for visualization",
        min_value=1.0,
        max_value=filtered_pose.body.fps,
        value=filtered_pose.body.fps,
    )
    start_frame, end_frame = st.slider(
        "Select Frame Range",
        0,
        len(frames),
        (0, len(frames)),  # Default range
    )
    # Visualization button logic
    if st.button("Visualize"):
        # Visualize the filtered pose if one exists, otherwise the loaded pose
        # (filtered_pose falls back to `pose` above).
        pose_bytes = get_pose_gif(
            pose=filtered_pose,
            step=step,
            start_frame=start_frame,
            end_frame=end_frame,
            fps=fps,
        )
        if pose_bytes is not None:
            st.image(pose_bytes)