"""Streamlit app for exploring, filtering and visualizing ``.pose`` files.

The app lets a user upload a pose-format file, choose which components and
points to keep (manually or via a preset), download the filtered pose, and
render it as an animated GIF.
"""
import io
import json
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import mediapipe as mp
import numpy as np
import streamlit as st
from PIL import Image
from pose_format import Pose
from pose_format.pose_visualizer import PoseVisualizer
from pose_format.utils.generic import pose_hide_legs, reduce_holistic
from pyzstd import decompress
from streamlit.runtime.uploaded_file_manager import UploadedFile

mp_holistic = mp.solutions.holistic

# Names (as strings) of every face-mesh point that appears in mediapipe's
# FACEMESH_CONTOURS connection list, sorted by numeric index.
FACEMESH_CONTOURS_POINTS = [
    str(p)
    for p in sorted({p for p_tup in mp_holistic.FACEMESH_CONTOURS for p in p_tup})
]

COMPONENT_SELECTION_METHODS = ["manual", "signclip", "youtube-asl", "reduce_holistic"]


def download_json(data):
    """Serialize *data* to JSON and return it as UTF-8 encoded bytes."""
    return json.dumps(data).encode("utf-8")


def get_points_dict_and_components_with_index_list(
    pose: Pose,
    landmark_indices: List[int],
    components_to_include: Optional[List[str]] = None,
) -> Tuple[List[str], Dict[str, List[str]]]:
    """Translate absolute point indices into component names and point names.

    Useful when you only have a list of indices, e.g. as listed in a research
    paper like YouTube-ASL. Whole components can additionally be requested by
    name via *components_to_include*.

    For example, to get the two hands and the nose::

        c_names, points_dict = get_points_dict_and_components_with_index_list(
            pose,
            landmark_indices=[0],  # which is "NOSE" within POSE_LANDMARKS
            components_to_include=["LEFT_HAND_LANDMARKS", "RIGHT_HAND_LANDMARKS"],
        )
        filtered_pose = pose.get_components(c_names, points_dict)

    Args:
        pose: Pose whose header is searched.
        landmark_indices: Point indices, compared against the values returned
            by ``pose.header.get_point_index``.
        components_to_include: Optional component names to add to the result
            regardless of whether any of their points matched.

    Returns:
        A ``(component_names, points_dict)`` tuple. ``component_names`` has no
        duplicates and keeps first-seen order; ``points_dict`` maps a component
        name to the matched point names and only has entries for components
        with at least one matched point.
    """
    # Build the lookup set once instead of scanning the list for every point.
    wanted_indices = set(landmark_indices)
    components_to_get: List[str] = []
    points_dict: Dict[str, List[str]] = defaultdict(list)

    for component in pose.header.components:
        for point_name in component.points:
            point_index = pose.header.get_point_index(component.name, point_name)
            if point_index in wanted_indices:
                components_to_get.append(component.name)
                points_dict[component.name].append(point_name)

    if components_to_include:
        components_to_get.extend(components_to_include)

    # dict.fromkeys de-duplicates while keeping a deterministic order.
    components_to_get = list(dict.fromkeys(components_to_get))
    # Return a plain dict so later lookups cannot silently create empty entries.
    return components_to_get, dict(points_dict)


def load_pose(uploaded_file: UploadedFile) -> Pose:
    """Read a ``Pose`` from an uploaded file, decompressing ``.zst`` uploads.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Returns:
        The parsed ``Pose``.
    """
    # getvalue() returns the whole buffer regardless of the current stream
    # position, unlike read(), which returns nothing once the stream is consumed.
    raw_bytes = uploaded_file.getvalue()
    if uploaded_file.name.endswith(".zst"):
        raw_bytes = decompress(raw_bytes)
    return Pose.read(raw_bytes)


@st.cache_data(hash_funcs={Pose: lambda p: np.asarray(p.body.data.data)})
def get_pose_frames(pose: Pose, transparency: bool = False):
    """Draw every frame of *pose* and convert the frames to PIL images.

    Args:
        pose: Pose to draw.
        transparency: If true, convert frames BGR->RGBA; otherwise BGR->RGB.

    Returns:
        A ``(frames, images)`` tuple: the raw frames produced by
        ``PoseVisualizer.draw`` and the corresponding ``PIL.Image`` objects.
    """
    visualizer = PoseVisualizer(pose)
    frames = list(visualizer.draw())
    cv2 = visualizer.cv2
    cv_code = cv2.COLOR_BGR2RGBA if transparency else cv2.COLOR_BGR2RGB
    images = [Image.fromarray(cv2.cvtColor(frame, cv_code)) for frame in frames]
    return frames, images


def get_pose_gif(
    pose: Pose,
    step: int = 1,
    start_frame: Optional[int] = None,
    end_frame: Optional[int] = None,
    fps: Optional[float] = None,
):
    """Render a slice of *pose* as an animated GIF.

    Args:
        pose: Pose to render.
        step: Keep every ``step``-th frame of the selected range.
        start_frame: First frame of the slice (``None`` = from the start).
        end_frame: End of the slice, exclusive (``None`` = to the end).
        fps: Frame rate to use for the GIF; ``None`` keeps ``pose.body.fps``.

    Returns:
        Whatever ``PoseVisualizer.save_gif(None, ...)`` returns, or ``None``
        when the selected range contains no frames.
    """
    original_fps = pose.body.fps
    if fps is not None:
        # The frame rate is overridden on the pose only for the duration of
        # this call and restored in the finally block below.
        pose.body.fps = fps
    try:
        visualizer = PoseVisualizer(pose)
        frames = list(visualizer.draw())[start_frame:end_frame:step]
        if not frames:
            # Nothing to render (e.g. start_frame == end_frame).
            return None
        return visualizer.save_gif(None, frames=frames)
    finally:
        # Do not leak the override into the caller's pose: it may live in
        # session state and drives the FPS slider bounds on the next rerun.
        pose.body.fps = original_fps


def _download_file_name(uploaded_name: str) -> str:
    """Return a ``.pose`` file name for a download derived from the upload name."""
    zst_suffix = ".zst"
    if uploaded_name.endswith(zst_suffix):
        # "x.pose.zst" -> "x.pose" (with_suffix alone would give "x.pose.pose").
        uploaded_name = uploaded_name[: -len(zst_suffix)]
    return Path(uploaded_name).with_suffix(".pose").name


st.write("# Pose-format explorer")
st.write(
    "`pose-format` is a toolkit/library for 'handling, manipulation, and visualization of poses'. See [The documentation](https://pose-format.readthedocs.io/en/latest/)"
)
st.write(
    "I made this app to help me visualize and understand the format, including different 'components' and 'points', and what they are named."
)
st.write(
    "If you need a .pose file, here's a few:"
)
st.write("* One of [me doing a self-introduction](https://drive.google.com/file/d/1_L5sYVhONDBABuTmQUvjsl94LbFqzEyP/view?usp=sharing)")
st.write("* One of [me signing ASL 'HOUSE'](https://drive.google.com/file/d/1uggYqLyTA4XdDWaWsS9w5hKaPwW86IF_/view?usp=sharing)")
st.write(
    "* ... or [the same file, but with the 10 extra landmarks](https://drive.google.com/file/d/1XHkfn24PIas1a3XUUXYXTX2DvYeUDuCI/view?usp=drive_link) from mediapipe holistic's [`refine_face_landmarks` option](https://github.com/sign-language-processing/pose/?tab=readme-ov-file#2-estimating-pose-from-video)"
)

uploaded_file = st.file_uploader("Upload a .pose file", type=[".pose", ".pose.zst"])

if uploaded_file is not None:
    with st.spinner(f"Loading {uploaded_file.name}"):
        pose = load_pose(uploaded_file)
        frames, images = get_pose_frames(pose=pose)
    st.success("Done loading!")

    # A filtered pose kept in session state belongs to one specific upload.
    # Drop it when a different file arrives so stale data is never shown or
    # downloaded under the new file's name.
    source_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("filtered_pose_source") != source_key:
        st.session_state.pop("filtered_pose", None)
        st.session_state["filtered_pose_source"] = source_key

    st.write("### File Info")
    with st.expander(f"Show full Pose-format header from {uploaded_file.name}"):
        st.write(pose.header)

    st.write("### Selection")
    component_selection = st.radio(
        "How to select components?", options=COMPONENT_SELECTION_METHODS
    )

    component_names = [c.name for c in pose.header.components]
    chosen_component_names = []
    points_dict = {}
    HIDE_LEGS = False

    if component_selection == "manual":
        chosen_component_names = st.pills(
            "Select components to visualize",
            options=component_names,
            default=component_names,
            selection_mode="multi",
        )

        for component in pose.header.components:
            if component.name not in chosen_component_names:
                continue
            with st.expander(f"Points for {component.name}"):
                selected_points = st.multiselect(
                    f"Select points for component {component.name}:",
                    options=component.points,
                    default=component.points,
                )
                # Only add an entry if not all points are selected.
                if selected_points != component.points:
                    points_dict[component.name] = selected_points

    elif component_selection == "signclip":
        st.write("Selected landmarks used for [SignCLIP](https://arxiv.org/abs/2407.01264).")
        chosen_component_names = [
            "POSE_LANDMARKS",
            "FACE_LANDMARKS",
            "LEFT_HAND_LANDMARKS",
            "RIGHT_HAND_LANDMARKS",
        ]
        points_dict = {"FACE_LANDMARKS": FACEMESH_CONTOURS_POINTS}

    elif component_selection == "reduce_holistic":
        st.write("Using [pose_format.utils.generic.reduce_holistic](https://github.com/sign-language-processing/pose/blob/master/src/python/pose_format/utils/generic.py#L286)")

    elif component_selection == "youtube-asl":
        st.write("Selected landmarks used for [YouTube-ASL](https://arxiv.org/pdf/2306.15162).")
        # From https://arxiv.org/pdf/2306.15162:
        # For each hand, we use all 21 landmark points.
        # For the pose, we use 6 landmark points, for the shoulders, elbows and hips.
        # These are indices 11, 12, 13, 14, 23, 24.
        # For the face, we use 37 landmark points, from the eyes, eyebrows, lips, and face outline.
        # These are indices 0, 4, 13, 14, 17, 33, 37, 39, 46, 52, 55, 61, 64, 81, 82, 93, 133, 151, 152, 159, 172, 178,
        # 181, 263, 269, 276, 282, 285, 291, 294, 311, 323, 362, 386, 397, 468, 473.
        # Colin: note that these are with refine_face_landmarks on, and are
        # relative to the component itself. Working it all out the result is:
        chosen_component_names = [
            "POSE_LANDMARKS",
            "FACE_LANDMARKS",
            "LEFT_HAND_LANDMARKS",
            "RIGHT_HAND_LANDMARKS",
        ]
        points_dict = {
            "POSE_LANDMARKS": [
                "LEFT_SHOULDER",
                "RIGHT_SHOULDER",
                "LEFT_HIP",
                "RIGHT_HIP",
                "LEFT_ELBOW",
                "RIGHT_ELBOW",
            ],
            "FACE_LANDMARKS": [
                "0", "4", "13", "14", "17", "33", "37", "39", "46", "52",
                "55", "61", "64", "81", "82", "93", "133", "151", "152", "159",
                "172", "178", "181", "263", "269", "276", "282", "285", "291", "294",
                "311", "323", "362", "386", "397",
            ],
        }

        # Points 468 and 473 are the last two indices in the list quoted above;
        # add them only if this file has them (the source comment ties them to
        # refine_face_landmarks).
        additional_face_points = ["468", "473"]
        for additional_point in additional_face_points:
            try:
                pose.header.get_point_index("FACE_LANDMARKS", additional_point)
            except ValueError:
                # Point is not in this file; skip it.
                continue
            points_dict["FACE_LANDMARKS"].append(additional_point)

    # Filter section
    st.write("### Filter .pose File")
    if st.button("Apply Filter!"):
        st.write(f"Filtering strategy: {component_selection}")
        if component_selection == "reduce_holistic":
            newly_filtered_pose = reduce_holistic(pose)
            st.write("Used pose_format.reduce_holistic")
        else:
            newly_filtered_pose = pose.get_components(
                components=chosen_component_names,
                points=points_dict if points_dict else None,
            )

            with st.expander("Show component list and points dict used for get_components"):
                st.write("##### Component names")
                st.write(chosen_component_names)
                st.write("##### Points dict")
                st.write(points_dict)

            with st.expander("How to replicate in pose-format"):
                st.write("##### Usage:")
                st.write("How to achieve the same result with pose-format library")
                usage_string = f"components={chosen_component_names}\npoints_dict={points_dict}\npose = pose.get_components(components=components, points=points_dict)"
                st.code(usage_string)

        if HIDE_LEGS:
            newly_filtered_pose = pose_hide_legs(newly_filtered_pose, remove=True)
        st.session_state.filtered_pose = newly_filtered_pose

    # Fall back to the unfiltered pose until a filter has been applied.
    filtered_pose = st.session_state.get("filtered_pose", pose)
    if filtered_pose is not None:
        st.write("#### Filtered .pose file")
        st.write(f"Pose data shape: {filtered_pose.body.data.shape}")
        with st.expander("Show header"):
            st.write(filtered_pose.header)
        with st.expander("Show body"):
            st.write(filtered_pose.body)

        # Serialize the *filtered* pose in memory; writing to a file in the
        # server's working directory would be shared between sessions.
        pose_buffer = io.BytesIO()
        filtered_pose.write(pose_buffer)
        st.download_button(
            "Download Filtered Pose",
            pose_buffer.getvalue(),
            file_name=_download_file_name(uploaded_file.name),
        )

        st.write("### Visualization")
        # Always offer at least the option 1, even for a single-frame pose,
        # because select_slider needs a non-empty option list.
        step_options = list(range(1, max(len(frames), 2)))
        step = st.select_slider(
            "Step value to select every nth image", step_options, value=1
        )
        # st.slider requires min/max/value to share one numeric type.
        pose_fps = float(filtered_pose.body.fps)
        fps = st.slider(
            "FPS for visualization",
            min_value=1.0,
            max_value=pose_fps,
            value=pose_fps,
        )
        start_frame, end_frame = st.slider(
            "Select Frame Range",
            0,
            len(frames),
            (0, len(frames)),  # Default range
        )

        # Visualization button logic
        if st.button("Visualize"):
            pose_bytes = get_pose_gif(
                pose=filtered_pose,
                step=step,
                start_frame=start_frame,
                end_frame=end_frame,
                fps=fps,
            )
            if pose_bytes is not None:
                st.image(pose_bytes)