# YOGAI / yoga_position_gradio.py
import argparse
import os
import re

import cv2
import mediapipe as mp
import numpy as np
import tqdm
from mediapipe.python.solutions import drawing_utils as mp_drawing

from PoseClassification.pose_embedding import FullBodyPoseEmbedding
from PoseClassification.pose_classifier import PoseClassifier
from PoseClassification.utils import EMADictSmoothing
# from PoseClassification.utils import RepetitionCounter
from PoseClassification.utils import show_image
from PoseClassification.visualize import PoseClassificationVisualizer
def check_major_current_position(positions_detected: dict, threshold_position) -> str:
    '''
    Return the major position among those detected in the frame, or 'none'.
    INPUTS
    positions_detected :
        dict of positions given by the pose classifier and pose_classification_filtered,
        e.g. {'pose1': 8.0, 'pose2': 2.0}
    threshold_position :
        scores strictly below this value are considered the 'none' position
    OUTPUT
    major_position :
        string with the position (classes from the classifier, or 'none')
    '''
    if max(positions_detected.values()) < float(threshold_position):
        major_position = 'none'
    else:
        major_position = max(positions_detected, key=positions_detected.get)
    return major_position
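# Illustrative usage (hypothetical smoothed scores, threshold of 8.0):
#   check_major_current_position({'tree': 8.5, 'dog': 1.5}, 8.0)  -> 'tree'
#   check_major_current_position({'tree': 4.0, 'dog': 1.0}, 8.0)  -> 'none'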
def yoga_position_classifier():
    # Load arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("video_path", help="string video path in, or 'live' for webcam")
    args = parser.parse_args()
    video_path_in = args.video_path
    direct_video = False
    if video_path_in == "live":
        video_path_in = 'data/live.mp4'
        direct_video = True
    # Build output paths by replacing the '.mp4' extension (escaped dot, anchored at the end)
    video_path_out = re.sub(r'\.mp4$', r'_classified_video.mp4', video_path_in)
    results_classification_path_out = re.sub(r'\.mp4$', r'_classified_results.csv', video_path_in)
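    # For example, a hypothetical input 'data/session.mp4' would give
    # 'data/session_classified_video.mp4' and 'data/session_classified_results.csv'.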
# Initialize tracker, classifier and current position.
# Initialize tracker.
mp_pose = mp.solutions.pose
pose_tracker = mp_pose.Pose()
    # Folder with pose class CSVs. It should be the same folder that was used
    # when building the classifier to output the CSVs.
pose_samples_folder = 'data/yoga_poses_csvs_out'
# Initialize embedder.
pose_embedder = FullBodyPoseEmbedding()
# Initialize classifier.
# Check that you are using the same parameters as during bootstrapping.
pose_classifier = PoseClassifier(
pose_samples_folder=pose_samples_folder,
pose_embedder=pose_embedder,
top_n_by_max_distance=30,
top_n_by_mean_distance=10)
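    # Assuming PoseClassifier follows the MediaPipe-style k-NN scheme: the 30 samples
    # closest by maximum coordinate distance are preselected, the 10 closest of those
    # by mean distance are kept, and each class score is the number of kept neighbours
    # belonging to that class (so scores range from 0 to 10 here).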
# Initialize EMA smoothing.
pose_classification_filter = EMADictSmoothing(
window_size=10,
alpha=0.2)
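    # Assuming EMADictSmoothing implements a standard exponential moving average:
    # per-class scores are blended over the last `window_size` frames with weights
    # proportional to alpha * (1 - alpha)**k (k frames back in time), so a single
    # noisy frame cannot flip the displayed pose.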
# Initialize list of results
position_list=[]
frame_list=[]
    # Case of a live video stream (webcam)
    if direct_video:
video_cap = cv2.VideoCapture(0)
    # Case of a video file path
    else:
assert type(video_path_in)==str, "Error in video path format, not a string. Abort."
# Open video and get video parameters and check if video is OK
video_cap = cv2.VideoCapture(video_path_in)
video_n_frames = video_cap.get(cv2.CAP_PROP_FRAME_COUNT)
assert type(video_n_frames)==float, 'Error in input video frames type. Abort.'
assert video_n_frames>0.0, 'Error in input video frames number : no frame. Abort.'
video_fps = video_cap.get(cv2.CAP_PROP_FPS)
video_width = int(video_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
video_height = int(video_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
class_names=['chair', 'cobra', 'dog', 'goddess', 'plank', 'tree', 'warrior', 'none']
position_threshold = 8.0
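    # With scores on a 0-10 scale (10 nearest neighbours), a threshold of 8.0 means a
    # pose is only reported when at least ~8 of the smoothed neighbour votes agree;
    # this interpretation assumes the k-NN scoring described above.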
# Open output video.
out_video = cv2.VideoWriter(video_path_out, cv2.VideoWriter_fourcc(*'mp4v'), video_fps, (video_width, video_height))
# Initialize results
frame_idx = 0
current_position = {"none":10.0}
output_frame = None
position_timer = 0
previous_position_major = 'none'
try:
with tqdm.tqdm(position=0, leave=True) as pbar:
while True:
                # Get current time from the beginning of the video
time_sec = float(frame_idx*(1/video_fps))
# Get current major position (str)
current_position_major = check_major_current_position(current_position, position_threshold)
success, input_frame = video_cap.read()
if not success:
print("Unable to read input video frame, breaking!")
break
# Run pose tracker
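                # MediaPipe expects RGB input while OpenCV reads frames as BGR,
                # hence the colour conversion below.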
input_frame_rgb = cv2.cvtColor(input_frame, cv2.COLOR_BGR2RGB)
result = pose_tracker.process(image=input_frame_rgb)
pose_landmarks = result.pose_landmarks
# Prepare the output frame
output_frame = input_frame.copy()
# Add a white banner on top
banner_height = int(video_height//10)
output_frame[0:banner_height, :] = (255, 255, 255) # White color
# Load the logo image
logo = cv2.imread("src/logo_impredalam.jpg")
logo_height, logo_width = logo.shape[:2]
logo_height_rescaled = banner_height
logo_width_rescaled = int((logo_width*logo_height_rescaled)// logo_height )
logo = cv2.resize(logo, (logo_width_rescaled, logo_height_rescaled)) # Resize to banner scale
# Overlay the logo on the upper right corner
output_frame[0 : logo.shape[0], output_frame.shape[1] - logo.shape[1] :] = (logo)
# If landmarks are detected
if pose_landmarks is not None:
mp_drawing.draw_landmarks(
image=output_frame,
landmark_list=pose_landmarks,
connections=mp_pose.POSE_CONNECTIONS,)
# Get landmarks
frame_height, frame_width = output_frame.shape[0], output_frame.shape[1]
pose_landmarks = np.array(
[
[lmk.x * frame_width, lmk.y * frame_height, lmk.z * frame_width]
for lmk in pose_landmarks.landmark
],
dtype=np.float32,)
assert pose_landmarks.shape == (33,3,), "Unexpected landmarks shape: {}".format(pose_landmarks.shape)
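                    # Note: MediaPipe returns 33 pose landmarks with normalized coordinates;
                    # x and y were scaled to pixels above by frame width/height, and z (which
                    # uses roughly the same scale as x, per MediaPipe) by frame width.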
# Classify the pose on the current frame
pose_classification = pose_classifier(pose_landmarks)
# Smooth classification using EMA
pose_classification_filtered = pose_classification_filter(pose_classification)
current_position=pose_classification_filtered
current_position_major=check_major_current_position(current_position, position_threshold)
# If no landmarks are detected
else:
current_position={'none':10.0}
current_position_major=check_major_current_position(current_position, position_threshold)
                # Whether or not landmarks were detected:
                # update the position timer according to the current and previous positions
if current_position_major==previous_position_major:
#increase position_timer
position_timer+=(1/video_fps)
else:
previous_position_major=current_position_major
position_timer=0
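                # Example: at 30 fps, 90 consecutive frames of the same pose give
                # position_timer = 90 * (1/30) = 3.0 seconds.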
# Display current position on frame
                cv2.putText(
                    output_frame,
                    f"Pose: {current_position_major}",
                    (int(video_width // 50), int(banner_height // 3)),  # coord
                    cv2.FONT_HERSHEY_SIMPLEX,
                    float(0.9 * (video_height / video_width)),  # Font size
                    (0, 0, 0),  # color
                    1,  # Thinner line
                    cv2.LINE_AA,)
# Display current position timer on frame
                cv2.putText(
                    output_frame,
                    f"Duration: {int(position_timer)} seconds",
                    (int(video_width // 50), int((2 * banner_height) // 3)),  # coord
                    cv2.FONT_HERSHEY_SIMPLEX,
                    float(0.9 * (video_height / video_width)),  # Font size
                    (0, 0, 0),  # color
                    1,  # Thinner line
                    cv2.LINE_AA,)
                # Write the annotated frame to the output video and display it
                out_video.write(output_frame)
                cv2.imshow("Yoga position", output_frame)
# Add current_position (dict) and frame index to list (output file for debug)
position_list.append(current_position)
frame_list.append(frame_idx)
# Output file for debug
with open(results_classification_path_out, 'a') as f:
f.write(f'{frame_idx},{current_position}\n')
key = cv2.waitKey(1) & 0xFF
if key == ord("q"):
break
elif key == ord("r"):
current_position = {'none':10.0}
print("Position reset !")
frame_idx += 1
pbar.update()
finally:
pose_tracker.close()
video_cap.release()
cv2.destroyAllWindows()
# Close output video.
out_video.release()
return frame_list, position_list
if __name__ == "__main__":
yoga_position_classifier()