Spaces:
Running
Running
File size: 6,124 Bytes
ea32d62 a6f6a7a ea32d62 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 |
import os
import numpy as np
import imageio # requires imageio
import cv2
from tqdm import tqdm
from media_pipe.mp_utils import LMKExtractor
from media_pipe.draw_util import FaceMeshVisualizer
from media_pipe import FaceMeshAlign
def get_video_fps(video_path):
    """Return the frame rate OpenCV reports for a video, as an integer.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        ``int(CAP_PROP_FPS)`` -- the fractional part is truncated, not
        rounded.
    """
    capture = cv2.VideoCapture(video_path)
    reported_fps = capture.get(cv2.CAP_PROP_FPS)
    # Release the capture handle before returning.
    capture.release()
    return int(reported_fps)
def process_video(video_path, save_dir, save_gif=True):
    """Extract per-frame face landmarks from a video and save them to disk.

    The first frame serves as the reference face. The reference result,
    followed by the result of every iterated frame in which a face was
    detected, is saved to ``<save_dir>/<video name>_mppose.npy``. Optionally
    the landmark drawings and their aligned counterparts are saved as GIFs
    alongside it.

    Args:
        video_path: Path of the video, opened with ``imageio.get_reader``.
        save_dir: Output directory; created if it does not exist yet.
        save_gif: When true, also write ``<name>_mppose.gif`` and
            ``<name>_mppose_aligned.gif``.

    Returns:
        ``(npy_path, number_of_saved_results)`` on success, or ``(None, 0)``
        when no face is detected in the first frame.
    """
    lmk_extractor = LMKExtractor()
    vis = FaceMeshVisualizer(forehead_edge=False)
    face_aligner = FaceMeshAlign()

    face_results = []
    motions = []

    # The reader keeps the video file/decoder open; the context manager
    # closes it on every exit path (early return and exceptions included).
    with imageio.get_reader(video_path) as frames:
        # Process first frame to get reference
        first_frame = next(iter(frames))
        first_frame_bgr = cv2.cvtColor(np.array(first_frame), cv2.COLOR_RGB2BGR)
        ref_result = lmk_extractor(first_frame_bgr)
        if ref_result is None:
            print("No face detected in the first frame. Exiting.")
            return None, 0

        ref_result['width'] = first_frame_bgr.shape[1]
        ref_result['height'] = first_frame_bgr.shape[0]
        face_results.append(ref_result)

        # NOTE(review): this begins a second iteration over the reader.
        # Whether it resumes after the first frame or restarts at frame 0
        # depends on the imageio reader -- confirm. The saved array always
        # has the reference result at index 0 either way.
        for frame in tqdm(frames):
            frame_bgr = cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR)
            face_result = lmk_extractor(frame_bgr)
            if face_result is None:
                # Frames without a detected face are skipped entirely.
                continue

            frame_height, frame_width = frame_bgr.shape[:2]
            face_result['width'] = frame_width
            face_result['height'] = frame_height
            face_results.append(face_result)

            lmks = face_result['lmks'].astype(np.float32)
            motion = vis.draw_landmarks((frame_width, frame_height), lmks, normed=True)
            motions.append(motion)

    # Perform alignment
    aligned_motions = face_aligner(ref_result, face_results)

    # Make sure the output directory exists so the work done above is not
    # lost to a missing folder. An empty save_dir means "current directory".
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    base_name = os.path.splitext(os.path.basename(video_path))[0]
    npy_path = os.path.join(save_dir, f"{base_name}_mppose.npy")
    np.save(npy_path, face_results)

    if save_gif:
        # Save regular GIF
        gif_path = os.path.join(save_dir, f"{base_name}_mppose.gif")
        imageio.mimsave(gif_path, motions, 'GIF', duration=0.2, loop=0)

        # Save aligned GIF
        aligned_gif_path = os.path.join(save_dir, f"{base_name}_mppose_aligned.gif")
        imageio.mimsave(aligned_gif_path, aligned_motions, 'GIF', duration=0.2, loop=0)

    return npy_path, len(face_results)
def get_npy_files(root_dir):
    """Recursively collect the paths of all ``.npy`` files under a directory.

    Args:
        root_dir: Directory whose tree is walked with ``os.walk``.

    Returns:
        A list of paths (each directory joined with the file name), in the
        order ``os.walk`` yields them. The suffix match is case-sensitive.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.endswith('.npy')
    ]
def get_frame_count(npy_path):
    """Return one less than the number of entries stored in an NPY file.

    Args:
        npy_path: Path of the ``.npy`` file, loaded with
            ``allow_pickle=True``.

    Returns:
        ``len(data) - 1``. Presumably the subtraction discounts the
        reference result that ``process_video`` stores first -- confirm.
        An empty array yields ``-1``.
    """
    saved_results = np.load(npy_path, allow_pickle=True)
    entry_count = len(saved_results)
    return entry_count - 1
def show_gif(npy_path):
    """Locate the aligned GIF that belongs to an NPY landmark file.

    The GIF is expected next to the NPY file, under the same name with the
    trailing ``.npy`` replaced by ``_aligned.gif``.

    Args:
        npy_path: Path of the ``.npy`` file.

    Returns:
        ``(gif_path, message)`` when the GIF exists, otherwise
        ``(None, message)``.
    """
    # Strip only the trailing extension. A blanket str.replace would also
    # rewrite ".npy" inside directory names and point at the wrong location.
    stem, extension = os.path.splitext(npy_path)
    if extension != '.npy':
        # Not an NPY path: keep the full name so the input file itself is
        # never returned as if it were the GIF.
        stem = npy_path

    aligned_gif_path = f"{stem}_aligned.gif"
    if os.path.exists(aligned_gif_path):
        return aligned_gif_path, "Aligned GIF found and displayed"
    return None, "No aligned GIF found for this NPY file"
def process_image(image_path, npy_path, save_dir, expand_x=1.0, expand_y=1.0, offset_x=0.0, offset_y=0.0):
    """Crop an image around its face to match the frame size of a saved NPY.

    The target size is read from the first entry of the NPY file. The image
    is cropped around the centre of the detected landmarks, padded if the
    crop is smaller than the target, resized to the target size, and saved
    as ``<name>_cropped.png``. Landmarks are then extracted from the cropped
    image and drawn to ``<name>_motion.png``.

    Args:
        image_path: Path of the image to process.
        npy_path: Path of an NPY file whose first entry has ``'width'`` and
            ``'height'`` keys.
        save_dir: Output directory; created if it does not exist yet.
        expand_x: Crop width as a multiple of the target width.
        expand_y: Crop height as a multiple of the target height.
        offset_x: Horizontal shift of the crop, as a fraction of the target
            width.
        offset_y: Vertical shift of the crop, as a fraction of the target
            height.

    Returns:
        ``(cropped_image_path, motion_path)`` on success, or ``None`` when
        the NPY file is empty, the image cannot be read, the crop region is
        empty, or no face is detected.
    """
    lmk_extractor = LMKExtractor()
    vis = FaceMeshVisualizer(forehead_edge=False)

    # Load data from npy file
    # NOTE: allow_pickle=True unpickles arbitrary objects; only load NPY
    # files from a trusted source.
    face_results = np.load(npy_path, allow_pickle=True)
    if len(face_results) == 0:
        print("No face data in the NPY file. Exiting.")
        return None

    # Get dimensions from first frame in npy
    target_width = face_results[0]['width']
    target_height = face_results[0]['height']

    # Process image
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable or missing file by returning
        # None rather than raising.
        print("Could not read the image. Exiting.")
        return None
    image_height, image_width = image.shape[:2]
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    face_result = lmk_extractor(image)
    if face_result is None:
        print("No face detected in the image. Exiting.")
        return None

    # Crop image: landmark coordinates are scaled by the image size here,
    # i.e. they are treated as normalised to the image.
    landmarks = face_result['lmks']
    min_x, min_y = np.min(landmarks, axis=0)[:2]
    max_x, max_y = np.max(landmarks, axis=0)[:2]
    center_x = (min_x + max_x) / 2 * image_width
    center_y = (min_y + max_y) / 2 * image_height

    # Apply expansion and offset
    crop_width = target_width * expand_x
    crop_height = target_height * expand_y
    offset_x_pixels = offset_x * target_width
    offset_y_pixels = offset_y * target_height

    left = int(max(center_x - crop_width / 2 + offset_x_pixels, 0))
    top = int(max(center_y - crop_height / 2 + offset_y_pixels, 0))
    right = int(min(left + crop_width, image_width))
    bottom = int(min(top + crop_height, image_height))

    if right <= left or bottom <= top:
        # A large offset (or a zero expansion) can push the crop window
        # fully outside the image; padding/resizing an empty array would
        # otherwise fail inside OpenCV.
        print("Crop region is empty. Exiting.")
        return None

    cropped_image = image_rgb[top:bottom, left:right]

    # If cropped image is smaller than target size, add padding
    crop_h, crop_w = cropped_image.shape[:2]
    if crop_h < target_height or crop_w < target_width:
        pad_top = max(0, (target_height - crop_h) // 2)
        pad_bottom = max(0, target_height - crop_h - pad_top)
        pad_left = max(0, (target_width - crop_w) // 2)
        pad_right = max(0, target_width - crop_w - pad_left)
        cropped_image = cv2.copyMakeBorder(cropped_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT)

    cropped_image = cv2.resize(cropped_image, (target_width, target_height))
    cropped_image_bgr = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2BGR)

    # cv2.imwrite does not create directories, so make sure the output
    # directory exists. An empty save_dir means "current directory".
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # Save cropped image
    base_name = os.path.splitext(os.path.basename(image_path))[0]
    cropped_image_path = os.path.join(save_dir, f"{base_name}_cropped.png")
    cv2.imwrite(cropped_image_path, cropped_image_bgr)

    # Process cropped image
    cropped_face_result = lmk_extractor(cropped_image_bgr)
    if cropped_face_result is None:
        print("No face detected in the cropped image. Exiting.")
        return None

    cropped_face_result['width'] = target_width
    cropped_face_result['height'] = target_height

    # Visualize facial landmarks
    lmks = cropped_face_result['lmks'].astype(np.float32)
    motion = vis.draw_landmarks((target_width, target_height), lmks, normed=True)

    # Save visualization
    motion_path = os.path.join(save_dir, f"{base_name}_motion.png")
    cv2.imwrite(motion_path, cv2.cvtColor(motion, cv2.COLOR_RGB2BGR))

    return cropped_image_path, motion_path
|