# my_tango/datasets/process_testdata.py
# Uploaded by Akira00 (folder upload via huggingface_hub)
# Commit 721e031 (verified)
import os
import numpy as np
import wave
from moviepy.editor import VideoFileClip
def split_npz(npz_path, output_prefix):
    """Split one motion ``.npz`` archive into two halves along the first axis.

    Reads ``poses``, ``betas``, ``expressions`` and ``trans`` from
    ``npz_path``, cuts each at ``poses.shape[0] // 2`` and writes
    ``<output_prefix>_0_5.npz`` (first half) and
    ``<output_prefix>_5_10.npz`` (second half). The ``model``, ``gender``
    and ``mocap_frame_rate`` entries are copied unchanged into both files.

    Args:
        npz_path: Path of the archive to split.
        output_prefix: Path prefix the two output file names are built from.

    Returns:
        None. Any failure is reported with ``print`` and not re-raised, so
        one bad file does not abort a batch run.
    """
    try:
        # np.load on an .npz returns a lazy NpzFile that keeps the archive
        # open; the with-block guarantees the handle is released.
        with np.load(npz_path) as data:
            poses = data["poses"]
            # NOTE(review): betas is sliced like the per-frame arrays below;
            # confirm it really has a leading time axis in this dataset.
            betas = data["betas"]
            expressions = data["expressions"]
            trans = data["trans"]
            # Metadata stored identically in both halves.
            shared = {
                key: data[key]
                for key in ("model", "gender", "mocap_frame_rate")
            }

        # Split point: half of the leading dimension of poses.
        half = poses.shape[0] // 2

        # The "_0_5" / "_5_10" suffixes presumably assume a 10-second clip;
        # the code itself only halves the arrays.
        np.savez(
            output_prefix + "_0_5.npz",
            betas=betas[:half],
            poses=poses[:half],
            expressions=expressions[:half],
            trans=trans[:half],
            **shared,
        )
        np.savez(
            output_prefix + "_5_10.npz",
            betas=betas[half:],
            poses=poses[half:],
            expressions=expressions[half:],
            trans=trans[half:],
            **shared,
        )
        print(f"NPZ split saved for {output_prefix}")
    except Exception as e:
        print(f"Error processing NPZ file {npz_path}: {e}")
def split_wav(wav_path, output_prefix):
    """Split a WAV file into two halves, cutting on a whole-frame boundary.

    Writes ``<output_prefix>_0_5.wav`` (first half) and
    ``<output_prefix>_5_10.wav`` (second half) with the same channel count,
    sample width and frame rate as the input. When the frame count is odd,
    the extra frame goes to the second half.

    Args:
        wav_path: Path of the WAV file to split.
        output_prefix: Path prefix the two output file names are built from.

    Returns:
        None. Any failure is reported with ``print`` and not re-raised.
    """
    try:
        with wave.open(wav_path, "rb") as wav_file:
            params = wav_file.getparams()
            frames = wav_file.readframes(wav_file.getnframes())

        # One frame is one sample per channel. Halving the raw byte count
        # could cut through the middle of a frame and corrupt the second
        # file, so the split offset is rounded down to a whole frame.
        frame_size = params.sampwidth * params.nchannels
        split_at = (len(frames) // frame_size // 2) * frame_size

        halves = (frames[:split_at], frames[split_at:])
        for i, chunk in enumerate(halves):
            with wave.open(f"{output_prefix}_{i*5}_{(i+1)*5}.wav", "wb") as out_wav:
                # params still carries the input's frame count; the wave
                # module rewrites the header with the real count on close.
                out_wav.setparams(params)
                out_wav.writeframes(chunk)
        print(f"WAV split saved for {output_prefix}")
    except Exception as e:
        print(f"Error processing WAV file {wav_path}: {e}")
def split_mp4(mp4_path, output_prefix):
    """Cut the first ten seconds of a video into two five-second MP4 files.

    Writes ``<output_prefix>_0_5.mp4`` (seconds 0-5) and
    ``<output_prefix>_5_10.mp4`` (seconds 5-10), encoded with ``libx264``
    video and ``aac`` audio.

    Args:
        mp4_path: Path of the video to split.
        output_prefix: Path prefix the two output file names are built from.

    Returns:
        None. Any failure is reported with ``print`` and not re-raised.
    """
    try:
        clip = VideoFileClip(mp4_path)
        try:
            for i in range(2):
                # Fixed 5-second windows: [0, 5) and [5, 10).
                subclip = clip.subclip(i * 5, (i + 1) * 5)
                subclip.write_videofile(
                    f"{output_prefix}_{i*5}_{(i+1)*5}.mp4",
                    codec="libx264",
                    audio_codec="aac",
                )
        finally:
            # Release the reader resources held by the clip even when
            # encoding fails part-way through.
            clip.close()
        print(f"MP4 split saved for {output_prefix}")
    except Exception as e:
        print(f"Error processing MP4 file {mp4_path}: {e}")
def process_files(root_dir, output_dir):
    """Write ``test.json``, a manifest with one test clip per ``root_dir`` entry.

    Every name returned by ``os.listdir(root_dir)`` becomes one record whose
    ``video_path``, ``audio_path`` and ``motion_path`` all point at
    ``root_dir/<name>``, with ``mode`` set to ``"test"`` and a fixed index
    range of 0-150. Entries are not filtered, so plain files in ``root_dir``
    are listed as well.

    Args:
        root_dir: Directory whose entries are the individual clips.
        output_dir: Existing directory that receives ``test.json``.

    Returns:
        None. The manifest is written to ``<output_dir>/test.json``.
    """
    import json

    clips = []
    # os.listdir returns names in arbitrary order; sorting keeps the
    # manifest identical across runs and machines.
    for video_id in sorted(os.listdir(root_dir)):
        clip_root = os.path.join(root_dir, video_id)
        clips.append(
            {
                "video_id": video_id,
                "video_path": clip_root,
                "audio_path": clip_root,
                "motion_path": clip_root,
                "mode": "test",
                "start_idx": 0,
                "end_idx": 150,
            }
        )

    output_json = os.path.join(output_dir, "test.json")
    with open(output_json, "w") as f:
        json.dump(clips, f, indent=4)
# Destination for the generated test.json; created up front if missing.
output_dir = "/content/test"
os.makedirs(name=output_dir, exist_ok=True)

# Dataset folder whose entries are the individual test clips.
root_dir = "/content/oliver/oliver/Abortion_Laws_-_Last_Week_Tonight_with_John_Oliver_HBO-DRauXXz6t0Y.webm/test/"

# Build the manifest for every entry found under root_dir.
process_files(root_dir=root_dir, output_dir=output_dir)