VQMIVC / ParallelWaveGAN /utils /make_subset_data.sh
akhaliq3
spaces demo
2b7bf83
raw
history blame
1.26 kB
#!/bin/bash
# Make subset files located in data direcoty.
# Copyright 2020 Tomoki Hayashi
# MIT License (https://opensource.org/licenses/MIT)
# shellcheck disable=SC1091
. ./path.sh || exit 1;
if [ $# -ne 3 ]; then
echo "Usage: $0 <src_dir> <num_split> <dst_dir>"
echo "e.g.: $0 data/train_nodev 16 data/train_nodev/split16"
exit 1
fi
set -eu
src_dir=$1
num_split=$2
dst_dir=$3
src_scp=${src_dir}/wav.scp
if [ -e "${src_dir}/segments" ]; then
has_segments=true
src_segments=${src_dir}/segments
else
has_segments=false
fi
if ! ${has_segments}; then
split_scps=""
for i in $(seq 1 "${num_split}"); do
split_scps+=" ${dst_dir}/wav.${i}.scp"
done
# shellcheck disable=SC2086
utils/split_scp.pl "${src_scp}" ${split_scps}
else
split_scps=""
for i in $(seq 1 "${num_split}"); do
split_scps+=" ${dst_dir}/segments.${i}"
done
# shellcheck disable=SC2086
utils/split_scp.pl "${src_segments}" ${split_scps}
for i in $(seq 1 "${num_split}"); do
awk '{print $2}' < "${dst_dir}/segments.${i}" | sort | uniq | while read -r wav_id; do
grep "^${wav_id} " < "${src_scp}" >> "${dst_dir}/wav.${i}.scp"
done
done
fi
echo "Successfully make subsets."