File size: 1,257 Bytes
2b7bf83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash

# Split the wav.scp (and segments, if present) of a data directory into <num_split> subset files.

# Copyright 2020 Tomoki Hayashi
#  MIT License (https://opensource.org/licenses/MIT)

# Load the recipe environment first; abort right away if it cannot be sourced.
# shellcheck disable=SC1091
source ./path.sh || exit 1

# Exactly three positional arguments are expected; otherwise show help and stop.
if [[ $# -ne 3 ]]; then
    cat <<EOF
Usage: $0 <src_dir> <num_split> <dst_dir>
e.g.: $0 data/train_nodev 16 data/train_nodev/split16
EOF
    exit 1
fi

# Strict mode is switched on only after path.sh has been sourced.
set -eu

# Positional arguments.
src_dir=$1
num_split=$2
dst_dir=$3

# wav.scp is always the source list; segments is optional and only recorded
# (together with its path) when the file is present in the source directory.
src_scp="${src_dir}/wav.scp"
has_segments=false
if [[ -e "${src_dir}/segments" ]]; then
    has_segments=true
    src_segments="${src_dir}/segments"
fi

# Split the source list into ${num_split} pieces under ${dst_dir}.
#   - Without segments: wav.scp is split into wav.<i>.scp.
#   - With segments:    segments is split into segments.<i>, and a matching
#                       wav.<i>.scp is rebuilt for every piece from wav.scp.

# Make sure the destination directory exists before anything is written to it;
# this script does not otherwise create it.
mkdir -p "${dst_dir}"

# Choose which file is handed to split_scp.pl.
if ${has_segments}; then
    split_src=${src_segments}
else
    split_src=${src_scp}
fi

# Collect the output paths in an array so that paths containing whitespace
# survive intact (no reliance on unquoted word splitting).
split_files=()
for i in $(seq 1 "${num_split}"); do
    if ${has_segments}; then
        split_files+=("${dst_dir}/segments.${i}")
    else
        split_files+=("${dst_dir}/wav.${i}.scp")
    fi
done
if [ "${#split_files[@]}" -eq 0 ]; then
    echo "Error: <num_split> must be a positive integer (got '${num_split}')." >&2
    exit 1
fi
utils/split_scp.pl "${split_src}" "${split_files[@]}"

if ${has_segments}; then
    for i in $(seq 1 "${num_split}"); do
        # Rebuild wav.<i>.scp in a single pass:
        #   * ids are compared exactly against the first field, so regex
        #     metacharacters in an id cannot cause false matches and any
        #     whitespace delimiter is accepted;
        #   * the output is written with ">" so a re-run never appends to
        #     stale results;
        #   * entries are emitted in wav.scp order;
        #   * a wav id referenced by segments but absent from wav.scp is
        #     reported on stderr and makes the script fail.
        awk '
            FILENAME == ARGV[1] { wanted[$2] = 0; next }
            ($1 in wanted)      { print; wanted[$1] = 1 }
            END {
                status = 0
                for (id in wanted) {
                    if (!wanted[id]) {
                        printf "Error: wav id \"%s\" not found in %s\n", id, ARGV[2] > "/dev/stderr"
                        status = 1
                    }
                }
                exit status
            }
        ' "${dst_dir}/segments.${i}" "${src_scp}" > "${dst_dir}/wav.${i}.scp"
    done
fi
echo "Successfully make subsets."