#!/usr/bin/env bash
#
# Launcher for the subtitling recipe: defines the experiment configuration
# below and forwards every setting to ./subs.sh as a command-line option.
#
# Run from the directory that contains subs.sh and conf/ (both are referenced
# through relative paths).

# Clear PYTHONPATH before activating the conda environment, then print the
# interpreter version as a quick sanity check of the activated environment.
export PYTHONPATH=""
source /esat/spchtemp/scratch/jponcele/anaconda3/bin/activate espnet21
python --version

# Strict mode is switched on only after the environment activation above.
# NOTE(review): presumably the activate script is not safe under `set -u`;
# confirm before moving this line earlier.
set -euo pipefail

# ---------------------------------------------------------------------------
# Stage selection (forwarded as --stage / --stop_stage).
# ---------------------------------------------------------------------------
stage=12
stop_stage=12

# ---------------------------------------------------------------------------
# Output locations and experiment naming.
# ---------------------------------------------------------------------------
outdir=/esat/spchtemp/scratch/jponcele/espnet2
expdir=${outdir}/exp/exp-hpc
st_tag=train_subtitling_chained_PL_C10_new_combined_nelf_all_dim512_iter10k
st_stats_dir=/esat/spchtemp/scratch/jponcele/espnet2/exp/exp-st/st_stats_fbank_pitch_vl_joint_bpe5000_cgn_combined_tags_cased_punct_subs_train_all
token_dir_suffix=cgn_combined_tags_cased_punct_subs_train_all

# ---------------------------------------------------------------------------
# Parallelism (forwarded as --nj / --ngpu).
# NOTE(review): ngpu=0 is combined with gpu_inference=true further down;
# confirm that this combination is what subs.sh expects.
# ---------------------------------------------------------------------------
nj=8
ngpu=0

# ---------------------------------------------------------------------------
# Data set names.
# ---------------------------------------------------------------------------
st_train_set=st_train_all_combined_stochastic_cased_punct_speed_perturbed_cgn_subset
st_valid_set=st_valid_nelf_12000h_combined_stochastic_cased_punct
st_test_set="nbest-test"

asr_train_set=train_s
asr_valid_set=valid_s
asr_test_set=dev_s

subs_train_set=subs_train
subs_valid_set=subs_valid
subs_test_set=subs_test

# ---------------------------------------------------------------------------
# Option strings handed through to the data preparation steps.
# traincomps / decodecomps are semicolon-separated lists embedded verbatim in
# local_data_opts.
# ---------------------------------------------------------------------------
traincomps="a;b;c;d;f;g;h;i;j;k;l;m;n;o"
decodecomps="b;f;g;h;i;j;k;l;m;n;o"
local_data_opts="--repstr false --lowercase true --outdir data --traincomps ${traincomps} --decodecomps ${decodecomps}"

subs_dir=/users/spraak/jponcele/vrt-scraper/vrtnew_subtitles_4feb
local_subs_opts="--outdir data --subsdir ${subs_dir}"

feats_type=fbank_pitch

# ---------------------------------------------------------------------------
# Language model switches (all disabled in this configuration).
# ---------------------------------------------------------------------------
use_word_lm=false
use_lm=false
lm_config=conf/train_lm_transformer.yaml
use_ngram=false

# ---------------------------------------------------------------------------
# Model training and inference settings.
# ---------------------------------------------------------------------------
feats_normalize=utterance_mvn
st_config=conf/tuning/train_subtitling_chained_C10_new_6layers_transformer_dim512.yaml
inference_config=conf/st_decode_chained.yaml
inference_nj=1
gpu_inference=true
inference_st_model=averaged_model_30epochs.pth
st_args="--batch_type custom_folded --valid_batch_type custom_folded"

# ---------------------------------------------------------------------------
# Run the recipe.
#
# --gpu_inference is passed exactly once, from ${gpu_inference}. An earlier
# revision also passed a literal `--gpu_inference false` ahead of it, which
# contradicted the variable; the literal was dropped so the setting above is
# the single source of truth.
# NOTE(review): this keeps the old behaviour only if subs.sh lets the last
# occurrence of a repeated option win — confirm against its option parser.
# ---------------------------------------------------------------------------
./subs.sh \
  --stage "${stage}" \
  --stop_stage "${stop_stage}" \
  --ngpu "${ngpu}" \
  --nj "${nj}" \
  --dumpdir "${outdir}/dump" \
  --expdir "${expdir}" \
  --feats_type "${feats_type}" \
  --audio_format wav \
  --min_wav_duration 0.1 \
  --max_wav_duration 30 \
  --token_joint true \
  --src_token_type bpe \
  --src_nbpe 5000 \
  --src_bpemode unigram \
  --src_case lc \
  --tgt_token_type bpe \
  --tgt_nbpe 5000 \
  --tgt_bpemode unigram \
  --tgt_case lc \
  --oov "<unk>" \
  --lang "vl" \
  --src_lang "verbatim" \
  --tgt_lang "subtitle" \
  --local_subs_opts "${local_subs_opts}" \
  --local_data_opts "${local_data_opts}" \
  --use_lm "${use_lm}" \
  --use_word_lm "${use_word_lm}" \
  --lm_config "${lm_config}" \
  --use_ngram "${use_ngram}" \
  --st_config "${st_config}" \
  --st_args "${st_args}" \
  --st_tag "${st_tag}" \
  --inference_config "${inference_config}" \
  --inference_nj "${inference_nj}" \
  --feats_normalize "${feats_normalize}" \
  --st_train_set "${st_train_set}" \
  --st_valid_set "${st_valid_set}" \
  --st_test_set "${st_test_set}" \
  --asr_train_set "${asr_train_set}" \
  --asr_valid_set "${asr_valid_set}" \
  --asr_test_set "${asr_test_set}" \
  --subs_train_set "${subs_train_set}" \
  --subs_valid_set "${subs_valid_set}" \
  --subs_test_set "${subs_test_set}" \
  --st_stats_dir "${st_stats_dir}" \
  --inference_st_model "${inference_st_model}" \
  --token_dir_suffix "${token_dir_suffix}" \
  --gpu_inference "${gpu_inference}"