#!/bin/bash
#
# Submit a Slurm job (via sbatch) that runs udpipe2.py training for one
# treebank.
#
# Usage: <script> datadir treebank args...
#   datadir   directory containing the `langs_sizes` table and one
#             subdirectory per treebank with its *.conllu files
#   treebank  treebank identifier; a name ending in `_all` evaluates on every
#             `<prefix>_*` treebank directory found under datadir
#   args...   extra arguments appended verbatim to the udpipe2.py command line
#
# Environment:
#   EXP         optional; when non-empty, `-$EXP` is appended to the model
#               directory name
#   SLURM_ARGS  optional; extra sbatch options (split on whitespace)

[ $# -ge 2 ] || { echo "Usage: $0 datadir treebank args..." >&2; exit 1; }

data="$1"; shift
treebank="$1"; shift

# Model directory; also holds the training log written by sbatch.
model_dir="models/$data-$treebank${EXP:+-$EXP}"
mkdir -p "$model_dir" || exit 1

# Look up the treebank size: langs_sizes is tab-separated, with the treebank
# name in column 1 and its size in column 2. The name is compared literally
# (not as a regex) and only the first matching row is used.
size=$(awk -F'\t' -v tb="$treebank" '$1 == tb { print $2; exit }' "$data/langs_sizes")
if ! [[ "$size" =~ ^[0-9]+$ ]]; then
  # Without a usable size, fall back to the small-treebank settings
  # explicitly instead of relying on a failing integer comparison.
  echo "Warning: no numeric size for '$treebank' in $data/langs_sizes," \
    "using small-treebank settings" >&2
  size=0
fi

# Hyperparameters chosen from the treebank size.
if [ "$size" -ge 400000 ]; then
  args=(--batch_size=64)
else
  args=(--batch_size=32)
fi

if [ "$size" -ge 50000 ]; then
  args+=(--rnn_cell_dim=512)
else
  args+=(--rnn_cell_dim=384)
fi

# Word-embedding model: treebanks whose name starts with `cs` get the
# RobeCzech model, everything else the multilingual BERT model.
case "$treebank" in
  cs*) args+=(--wembedding_model=robeczech-base-last4) ;;
  *) args+=(--wembedding_model=bert-base-multilingual-uncased-last4) ;;
esac

# Evaluation data arguments (--dev / --test).
case "$treebank" in
  *_all)
    # Evaluate on each `<prefix>_*` treebank directory, passing every file as
    # `<directory name>:<path>`. If a glob matches nothing, the literal
    # pattern is passed through unchanged (no nullglob).
    evaluation_data=(--dev)
    for dev_file in "$data/${treebank%_all}"_*/*-dev.conllu; do
      evaluation_data+=("$(basename "$(dirname "$dev_file")"):$dev_file")
    done
    evaluation_data+=(--test)
    for test_file in "$data/${treebank%_all}"_*/*-test.conllu; do
      evaluation_data+=("$(basename "$(dirname "$test_file")"):$test_file")
    done
    ;;
  la_evalatin20)
    args+=(--tags=UPOS,LEMMAS --parse=0 --rnn_layers=3)
    evaluation_data=("--dev=$data/la_evalatin20/la_evalatin20-dev.conllu" --test)
    # Three test sets; the plain one gets an empty label before the colon.
    for kind in "" "-crossgenre" "-crosstime"; do
      evaluation_data+=("${kind:+la_evalatin20$kind}:$data/la_evalatin20/la_evalatin20-test$kind.conllu")
    done
    ;;
  *)
    evaluation_data=(
      "--dev=$data/$treebank/$treebank-ud-dev.conllu"
      "--test=$data/$treebank/$treebank-ud-test.conllu"
    )
    ;;
esac

# SLURM_ARGS is intentionally left unquoted so that it splits into separate
# sbatch options; the --train path keeps its glob so the shell expands it.
# shellcheck disable=SC2086
sbatch $SLURM_ARGS -p gpu-ms,gpu-troja -G 1 -C "gpu_cc6.1|gpu_cc7.5" --mem=24G \
  -o "$model_dir/training.log" \
  run withcuda100 venv/bin/python udpipe2.py "$model_dir" \
  --train "$data/$treebank/$treebank"-*train.conllu \
  "${evaluation_data[@]}" "${args[@]}" "$@"