# Fetch each Tibetan corpus repository as a shallow clone, unless a directory
# with that name already exists in the current working directory.
for D in classical-tibetan-corpus old-tibetan-corpus modern-tibetan-corpus; do
  test -d "$D" || git clone --depth=1 "https://github.com/tibetan-nlp/$D"
done
# Read blank-line-separated sentence blocks on stdin and write to stdout only
# the blocks that do not contain the tab-delimited field value NOTAG.
# Each block written is followed by exactly one blank line.
drop_notag_sentences() {
  awk '
    $0 == "" {
      # End of a block: emit it unless it is empty or carries a NOTAG field.
      if (u != "" && u !~ /\tNOTAG\t/)
        printf "%s\n", u
      u = ""
      next
    }
    { u = u $0 "\n" }
    END {
      # Flush a final block that was not terminated by a blank line.
      if (u != "" && u !~ /\tNOTAG\t/)
        printf "%s\n", u
    }
  '
}

# Concatenate every corpus .conllu file except the *-translated.conllu ones,
# filter out NOTAG sentences, and write the result to all.conllu.
for F in *-tibetan-corpus/conllu/*.conllu; do
  # An unmatched glob is left as the literal pattern; skip it.
  [ -f "$F" ] || continue
  case "$F" in
    *-translated.conllu) ;;
    *) cat -- "$F" ;;
  esac
done | drop_notag_sentences > all.conllu
# Run the esupar training module on all.conllu.
# NOTE(review): the positional arguments look like source model, output model
# name, batch size (32) and a working directory (/tmp) — confirm against the
# esupar.train usage before changing them.
python3 -m esupar.train KoichiYasuoka/bert-base-tibetan KoichiYasuoka/bert-base-tibetan-upos 32 /tmp all.conllu
# NOTE(review): the script always reports success here, even when the
# training command above fails — confirm this is intended.
exit 0