KoichiYasuoka's picture
initial release
cf6f740
raw
history blame
528 Bytes
#! /bin/sh
# Make sure each Tibetan corpus repository is available in the current
# directory. A repository is only fetched when no directory of that name
# exists yet; the clone is shallow (--depth=1).
for repo in classical-tibetan-corpus old-tibetan-corpus modern-tibetan-corpus; do
  if test ! -d "$repo"; then
    git clone --depth=1 "https://github.com/tibetan-nlp/$repo"
  fi
done
# Build all.conllu: concatenate every CoNLL-U file found under
# *-tibetan-corpus/conllu/ (except the *-translated.conllu ones) and keep
# only the sentences in which no line contains a tab-delimited NOTAG field.

# emit_corpus_sentences: write the selected corpus files to stdout.
# Takes no arguments; globs relative to the current directory.
emit_corpus_sentences() {
  for F in *-tibetan-corpus/conllu/*.conllu; do
    case "$F" in
      *-translated.conllu)
        : # translated files are deliberately left out
        ;;
      *)
        # An unmatched glob stays as the literal pattern; skip it instead
        # of handing a non-existent path to cat.
        [ -f "$F" ] || continue
        cat -- "$F"
        # Force a sentence boundary after every file so that a file lacking
        # a final newline or blank line cannot fuse its last sentence with
        # the first sentence of the next file. Surplus blank lines are
        # ignored by drop_notag_sentences.
        printf '\n\n'
        ;;
    esac
  done
}

# drop_notag_sentences: filter blank-line-separated sentences from stdin to
# stdout. A sentence is dropped when it matches /\tNOTAG\t/; every kept
# sentence is written followed by exactly one blank line.
drop_notag_sentences() {
  awk '
    # Print the pending sentence (if any, and if it has no NOTAG field),
    # then start a new one.
    function emit() {
      if (sent != "" && sent !~ /\tNOTAG\t/)
        printf "%s\n", sent
      sent = ""
    }
    $0 == "" { emit(); next }      # blank line: sentence boundary
    { sent = sent $0 "\n" }        # accumulate the current sentence
    END { emit() }                 # keep a final sentence with no blank line after it
  '
}

emit_corpus_sentences | drop_notag_sentences > all.conllu
# Run the esupar training module on all.conllu.
# NOTE(review): the positional arguments are presumably base model, output
# model name, batch size, temporary directory and training file, in that
# order; confirm against the esupar.train usage text.
python3 -m esupar.train \
  KoichiYasuoka/roberta-base-tibetan \
  KoichiYasuoka/roberta-base-tibetan-upos \
  24 \
  /tmp \
  all.conllu
# The script reports success unconditionally, whatever the trainer returned.
exit 0