File size: 758 Bytes
ee21b96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#!/usr/bin/env sh
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
# Please follow the instructions here http://alt.qcri.org/tools/arabic-normalizer/
# to install tools needed for Arabic

# Arabic tokenizer filter: reads raw text on stdin, runs the QCRI Arabic
# normalizer script on it, and writes that script's output file to stdout.
#
# Configuration: replace each "..." placeholder below with the install
# location of the corresponding tool (see the URL in the message below).
SVMTOOL=...
GOMOSESGO=...
QCRI_ARABIC_NORMALIZER=...

# Refuse to run while any placeholder is still in place. The hint goes to
# stderr because stdout carries the tokenized text consumed by the caller.
for tool_path in "$SVMTOOL" "$GOMOSESGO" "$QCRI_ARABIC_NORMALIZER"; do
  if [ "$tool_path" = "..." ]; then
    echo "Please install Arabic tools: http://alt.qcri.org/tools/arabic-normalizer/" >&2
    echo "Then update environment variables in tokenizer_ar.sh" >&2
    exit 1
  fi
done

# Prepend the tool library directories to Perl's module search path; only
# append the previous value when it is non-empty, so no empty entry is added.
export PERL5LIB="$SVMTOOL/lib:$GOMOSESGO/bin/MADA-3.2${PERL5LIB:+:$PERL5LIB}"

# The normalizer is invoked on a file path, so stdin is buffered into a
# temporary file first.
tempfile=$(mktemp) || exit 1
normalized="$tempfile.mada_norm-aramorph.europarl_tok"

# Remove the input copy and the derived output file on every exit path.
# NOTE(review): the normalizer may create other intermediate files next to
# $tempfile; only the two known here are cleaned up.
trap 'rm -f -- "$tempfile" "$normalized"' EXIT

cat - > "$tempfile" || exit 1

# The normalizer script is invoked by relative name, so it must be run from
# its own directory; abort rather than run it from the wrong place.
cd "$QCRI_ARABIC_NORMALIZER" || exit 1

# The normalizer's exit status is intentionally not checked (as before); if
# it produced no output file, the final cat fails and sets the exit status.
bash qcri_normalizer_mada3.2_aramorph1.2.1.sh "$tempfile"
cat "$normalized"