sakharamg's picture
Uploading all files
158b61b
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
use Encode;
use FindBin qw($RealBin);
my ($in,$out,$tmpdir) = @ARGV;
`mkdir -p $tmpdir`;
`$RealBin/../../tokenizer/deescape-special-chars.perl < $in | /home/pkoehn/statmt/bin/unicode2latin1.perl > $tmpdir/tok.$$`;
`/home/pkoehn/statmt/bin/run-lopar-tagger.perl $tmpdir/tok.$$ $tmpdir/lopar.$$`;
open(LOPAR,"$tmpdir/lopar.$$");
open(OUT,"|$RealBin/../../tokenizer/escape-special-chars.perl >$out");
while(<LOPAR>) {
chomp;
s/ +/ /g;
s/^ //;
s/ $//;
my $first = 1;
foreach (split) {
die("ERROR: choked on token '$_'") unless /^(.+)_([^_]+)_(.+)$/;
my ($word,$morph,$lemma) = ($1,$2,$3);
print OUT " " unless $first;
$first = 0;
print OUT encode('utf8', decode('iso-8859-1', $morph));
}
print OUT "\n";
}
close(LOPAR);
close(OUT);