| |
|
|
| use strict; |
|
|
| use Getopt::Long; |
| use File::Basename; |
| use FindBin qw($RealBin); |
|
|
# Forward declaration: systemCheck() is defined at the bottom of the file,
# after the main script body that calls it.
sub systemCheck($);

# Root of the Moses tree, two directories above this script's real location.
# The helper executables are looked up under it (bin/ and contrib/).
my $mosesDir = "$RealBin/../..";

my $ptPath;          # --phrase-table   : gzip-compressed phrase table to binarize (required)
my $lexRoPath;       # --lex-ro         : lexicalized reordering table (optional)
my $outPath;         # --output-dir     : directory handed to the binarizer (required)
my $numScores = 4;   # --num-scores     : forwarded to the binarizer as --num-scores
my $numLexScores;    # --num-lex-scores : required only together with --lex-ro
my $pruneNum = 100;  # --prune          : forwarded to filter-pt as -n
my $scfg = 0;        # --scfg           : boolean flag, forwarded to the binarizer as --scfg

# Every numeric option is declared "=i" so that Getopt::Long rejects
# non-integer values up front; these values are later interpolated into shell
# command lines.
GetOptions(
    "phrase-table=s"   => \$ptPath,
    "lex-ro=s"         => \$lexRoPath,
    "output-dir=s"     => \$outPath,
    "num-scores=i"     => \$numScores,
    "num-lex-scores=i" => \$numLexScores,
    "prune=i"          => \$pruneNum,
    "scfg"             => \$scfg
) or exit 1;
|
|
| |
# --- Argument validation -------------------------------------------------
die("ERROR: please set --phrase-table") unless defined($ptPath);
die("ERROR: please set --output-dir") unless defined($outPath);

# --lex-ro needs --num-lex-scores. Check it here so the script fails before
# the expensive pruning step instead of halfway through the pipeline.
die("ERROR: please set --num-lex-scores")
    if (defined($lexRoPath) && !defined($numLexScores));

# --- Prerequisite executables --------------------------------------------
# Only the tools this run will actually invoke are required.
die("ERROR: compile contrib/sigtest-filter") if (!-X "$mosesDir/contrib/sigtest-filter/filter-pt");

if (defined($lexRoPath)) {
    # The lexicalized-reordering helpers are only run when --lex-ro is given.
    die("ERROR: compile with bjam --with-cmph") if (!-X "$mosesDir/bin/processLexicalTableMin");
    die("ERROR: cannot execute $mosesDir/bin/addLexROtoPT") if (!-X "$mosesDir/bin/addLexROtoPT");
}

# Check the binarizer that is actually executed at the end of the script
# (bin/CreateProbingPT2), not a differently named binary.
die("ERROR: compile with bjam --with-xmlrpc-c") if (!-X "$mosesDir/bin/CreateProbingPT2");
|
|
# Main pipeline: prune the phrase table, optionally merge the lexicalized
# reordering table into it, then binarize the result.
#
# NOTE: all paths are interpolated unquoted into shell command lines, so they
# must not contain whitespace or shell metacharacters.
my $cmd;

# Scratch directory created beside the output directory and named after this
# process's PID ($$). It is not removed by the code below.
my $tempPath = dirname($outPath) ."/tmp.$$";

# Run mkdir through systemCheck() like every other external command, so a
# failure to create the scratch directory aborts the script immediately
# instead of surfacing later as a confusing redirect/gzip error.
$cmd = "mkdir -p $tempPath";
systemCheck($cmd);

# Step 1: decompress the phrase table, prune it with filter-pt (-n $pruneNum)
# and recompress the result into the scratch directory.
$cmd = "gzip -dc $ptPath | $mosesDir/contrib/sigtest-filter/filter-pt -n $pruneNum | gzip -c > $tempPath/pt.gz";
systemCheck($cmd);

# Step 2: make $tempPath/pt.txt.gz point at the table to binarize.
if (defined($lexRoPath)) {
    die("ERROR: please set --num-lex-scores") unless defined($numLexScores);

    # Binarize the lexicalized reordering table ...
    $cmd = "$mosesDir/bin/processLexicalTableMin -in $lexRoPath -out $tempPath/lex-ro -T . -threads all";
    systemCheck($cmd);

    # ... and combine it with the pruned phrase table.
    $cmd = "$mosesDir/bin/addLexROtoPT $tempPath/pt.gz $tempPath/lex-ro.minlexr | gzip -c > $tempPath/pt.withLexRO.gz";
    systemCheck($cmd);

    # The link target is relative to the scratch directory itself.
    $cmd = "ln -s pt.withLexRO.gz $tempPath/pt.txt.gz";
    systemCheck($cmd);
}
else {
    # No reordering table: binarize the pruned phrase table as-is.
    $cmd = "ln -s pt.gz $tempPath/pt.txt.gz";
    systemCheck($cmd);
}

# Step 3: build the binary table in the requested output directory.
$cmd = "$mosesDir/bin/CreateProbingPT2 --num-scores $numScores --log-prob --input-pt $tempPath/pt.txt.gz --output-dir $outPath";

if (defined($lexRoPath)) {
    $cmd .= " --num-lex-scores $numLexScores";
}

if ($scfg) {
    $cmd .= " --scfg";
}

systemCheck($cmd);

exit(0);
|
|
| |
# Run a command via system(), aborting the whole script on failure.
#
# The command line is echoed to STDERR before it runs. If the command cannot
# be started, is terminated by a signal, or exits with a non-zero status, the
# reason is reported on STDERR and the script exits with status 1.
#
# Parameter: $cmd - the command line, passed to system() as a single string.
# Returns:   nothing; returns only when the command succeeded.
sub systemCheck($)
{
    my $cmd = shift;
    print STDERR "Executing: $cmd\n";

    my $retVal = system($cmd);
    return if ($retVal == 0);

    # Decode the wait status as described in "perldoc -f system".
    if ($retVal == -1) {
        print STDERR "ERROR: failed to execute command: $!\n";
    }
    elsif ($retVal & 127) {
        printf STDERR ("ERROR: command died with signal %d\n", $retVal & 127);
    }
    else {
        printf STDERR ("ERROR: command exited with status %d\n", $retVal >> 8);
    }

    exit(1);
}
|
|