|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
use strict;

# Boolean values used throughout the script; note that true is 42, not 1.
my $false = 0;
my $true  = 42;

# Configuration. The option handling further down may override $delimiter,
# $oTag, $latex and $raw.
my $boundary  = "-X-";    # first-column value that marks a boundary line
my $delimiter = " ";      # pattern used to split an input line into columns
my $oTag      = "O";      # default value for the -o option
my $latex     = 0;        # set to 1 by -l
my $raw       = 0;        # set to 1 by -r

# Per-line working values.
my $line;
my @features;
my $firstItem;
my $nbrOfFeatures = -1;   # negative until the first line has been seen
my $correct;
my $correctType;
my $guessed;
my $guessedType;

# State carried over from the previous line.
my $inCorrect       = $false;
my $lastCorrect     = "O";
my $lastCorrectType = "";
my $lastGuessed     = "O";
my $lastGuessedType = "";

# Overall counters.
my $correctChunk = 0;
my $correctTags  = 0;
my $foundCorrect = 0;
my $foundGuessed = 0;
my $tokenCounter = 0;

# Per-type counters, keyed by chunk type.
my %correctChunk = ();
my %foundCorrect = ();
my %foundGuessed = ();

# Scores and reporting helpers.
my $precision = 0.0;
my $recall    = 0.0;
my $FB1       = 0.0;
my $i;
my $lastType;
my @sortedTypes;
|
|
|
|
|
# Command-line handling. Leading arguments that start with "-" are options:
#   -l        set $latex
#   -r        set $raw
#   -d VALUE  use VALUE as the column delimiter
#   -o VALUE  use VALUE as $oTag
# Anything left over after the options is an error.
while (@ARGV and $ARGV[0] =~ /^-/) {
    my $option = shift(@ARGV);
    if ($option eq "-l") {
        $latex = 1;
    }
    elsif ($option eq "-r") {
        $raw = 1;
    }
    elsif ($option eq "-d") {
        if (not defined $ARGV[0]) {
            die "conlleval: -d requires delimiter character\n";
        }
        $delimiter = shift(@ARGV);
    }
    elsif ($option eq "-o") {
        # The message used to be a copy of the -d one and named the wrong
        # kind of argument.
        if (not defined $ARGV[0]) {
            die "conlleval: -o requires a tag value\n";
        }
        $oTag = shift(@ARGV);
    }
    else {
        die "conlleval: unknown argument $option\n";
    }
}
if (@ARGV) { die "conlleval: unexpected command line argument\n"; }
|
|
|
# Main loop: read one line at a time from STDIN, split it into columns, take
# the guessed tag from the last column and the correct tag from the one before
# it, and update the chunk and token counters.
#
# NOTE(review): in the copy under review the column-index expressions of this
# loop were cut off after a "$". They are restored here as accesses to the last
# and second-to-last column, and the column-count check is written with
# scalar(@features). Confirm against the upstream script.
while (<STDIN>) {
    chomp($line = $_);
    @features = split(/$delimiter/, $line);

    # Every non-empty line must have the same number of columns as the first
    # line that was read; empty lines are exempt.
    my $columnCount = scalar(@features);
    if ($nbrOfFeatures < 0) {
        $nbrOfFeatures = $columnCount - 1;
    }
    elsif ($columnCount != 0 and $nbrOfFeatures != $columnCount - 1) {
        printf STDERR "unexpected number of features: %d (%d)\n",
            $columnCount, $nbrOfFeatures + 1;
        exit(1);
    }

    # Empty lines and boundary lines are replaced by a boundary token that is
    # tagged "O" in both tag columns.
    if (@features == 0 or $features[0] eq $boundary) {
        @features = ($boundary, "O", "O");
    }
    if (@features < 2) {
        die "conlleval: unexpected number of features in line $line\n";
    }

    # Raw mode: map $oTag to "O" and prefix every other value with "B-".
    if ($raw) {
        if ($features[-1] eq $oTag) { $features[-1] = "O"; }
        if ($features[-2] eq $oTag) { $features[-2] = "O"; }
        if ($features[-1] ne "O") {
            $features[-1] = "B-$features[-1]";
        }
        if ($features[-2] ne "O") {
            $features[-2] = "B-$features[-2]";
        }
    }

    # Split "TAG-TYPE" at the first hyphen; a value without a hyphen is a bare
    # tag with an empty type.
    my $guessedColumn = pop(@features);
    if ($guessedColumn =~ /^([^-]*)-(.*)$/) {
        $guessed     = $1;
        $guessedType = $2;
    }
    else {
        $guessed     = $guessedColumn;
        $guessedType = "";
    }
    my $correctColumn = pop(@features);
    if ($correctColumn =~ /^([^-]*)-(.*)$/) {
        $correct     = $1;
        $correctType = $2;
    }
    else {
        $correct     = $correctColumn;
        $correctType = "";
    }

    # Any false type value (including "0") is normalised to the empty string.
    $guessedType = $guessedType ? $guessedType : "";
    $correctType = $correctType ? $correctType : "";
    $firstItem   = shift(@features);

    # A boundary line always carries the guessed tag "O".
    if ($firstItem eq $boundary) { $guessed = "O"; }

    # The transition tests depend only on their arguments, so each one is
    # evaluated once per line and reused below.
    my $correctEnds = endOfChunk($lastCorrect, $correct,
                                 $lastCorrectType, $correctType);
    my $guessedEnds = endOfChunk($lastGuessed, $guessed,
                                 $lastGuessedType, $guessedType);
    my $correctStarts = startOfChunk($lastCorrect, $correct,
                                     $lastCorrectType, $correctType);
    my $guessedStarts = startOfChunk($lastGuessed, $guessed,
                                     $lastGuessedType, $guessedType);

    if ($inCorrect) {
        if ($correctEnds and $guessedEnds and
            $lastGuessedType eq $lastCorrectType) {
            # Both chunks end here with the same type: count a correct chunk.
            $inCorrect = $false;
            $correctChunk++;
            $correctChunk{$lastCorrectType}++;
        }
        elsif ($correctEnds != $guessedEnds or
               $guessedType ne $correctType) {
            # The two sequences diverge: the open chunk no longer matches.
            $inCorrect = $false;
        }
    }

    # A chunk that starts in both columns with the same type is a candidate
    # correct chunk.
    if ($correctStarts and $guessedStarts and
        $guessedType eq $correctType) {
        $inCorrect = $true;
    }

    if ($correctStarts) {
        $foundCorrect++;
        $foundCorrect{$correctType}++;
    }
    if ($guessedStarts) {
        $foundGuessed++;
        $foundGuessed{$guessedType}++;
    }

    # Boundary lines are not counted as tokens.
    if ($firstItem ne $boundary) {
        if ($correct eq $guessed and $guessedType eq $correctType) {
            $correctTags++;
        }
        $tokenCounter++;
    }

    $lastGuessed     = $guessed;
    $lastCorrect     = $correct;
    $lastGuessedType = $guessedType;
    $lastCorrectType = $correctType;
}
|
# A chunk that is still open and matching when the input ends is counted as
# correct.
if ($inCorrect) {
    $correctChunk++;
    $correctChunk{$lastCorrectType}++;
}

# Plain-text mode: print the overall counts and scores.
if (not $latex) {
    # Each score keeps its initial value when its denominator is zero.
    if ($foundGuessed > 0) {
        $precision = 100 * $correctChunk / $foundGuessed;
    }
    if ($foundCorrect > 0) {
        $recall = 100 * $correctChunk / $foundCorrect;
    }
    if ($precision + $recall > 0) {
        $FB1 = 2 * $precision * $recall / ($precision + $recall);
    }

    # print rather than printf: these strings contain interpolated data and
    # must not be interpreted as format strings.
    print "processed $tokenCounter tokens with $foundCorrect phrases; ";
    print "found: $foundGuessed phrases; correct: $correctChunk.\n";
    if ($tokenCounter > 0) {
        printf "accuracy: %6.2f%%; precision: %6.2f%%; recall: %6.2f%%; FB1: %6.2f\n",
            100 * $correctTags / $tokenCounter, $precision, $recall, $FB1;
    }
}
|
|
|
|
|
# Build the sorted list of distinct chunk types that occur in either the
# correct or the guessed column. A seen-hash is used for de-duplication so
# that a type whose name is false in boolean context ("" or "0") is listed
# only once.
{
    my %seenType;
    @sortedTypes = grep { not $seenType{$_}++ }
                   sort(keys %foundCorrect, keys %foundGuessed);
}
|
|
|
# Per-type report. In plain-text mode one line is printed per chunk type; in
# LaTeX mode a table header, one row per type and an overall row are printed.
if ($latex) {
    print " & Precision & Recall & F\$_{\\beta=1} \\\\\\hline";
}
for my $type (@sortedTypes) {
    # Missing counters are treated as zero.
    my $typeCorrect = $correctChunk{$type} ? $correctChunk{$type} : 0;
    my $typeGuessed = $foundGuessed{$type} ? $foundGuessed{$type} : 0;
    my $typeFound   = $foundCorrect{$type} ? $foundCorrect{$type} : 0;

    my $typePrecision = $typeGuessed ? 100 * $typeCorrect / $typeGuessed : 0.0;
    my $typeRecall    = $typeFound   ? 100 * $typeCorrect / $typeFound   : 0.0;
    my $typeFB1       = 0.0;
    if ($typePrecision + $typeRecall != 0.0) {
        $typeFB1 = 2 * $typePrecision * $typeRecall
                   / ($typePrecision + $typeRecall);
    }

    if ($latex) {
        printf "\n%-7s & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\",
            $type, $typePrecision, $typeRecall, $typeFB1;
    }
    else {
        printf "%17s: ", $type;
        printf "precision: %6.2f%%; ", $typePrecision;
        printf "recall: %6.2f%%; ", $typeRecall;
        printf "FB1: %6.2f %d\n", $typeFB1, $typeGuessed;
    }
}

if ($latex) {
    # Close the table body and add the overall row.
    print "\\hline\n";
    my $overallPrecision = 0.0;
    my $overallRecall    = 0;
    my $overallFB1       = 0.0;
    if ($foundGuessed > 0) {
        $overallPrecision = 100 * $correctChunk / $foundGuessed;
    }
    if ($foundCorrect > 0) {
        $overallRecall = 100 * $correctChunk / $foundCorrect;
    }
    if ($overallPrecision + $overallRecall > 0) {
        $overallFB1 = 2 * $overallPrecision * $overallRecall
                      / ($overallPrecision + $overallRecall);
    }
    printf "Overall & %6.2f\\%% & %6.2f\\%% & %6.2f \\\\\\hline\n",
        $overallPrecision, $overallRecall, $overallFB1;
}

exit 0;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# endOfChunk: decide whether a chunk ended between the previous token and the
# current one.
#
# Arguments: previous tag, current tag, previous type, current type.
# Returns:   the file-level $true if a chunk ended, otherwise $false.
#
# A chunk ends when any of the following holds:
#   * the previous tag is "[" or "]";
#   * the previous tag is neither "O" nor "." and the type changed;
#   * the previous tag is "B" or "I" and the current tag is "B", "O" or "S";
#   * the previous tag is "E" or "S" and the current tag is "B", "E", "I",
#     "O" or "S".
sub endOfChunk {
    my ($prevTag, $tag, $prevType, $type) = @_;

    return $true if $prevTag eq "[" or $prevTag eq "]";

    return $true
        if $prevTag ne "O" and $prevTag ne "." and $prevType ne $type;

    # \A...\z anchors keep these exact string comparisons, as with "eq".
    return $true
        if ($prevTag eq "B" or $prevTag eq "I") and $tag =~ /\A[BOS]\z/;

    return $true
        if ($prevTag eq "E" or $prevTag eq "S") and $tag =~ /\A[BEIOS]\z/;

    return $false;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# startOfChunk: decide whether a chunk started at the current token.
#
# Arguments: previous tag, current tag, previous type, current type.
# Returns:   the file-level $true if a chunk started, otherwise $false.
#
# A chunk starts when any of the following holds:
#   * the current tag is "[" or "]";
#   * the current tag is neither "O" nor "." and the type changed;
#   * the current tag is "B" or "S" and the previous tag is "B", "I", "O",
#     "S" or "E";
#   * the current tag is "I" or "E" and the previous tag is "O", "S" or "E".
sub startOfChunk {
    my ($prevTag, $tag, $prevType, $type) = @_;

    return $true if $tag eq "[" or $tag eq "]";

    return $true
        if $tag ne "O" and $tag ne "." and $prevType ne $type;

    # \A...\z anchors keep these exact string comparisons, as with "eq".
    return $true
        if ($tag eq "B" or $tag eq "S") and $prevTag =~ /\A[BEIOS]\z/;

    return $true
        if ($tag eq "I" or $tag eq "E") and $prevTag =~ /\A[EOS]\z/;

    return $false;
}
|
|