|
|
|
|
|
#include <cassert> |
|
#include <iomanip> |
|
#include <iostream> |
|
#include <fstream> |
|
#include <sstream> |
|
#include <stdexcept> |
|
#include <string> |
|
#include <list> |
|
#include <vector> |
|
#include <set> |
|
#include <algorithm> |
|
#include <cstring> |
|
#include "cmd.h" |
|
|
|
using namespace std; |
|
|
|
const int MAX_WORD = 10000; |
|
const int MAX_M = 400; |
|
const int MAX_N = 400; |
|
|
|
enum Alignment { |
|
UNION = 1, |
|
INTERSECT, |
|
GROW, |
|
SRCTOTGT, |
|
TGTTOSRC, |
|
}; |
|
|
|
const Enum_T END_ENUM = {0, 0}; |
|
|
|
namespace |
|
{ |
|
Enum_T AlignEnum [] = { |
|
{ "union", UNION }, |
|
{ "u", UNION }, |
|
{ "intersect", INTERSECT}, |
|
{ "i", INTERSECT}, |
|
{ "grow", GROW }, |
|
{ "g", GROW }, |
|
{ "srctotgt", SRCTOTGT }, |
|
{ "s2t", SRCTOTGT }, |
|
{ "tgttosrc", TGTTOSRC }, |
|
{ "t2s", TGTTOSRC }, |
|
END_ENUM |
|
}; |
|
|
|
Enum_T BoolEnum [] = { |
|
{ "true", true }, |
|
{ "yes", true }, |
|
{ "y", true }, |
|
{ "false", false }, |
|
{ "no", false }, |
|
{ "n", false }, |
|
END_ENUM |
|
}; |
|
|
|
|
|
|
|
int* fa; |
|
int* ea; |
|
int** A; |
|
|
|
int verbose=0; |
|
|
|
|
|
|
|
int lc = 0; |
|
|
|
int getals(istream& inp,int& m, int *a,int& n, int *b) |
|
{ |
|
char w[MAX_WORD], dummy[10]; |
|
int i,j,freq; |
|
if (inp >> freq) { |
|
++lc; |
|
|
|
inp >> n; |
|
assert(n<MAX_N); |
|
for (i=1; i<=n; i++) { |
|
inp >> setw(MAX_WORD) >> w; |
|
if (strlen(w)>=MAX_WORD-1) { |
|
cerr << lc << ": target len=" << strlen(w) << " is not less than MAX_WORD-1=" |
|
<< MAX_WORD-1 << endl; |
|
assert(strlen(w)<MAX_WORD-1); |
|
} |
|
} |
|
|
|
inp >> dummy; |
|
|
|
for (i=1; i<=n; i++) inp >> b[i]; |
|
|
|
|
|
inp >> m; |
|
assert(m<MAX_M); |
|
for (j=1; j<=m; j++) { |
|
inp >> setw(MAX_WORD) >> w; |
|
if (strlen(w)>=MAX_WORD-1) { |
|
cerr << lc << ": source len=" << strlen(w) << " is not less than MAX_WORD-1=" |
|
<< MAX_WORD-1 << endl; |
|
assert(strlen(w)<MAX_WORD-1); |
|
} |
|
} |
|
|
|
inp >> dummy; |
|
|
|
|
|
for (j=1; j<=m; j++) { |
|
inp >> a[j]; |
|
assert(0<=a[j] && a[j]<=n); |
|
} |
|
|
|
|
|
for (i=1; i<=n; i++) |
|
assert(0<=b[i] && b[i]<=m); |
|
|
|
return 1; |
|
|
|
} else |
|
return 0; |
|
} |
|
|
|
|
|
|
|
int prunionalignment(ostream& out,int m,int *a,int n,int* b) |
|
{ |
|
|
|
ostringstream sout; |
|
|
|
for (int j=1; j<=m; j++) |
|
if (a[j]) |
|
sout << j-1 << "-" << a[j]-1 << " "; |
|
|
|
for (int i=1; i<=n; i++) |
|
if (b[i] && a[b[i]]!=i) |
|
sout << b[i]-1 << "-" << i-1 << " "; |
|
|
|
|
|
string str = sout.str(); |
|
if (str.length() == 0) |
|
str = "\n"; |
|
else |
|
str.replace(str.length()-1,1,"\n"); |
|
|
|
out << str; |
|
out.flush(); |
|
|
|
return 1; |
|
} |
|
|
|
|
|
|
|
|
|
int printersect(ostream& out,int m,int *a,int n,int* b) |
|
{ |
|
|
|
ostringstream sout; |
|
|
|
for (int j=1; j<=m; j++) |
|
if (a[j] && b[a[j]]==j) |
|
sout << j-1 << "-" << a[j]-1 << " "; |
|
|
|
|
|
string str = sout.str(); |
|
if (str.length() == 0) |
|
str = "\n"; |
|
else |
|
str.replace(str.length()-1,1,"\n"); |
|
|
|
out << str; |
|
out.flush(); |
|
|
|
return 1; |
|
} |
|
|
|
|
|
|
|
int printtgttosrc(ostream& out,int m,int *a,int n,int* b) |
|
{ |
|
|
|
ostringstream sout; |
|
|
|
for (int i=1; i<=n; i++) |
|
if (b[i]) |
|
sout << b[i]-1 << "-" << i-1 << " "; |
|
|
|
|
|
string str = sout.str(); |
|
if (str.length() == 0) |
|
str = "\n"; |
|
else |
|
str.replace(str.length()-1,1,"\n"); |
|
|
|
out << str; |
|
out.flush(); |
|
|
|
return 1; |
|
} |
|
|
|
|
|
|
|
int printsrctotgt(ostream& out,int m,int *a,int n,int* b) |
|
{ |
|
|
|
ostringstream sout; |
|
|
|
for (int j=1; j<=m; j++) |
|
if (a[j]) |
|
sout << j-1 << "-" << a[j]-1 << " "; |
|
|
|
|
|
string str = sout.str(); |
|
if (str.length() == 0) |
|
str = "\n"; |
|
else |
|
str.replace(str.length()-1,1,"\n"); |
|
|
|
out << str; |
|
out.flush(); |
|
|
|
return 1; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false) |
|
{ |
|
|
|
ostringstream sout; |
|
|
|
vector <pair <int,int> > neighbors; |
|
|
|
pair <int,int> entry; |
|
|
|
neighbors.push_back(make_pair(-1,-0)); |
|
neighbors.push_back(make_pair(0,-1)); |
|
neighbors.push_back(make_pair(1,0)); |
|
neighbors.push_back(make_pair(0,1)); |
|
|
|
|
|
if (diagonal) { |
|
neighbors.push_back(make_pair(-1,-1)); |
|
neighbors.push_back(make_pair(-1,1)); |
|
neighbors.push_back(make_pair(1,-1)); |
|
neighbors.push_back(make_pair(1,1)); |
|
} |
|
|
|
|
|
int i,j; |
|
size_t o; |
|
|
|
|
|
|
|
|
|
memset(fa,0,(m+1)*sizeof(int)); |
|
memset(ea,0,(n+1)*sizeof(int)); |
|
|
|
|
|
|
|
|
|
for (int i=1; i<=n; i++) memset(A[i],0,(m+1)*sizeof(int)); |
|
|
|
set <pair <int,int> > currentpoints; |
|
set <pair <int,int> > unionalignment; |
|
|
|
pair <int,int> point; |
|
set<pair <int,int> >::const_iterator k; |
|
|
|
|
|
for (j=1; j<=m; j++) { |
|
if (a[j]) { |
|
unionalignment.insert(make_pair(a[j],j)); |
|
if (b[a[j]]==j) { |
|
fa[j]=1; |
|
ea[a[j]]=1; |
|
A[a[j]][j]=2; |
|
currentpoints.insert(make_pair(a[j],j)); |
|
} else |
|
A[a[j]][j]=-1; |
|
} |
|
} |
|
|
|
for (i=1; i<=n; i++) |
|
if (b[i] && a[b[i]]!=i) { |
|
unionalignment.insert(make_pair(i,b[i])); |
|
A[i][b[i]]=1; |
|
} |
|
|
|
|
|
int added=1; |
|
|
|
while (added) { |
|
added=0; |
|
|
|
for (k=currentpoints.begin(); k!=currentpoints.end(); k++) { |
|
|
|
for (o=0; o<neighbors.size(); o++) { |
|
|
|
point.first=k->first+neighbors[o].first; |
|
point.second=k->second+neighbors[o].second; |
|
|
|
|
|
if (point.first>0 && point.first <=n && point.second>0 && point.second<=m) |
|
|
|
if (b[point.first]==point.second || a[point.second]==point.first) { |
|
|
|
|
|
if (!(ea[point.first] && fa[point.second])) { |
|
|
|
currentpoints.insert(point); |
|
A[point.first][point.second]=2; |
|
ea[point.first]=1; |
|
fa[point.second]=1; |
|
added=1; |
|
|
|
} |
|
} |
|
} |
|
} |
|
} |
|
|
|
if (isfinal) { |
|
for (k=unionalignment.begin(); k!=unionalignment.end(); k++) |
|
if (A[k->first][k->second]==1) { |
|
point.first=k->first; |
|
point.second=k->second; |
|
|
|
|
|
if ((bothuncovered && !ea[point.first] && !fa[point.second]) || |
|
(!bothuncovered && !(ea[point.first] && fa[point.second]))) { |
|
|
|
currentpoints.insert(point); |
|
A[point.first][point.second]=2; |
|
|
|
ea[point.first]=1; |
|
fa[point.second]=1; |
|
|
|
|
|
|
|
} |
|
} |
|
|
|
for (k=unionalignment.begin(); k!=unionalignment.end(); k++) |
|
if (A[k->first][k->second]==-1) { |
|
point.first=k->first; |
|
point.second=k->second; |
|
|
|
|
|
if ((bothuncovered && !ea[point.first] && !fa[point.second]) || |
|
(!bothuncovered && !(ea[point.first] && fa[point.second]))) { |
|
|
|
currentpoints.insert(point); |
|
A[point.first][point.second]=2; |
|
|
|
ea[point.first]=1; |
|
fa[point.second]=1; |
|
|
|
|
|
|
|
} |
|
} |
|
} |
|
|
|
|
|
for (k=currentpoints.begin(); k!=currentpoints.end(); k++) |
|
sout << k->second-1 << "-" << k->first-1 << " "; |
|
|
|
|
|
|
|
string str = sout.str(); |
|
if (str.length() == 0) |
|
str = "\n"; |
|
else |
|
str.replace(str.length()-1,1,"\n"); |
|
|
|
out << str; |
|
out.flush(); |
|
return 1; |
|
|
|
return 1; |
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char** argv) |
|
{ |
|
|
|
int alignment=0; |
|
char* input= NULL; |
|
char* output= NULL; |
|
int diagonal=false; |
|
int isfinal=false; |
|
int bothuncovered=false; |
|
|
|
|
|
DeclareParams("a", CMDENUMTYPE, &alignment, AlignEnum, |
|
"alignment", CMDENUMTYPE, &alignment, AlignEnum, |
|
"d", CMDENUMTYPE, &diagonal, BoolEnum, |
|
"diagonal", CMDENUMTYPE, &diagonal, BoolEnum, |
|
"f", CMDENUMTYPE, &isfinal, BoolEnum, |
|
"final", CMDENUMTYPE, &isfinal, BoolEnum, |
|
"b", CMDENUMTYPE, &bothuncovered, BoolEnum, |
|
"both", CMDENUMTYPE, &bothuncovered, BoolEnum, |
|
"i", CMDSTRINGTYPE, &input, |
|
"o", CMDSTRINGTYPE, &output, |
|
"v", CMDENUMTYPE, &verbose, BoolEnum, |
|
"verbose", CMDENUMTYPE, &verbose, BoolEnum, |
|
|
|
NULL); |
|
|
|
GetParams(&argc, &argv, NULL); |
|
|
|
if (alignment==0) { |
|
cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n" |
|
<< "Input file or std must be in .bal format (see script giza2bal.pl).\n"; |
|
|
|
exit(1); |
|
} |
|
|
|
istream *inp = &std::cin; |
|
ostream *out = &std::cout; |
|
|
|
try { |
|
if (input) { |
|
fstream *fin = new fstream(input,ios::in); |
|
if (!fin->is_open()) throw runtime_error("cannot open " + string(input)); |
|
inp = fin; |
|
} |
|
|
|
if (output) { |
|
fstream *fout = new fstream(output,ios::out); |
|
if (!fout->is_open()) throw runtime_error("cannot open " + string(output)); |
|
out = fout; |
|
} |
|
|
|
int a[MAX_M],b[MAX_N],m,n; |
|
fa=new int[MAX_M+1]; |
|
ea=new int[MAX_N+1]; |
|
|
|
|
|
int sents = 0; |
|
A=new int *[MAX_N+1]; |
|
for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1]; |
|
|
|
switch (alignment) { |
|
case UNION: |
|
cerr << "symal: computing union alignment\n"; |
|
while(getals(*inp,m,a,n,b)) { |
|
prunionalignment(*out,m,a,n,b); |
|
sents++; |
|
} |
|
cerr << "Sents: " << sents << endl; |
|
break; |
|
case INTERSECT: |
|
cerr << "symal: computing intersect alignment\n"; |
|
while(getals(*inp,m,a,n,b)) { |
|
printersect(*out,m,a,n,b); |
|
sents++; |
|
} |
|
cerr << "Sents: " << sents << endl; |
|
break; |
|
case GROW: |
|
cerr << "symal: computing grow alignment: diagonal (" |
|
<< diagonal << ") final ("<< isfinal << ")" |
|
<< "both-uncovered (" << bothuncovered <<")\n"; |
|
|
|
while(getals(*inp,m,a,n,b)) |
|
printgrow(*out,m,a,n,b,diagonal,isfinal,bothuncovered); |
|
|
|
break; |
|
case TGTTOSRC: |
|
cerr << "symal: computing target-to-source alignment\n"; |
|
|
|
while(getals(*inp,m,a,n,b)) { |
|
printtgttosrc(*out,m,a,n,b); |
|
sents++; |
|
} |
|
cerr << "Sents: " << sents << endl; |
|
break; |
|
case SRCTOTGT: |
|
cerr << "symal: computing source-to-target alignment\n"; |
|
|
|
while(getals(*inp,m,a,n,b)) { |
|
printsrctotgt(*out,m,a,n,b); |
|
sents++; |
|
} |
|
cerr << "Sents: " << sents << endl; |
|
break; |
|
default: |
|
throw runtime_error("Unknown alignment"); |
|
} |
|
|
|
delete [] fa; |
|
delete [] ea; |
|
for (int i=1; i<=MAX_N; i++) delete [] A[i]; |
|
delete [] A; |
|
|
|
if (inp != &std::cin) { |
|
delete inp; |
|
} |
|
if (out != &std::cout) { |
|
delete inp; |
|
} |
|
} catch (const std::exception &e) { |
|
cerr << e.what() << std::endl; |
|
exit(1); |
|
} |
|
|
|
exit(0); |
|
} |
|
|