| #include <fstream> |
| #include <iostream> |
| #include <vector> |
| #include <sstream> |
| #include <map> |
| #include <set> |
| #include <cstdlib> |
|
|
|
|
| using namespace std; |
|
|
|
|
// Parses the leading integer of `s` using stream extraction.
//
// Leading whitespace is skipped and parsing stops at the first character that
// cannot belong to an integer (so "12abc" yields 12).
//
// Returns 0 when no integer can be extracted, e.g. for an empty string or
// text that does not start with a number. The result is initialised up front
// so that a failed extraction can never return an indeterminate value.
int stringToInteger(const std::string & s)
{
  std::istringstream buffer(s);
  int value = 0;
  buffer >> value;
  return value;
}
|
|
// Appends every line of the file `fileName` to `input` (existing elements of
// `input` are kept).
//
// If the file cannot be opened, or `fileName` is null, a diagnostic is written
// to standard error and the process terminates with exit status 1. The
// diagnostic goes to stderr rather than stdout so it cannot be mixed into
// whatever the program prints as its regular output.
void loadInput(const char * fileName, std::vector<std::string> & input)
{
  if (!fileName) {
    std::cerr << "Unable to read (no file name given)" << std::endl;
    std::exit(1);
  }

  std::ifstream sr(fileName);
  if (!sr.is_open()) {
    std::cerr << "Unable to read " << fileName << std::endl;
    std::exit(1);
  }

  std::string line;
  while (std::getline(sr, line)) {
    input.push_back(line);
  }
  // The stream is closed by its destructor.
}
|
|
// Splits `s` on whitespace and stores the resulting tokens in `currInput`,
// replacing whatever the vector held before. An empty or all-whitespace
// string yields an empty vector.
void getWords(std::string s, std::vector<std::string> & currInput)
{
  currInput.clear();

  std::istringstream tokens(s);
  std::string word;
  while (tokens >> word) {
    currInput.push_back(word);
  }
}
|
|
// Builds the translation token for the cept at position `index` of `gCepts`.
//
// The words of `currF` selected by the cept's index list are joined with
// "^_^". If the joined string is a key of `singletons` the fixed token
// "_TRANS_SLF_ " is returned; otherwise the result is
// "_TRANS_<cept string>_TO_<joined words> ". Both forms end with a space.
std::string getTranslation(int index, std::vector<std::pair<std::string, std::vector<int> > > & gCepts, std::vector<std::string> & currF, std::map<std::string, int> & singletons)
{
  const std::pair<std::string, std::vector<int> > & cept = gCepts[index];
  const std::vector<int> & positions = cept.second;

  std::string joined;
  for (std::size_t n = 0; n < positions.size(); ++n) {
    if (n > 0) {
      joined += "^_^";
    }
    joined += currF[positions[n]];
  }

  if (singletons.count(joined) != 0) {
    return "_TRANS_SLF_ ";
  }
  return "_TRANS_" + cept.first + "_TO_" + joined + " ";
}
|
|
|
|
|
|
// Finds the gap to jump back to in order to reach position `j1`.
//
// `gap` maps a position to either "Unfilled" or another state string; only
// entries whose value is exactly "Unfilled" are considered. The map is walked
// from its largest key downwards and every unfilled entry seen is counted.
//
// Returns
//   - `j1` itself if there is an unfilled gap at exactly that position,
//   - otherwise the largest unfilled position smaller than `j1` (i.e. the
//     closest one before it),
//   - or -1 if no such gap exists (including when `gap` is empty).
//
// `gp` receives how many unfilled gaps were counted, starting from the largest
// key, up to and including the one returned; it is 0 when -1 is returned.
//
// The map is taken by const reference to avoid copying it on every call, an
// empty map is handled instead of decrementing end(), and there is no upper
// bound on how far away the chosen gap may be.
int closestGap(const std::map<int, std::string> & gap, int j1, int & gp)
{
  gp = 0;
  int unfilledSeen = 0;

  std::map<int, std::string>::const_reverse_iterator it;
  for (it = gap.rbegin(); it != gap.rend(); ++it) {
    if (it->second != "Unfilled") {
      continue;
    }

    ++unfilledSeen;

    // Keys are visited in descending order, so the first unfilled key that is
    // not beyond j1 is either j1 itself or the nearest gap before it.
    if (it->first <= j1) {
      gp = unfilledSeen;
      return it->first;
    }
  }

  return -1;
}
|
|
|
|
| void generateStory(vector <pair <string , vector <int> > > & gCepts, set <int> & targetNullWords, vector<string> & currF, map <string,int> & singletons) |
| { |
|
|
| int fl = 0; |
| int i = 0; |
| int j = 0; |
| int N = gCepts.size(); |
| int k = 0; |
| int E = 0; |
| int j1 = 0; |
| int Li =0; |
| int Lj=0; |
| map <int,int > generated; |
| map <int,string> gap; |
| map <int,int> :: iterator iter; |
| int gp=0; |
| |
|
|
| while (targetNullWords.find(j) != targetNullWords.end()) { |
| cout<<"_INS_"<<currF[j]<<" "; |
| generated[j]=-1; |
| j=j+1; |
| } |
|
|
| while (i < gCepts.size() && gCepts[i].second.size() == 0) { |
| cout<<"_DEL_"<<gCepts[i].first<<" "; |
| i=i+1; |
| } |
|
|
| E=j; |
|
|
| while (i<N) { |
|
|
| |
| |
|
|
| Li = gCepts[i].second.size(); |
| j1 = gCepts[i].second[k]; |
|
|
| |
|
|
| if(j<j1) { |
| iter = generated.find(j); |
| if( iter == generated.end()) { |
| cout<<"_INS_GAP_ "; |
| gap[j] = "Unfilled"; |
| } |
|
|
| if (j==E) { |
| j=j1; |
| } else { |
| cout<<"_JMP_FWD_ "; |
| j=E; |
| } |
|
|
| } |
|
|
| if(j1<j) { |
| iter = generated.find(j); |
| if(j<E && iter == generated.end()) { |
|
|
| cout<<"_INS_GAP_ "; |
| gap[j]="Unfilled"; |
| } |
|
|
| j=closestGap(gap,j1,gp); |
| |
| cout<<"_JMP_BCK_"<<gp<<" "; |
|
|
| if(j==j1) |
| gap[j]="Filled"; |
|
|
| } |
|
|
| if(j<j1) { |
| cout<<"_INS_GAP_ "; |
| gap[j] = "Unfilled"; |
| j=j1; |
| } |
|
|
| if(k==0) { |
| cout<<getTranslation(i, gCepts,currF,singletons); |
| } else { |
| cout<<"_CONT_CEPT_ "; |
| } |
| generated[j]=i; |
| j=j+1; |
| k=k+1; |
|
|
| while(targetNullWords.find(j) != targetNullWords.end()) { |
| |
| cout<<"_INS_"<<currF[j]<<" "; |
| generated[j]=-1; |
| j=j+1; |
| } |
|
|
| if(E<j) |
| E=j; |
| |
| if(k==Li) { |
| i=i+1; |
| k=0; |
|
|
| while(i < gCepts.size() && gCepts[i].second.size() == 0) { |
| cout<<"_DEL_"<<gCepts[i].first<<" "; |
| i=i+1; |
|
|
| } |
|
|
| } |
|
|
| } |
|
|
| cout<<endl; |
| } |
|
|
|
|
|
|
// Rebuilds `gCepts` from `ceptsInPhrase`.
//
// Each input cept is a pair (first = set of positions that becomes the output
// index list, second = set of indices into `currE`). For every cept one output
// entry is appended: the `currE` words at the second set's indices joined with
// "^_^", paired with the first set's elements in ascending order.
//
// Indices listed in `sourceNullWords` are emitted as separate entries
// consisting of the single `currE` word and an empty index list; they are
// placed before the cept whose smallest index follows them, with any remaining
// ones appended after the last cept.
void ceptsInGenerativeStoryFormat(std::vector<std::pair<std::set<int>, std::set<int> > > & ceptsInPhrase, std::vector<std::pair<std::string, std::vector<int> > > & gCepts, std::set<int> & sourceNullWords, std::vector<std::string> & currE)
{
  typedef std::pair<std::string, std::vector<int> > Entry;

  gCepts.clear();

  const std::vector<int> noPositions;
  std::set<int> alreadyUsed;  // indices consumed out of order by an earlier cept
  int cursor = 0;             // next index of currE still to be accounted for
  int runEnd = 0;             // last index of the contiguous run at the start of a cept

  for (std::size_t c = 0; c < ceptsInPhrase.size(); ++c) {
    const std::set<int> & positions = ceptsInPhrase[c].first;
    const std::set<int> & wordIdx = ceptsInPhrase[c].second;

    // Catch up to the first word of this cept, emitting null-aligned words.
    while (cursor < *wordIdx.begin()) {
      while (alreadyUsed.count(cursor) != 0) {
        ++cursor;
      }
      while (sourceNullWords.count(cursor) != 0) {
        gCepts.push_back(Entry(currE[cursor], noPositions));
        ++cursor;
      }
    }

    std::string joined;
    for (std::set<int>::const_iterator w = wordIdx.begin(); w != wordIdx.end(); ++w) {
      const int idx = *w;

      if (w == wordIdx.begin()) {
        runEnd = idx;
        cursor = runEnd + 1;
      } else {
        joined += "^_^";
        if (runEnd == idx - 1) {
          // Still contiguous with the start of the cept.
          ++runEnd;
          ++cursor;
        } else {
          // Non-adjacent word: remember it so later scans skip over it.
          alreadyUsed.insert(idx);
        }
      }

      joined += currE[idx];
    }

    gCepts.push_back(Entry(joined, std::vector<int>(positions.begin(), positions.end())));
  }

  // Account for everything after the last cept.
  while (static_cast<std::size_t>(cursor) < currE.size()) {
    while (alreadyUsed.count(cursor) != 0) {
      ++cursor;
    }
    while (sourceNullWords.count(cursor) != 0) {
      gCepts.push_back(Entry(currE[cursor], noPositions));
      ++cursor;
    }
  }
}
|
|
// Writes one line per entry of `gCepts` to standard output in the form
// "<cept string> <---> <word> <word> ... ", where the words are the elements
// of `currF` selected by the entry's index list (each followed by a space).
void printCepts(std::vector<std::pair<std::string, std::vector<int> > > & gCepts, std::vector<std::string> & currF)
{
  typedef std::vector<std::pair<std::string, std::vector<int> > >::const_iterator CeptIter;
  typedef std::vector<int>::const_iterator IndexIter;

  for (CeptIter cept = gCepts.begin(); cept != gCepts.end(); ++cept) {
    std::cout << cept->first << " <---> ";

    for (IndexIter idx = cept->second.begin(); idx != cept->second.end(); ++idx) {
      std::cout << currF[*idx] << " ";
    }

    std::cout << std::endl;
  }
}
|
|
// Grows `eSide` and `fSide` until they are closed under the two link maps.
//
// In each pass every element of `eSide` contributes its `tS` list to `fSide`,
// then every element of `fSide` contributes its `sT` list to `eSide`. Passes
// are repeated for as long as `eSide` keeps growing.
//
// Lookups use operator[], so a key that is missing from `tS` or `sT` is
// inserted there with an empty list.
void getMeCepts(std::set<int> & eSide, std::set<int> & fSide, std::map<int, std::vector<int> > & tS, std::map<int, std::vector<int> > & sT)
{
  std::size_t sizeBefore;

  do {
    sizeBefore = eSide.size();

    for (std::set<int>::const_iterator e = eSide.begin(); e != eSide.end(); ++e) {
      const std::vector<int> & linked = tS[*e];
      fSide.insert(linked.begin(), linked.end());
    }

    for (std::set<int>::const_iterator f = fSide.begin(); f != fSide.end(); ++f) {
      const std::vector<int> & linked = sT[*f];
      eSide.insert(linked.begin(), linked.end());
    }
  } while (eSide.size() > sizeBefore);
}
|
|
| void constructCepts(vector < pair < set <int> , set <int> > > & ceptsInPhrase, set <int> & sourceNullWords, set <int> & targetNullWords, vector <string> & alignment, int eSize, int fSize) |
| { |
|
|
| ceptsInPhrase.clear(); |
| sourceNullWords.clear(); |
| targetNullWords.clear(); |
|
|
| vector <int> align; |
|
|
| std::map <int , vector <int> > sT; |
| std::map <int , vector <int> > tS; |
| std::set <int> eSide; |
| std::set <int> fSide; |
| std::set <int> :: iterator iter; |
| std :: map <int , vector <int> > :: iterator iter2; |
| std :: pair < set <int> , set <int> > cept; |
| int src; |
| int tgt; |
| ceptsInPhrase.clear(); |
|
|
| for (int j=0; j<alignment.size(); j+=2) { |
| align.push_back(stringToInteger(alignment[j+1])); |
| align.push_back(stringToInteger(alignment[j])); |
| } |
|
|
| for (int i = 0; i < align.size(); i+=2) { |
| src = align[i]; |
| tgt = align[i+1]; |
| tS[tgt].push_back(src); |
| sT[src].push_back(tgt); |
| } |
|
|
| for (int i = 0; i< fSize; i++) { |
| if (sT.find(i) == sT.end()) { |
| targetNullWords.insert(i); |
| } |
| } |
|
|
| for (int i = 0; i< eSize; i++) { |
| if (tS.find(i) == tS.end()) { |
| sourceNullWords.insert(i); |
| } |
| } |
|
|
|
|
| while (tS.size() != 0 && sT.size() != 0) { |
|
|
| iter2 = tS.begin(); |
|
|
| eSide.clear(); |
| fSide.clear(); |
| eSide.insert (iter2->first); |
|
|
| getMeCepts(eSide, fSide, tS , sT); |
|
|
| for (iter = eSide.begin(); iter != eSide.end(); iter++) { |
| iter2 = tS.find(*iter); |
| tS.erase(iter2); |
| } |
|
|
| for (iter = fSide.begin(); iter != fSide.end(); iter++) { |
| iter2 = sT.find(*iter); |
| sT.erase(iter2); |
| } |
|
|
| cept = make_pair (fSide , eSide); |
| ceptsInPhrase.push_back(cept); |
| } |
|
|
| } |
|
|
| int main(int argc, char * argv[]) |
| { |
|
|
| vector <string> e; |
| vector <string> f; |
| vector <string> a; |
| vector <string> singletons; |
| map <string,int> sTons; |
| vector < pair < set <int> , set <int> > > ceptsInPhrase; |
| vector < pair < string , vector <int> > > gCepts; |
|
|
| set <int> sourceNullWords; |
| set <int> targetNullWords; |
|
|
| vector <string> currE; |
| vector <string> currF; |
| vector <string> currA; |
|
|
| loadInput(argv[4],singletons); |
|
|
| for(int i=0; i<singletons.size(); i++) |
| sTons[singletons[i]]=i; |
|
|
| loadInput(argv[1],e); |
| loadInput(argv[2],f); |
| loadInput(argv[3],a); |
|
|
|
|
| for (int i=0; i<a.size(); i++) { |
|
|
|
|
| getWords(e[i],currE); |
| getWords(f[i],currF); |
| getWords(a[i],currA); |
|
|
| constructCepts(ceptsInPhrase, sourceNullWords , targetNullWords, currA , currE.size(), currF.size()); |
| |
| ceptsInGenerativeStoryFormat(ceptsInPhrase , gCepts , sourceNullWords, currE); |
| |
| |
| generateStory(gCepts, targetNullWords ,currF,sTons); |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| } |
|
|
|
|
| return 0; |
|
|
| } |
|
|