Upload folder using huggingface_hub

41f6dd8 verified about 1 year ago

31.1 kB

	/**
	* Common lossy counting phrase extraction functionality implementation.
	*
	* Note: The bulk of this unit is based on Philipp Koehn's code from
	* phrase-extract/extract.cpp.
	*
	* (C) Moses: http://www.statmt.org/moses/
	* (C) Ceslav Przywara, UFAL MFF UK, 2011
	*
	* $Id$
	*/

	#include <iostream>
	#include <iomanip>
	#include <sstream>

	#include "phrase-extract.h"
	#include "ISS.h"
	// I'm using my own version of SafeGetline (without "using namespace std;"):
	#include "SafeGetline.h"


	#define LINE_MAX_LENGTH 60000


	//////// Helping functions ////////

	// Types used to buffer phrase pairs for sorted output:
	// output_pair_t   - one indexed phrase pair together with its frequency,
	// output_vector_t - the buffer that processSortedOutput() fills and sorts.
	typedef std::pair<indexed_phrases_pair_t, PhrasePairsLossyCounter::frequency_t> output_pair_t;
	typedef std::vector<output_pair_t> output_vector_t;

	/**
	 * Comparison functor handed to std::sort when ordering buffered phrase pairs.
	 *
	 * Phrase pairs are compared by one phrase side first, then by the other side,
	 * and finally by their alignment points. Which side comes first is selected
	 * at construction time.
	 */
	class PhraseComp {
	private:
	  /** When true the target phrase is the primary sort key, otherwise the source phrase. */
	  bool _inverted;

	  /** Three-way comparison of two phrases (negative, zero or positive result). */
	  int comparePhrases(const indexed_phrases_pair_t::phrase_t& a, const indexed_phrases_pair_t::phrase_t& b);

	  /** Tie-breaker: returns true if alignment of a orders before alignment of b. */
	  bool compareAlignments(const indexed_phrases_pair_t& a, const indexed_phrases_pair_t& b);

	public:
	  /** @param inverted true = sort by target phrase first. */
	  PhraseComp(bool inverted): _inverted(inverted) {}

	  /** Returns true if a orders before b. */
	  bool operator()(const output_pair_t& a, const output_pair_t& b);
	};

	// Forward declarations of helpers defined further down in this unit.

	// Drains all lossy counters, sorts the phrase pairs and passes them to the processor.
	void processSortedOutput(OutputProcessor& processor);

	// Passes the content of all lossy counters to the processor in storage order.
	void processUnsortedOutput(OutputProcessor& processor);

	// Converts one indexed phrase pair back to strings and invokes the processor on it.
	// Note: the definition below supplies a default value (0) for mode.
	void flushPhrasePair(OutputProcessor& processor, const indexed_phrases_pair_t& indexedPhrasePair, PhrasePairsLossyCounter::frequency_t frequency, int mode);


	//////// Define variables declared as extern in the header /////////////////////
	// Not read anywhere in the visible part of this unit.
	// NOTE(review): presumably consumed by the option parsing / output code - confirm.
	bool allModelsOutputFlag = false;

	// Reordering model switches and their types; extract() consults these when
	// orientation info is requested.
	bool wordModel = false; // IBM word model.
	REO_MODEL_TYPE wordType = REO_MSD;
	bool phraseModel = false; // Std phrase-based model.
	REO_MODEL_TYPE phraseType = REO_MSD;
	bool hierModel = false; // Hierarchical model.
	REO_MODEL_TYPE hierType = REO_MSD;

	// Upper bound on phrase length used by extract(); 0 until set elsewhere.
	int maxPhraseLength = 0; // Eg. 7
	bool translationFlag = true; // Generate extract and extract.inv
	// Gates computation of orientation strings in extract().
	bool orientationFlag = false; // Ordering info needed?
	// Selects processSortedOutput() over processUnsortedOutput() in processOutput().
	bool sortedOutput = false; // Sort output?

	// Lossy counters indexed by phrase pair length (see addPhrase()); index 0 is
	// skipped by the output and statistics code. The output code tolerates
	// neighbouring indices pointing to the same counter object.
	LossyCountersVector lossyCounters;

	#ifdef GET_COUNTS_ONLY
	// Number of extracted phrase pairs per phrase pair length (counting-only build).
	std::vector<size_t> phrasePairsCounters;
	#endif


	//////// Internal module variables /////////////////////////////////////////////

	// Storage mapping source/target tokens to indices (filled in addPhrase(),
	// read back in comparePhrases() and flushPhrasePair()).
	IndexedStringsStorage<word_index_t> strings;
	// Storage mapping orientation info strings to indices (filled in addPhrase(),
	// read back in flushPhrasePair()).
	IndexedStringsStorage<orientation_info_index_t> orientations;


	//////// Untouched Philipp Koehn's code :) /////////////////////////////////////

	/**
	 * Determines the word-based orientation of a phrase relative to its neighbour.
	 *
	 * The function is direction-agnostic: the caller passes zero/unit/countF and
	 * the two comparison callbacks so that the same code scans either forwards
	 * (unit = 1) or backwards (unit = -1).
	 *
	 * @param sentence          aligned sentence pair (queried via isAligned())
	 * @param modelType         REO_MONO stops after the LEFT test, REO_MSD after the
	 *                          RIGHT test; other types continue with the discontinuous scan
	 * @param connectedLeftTop  whether the diagonal neighbour on the "left" is aligned
	 * @param connectedRightTop whether the diagonal neighbour on the "right" is aligned
	 * @param startF, endF      source span in scan direction
	 * @param startE            target start in scan direction
	 * @param endE              unused here, kept for a uniform signature
	 * @param countF            bound for the scan past endF (checked with lt)
	 * @param zero              bound for the scan before startF (checked with ge)
	 * @param unit              step (+1 or -1 in the visible callers)
	 * @param ge, lt            comparison callbacks matching the scan direction
	 * @return LEFT, RIGHT, DRIGHT, DLEFT or UNKNOWN
	 */
	REO_POS getOrientWordModel(SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
	                           bool connectedLeftTop, bool connectedRightTop,
	                           int startF, int endF, int startE, int endE, int countF, int zero, int unit,
	                           bool (*ge)(int, int), bool (*lt)(int, int) )
	{
	  if (connectedLeftTop && !connectedRightTop)
	    return LEFT;
	  if (modelType == REO_MONO)
	    return UNKNOWN;
	  if (!connectedLeftTop && connectedRightTop)
	    return RIGHT;
	  if (modelType == REO_MSD)
	    return UNKNOWN;

	  // Look further away (two or more steps) for an alignment point in the
	  // neighbouring target row, first before the phrase, then after it.
	  for (int indexF = startF - 2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF = indexF - unit)
	    connectedLeftTop = isAligned(sentence, indexF, startE - unit);
	  for (int indexF = endF + 2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF = indexF + unit)
	    connectedRightTop = isAligned(sentence, indexF, startE - unit);

	  if (connectedLeftTop && !connectedRightTop)
	    return DRIGHT;
	  else if (!connectedLeftTop && connectedRightTop)
	    return DLEFT;
	  return UNKNOWN;
	}

	/**
	 * Determines the phrase-based orientation of a phrase, using the corners of
	 * previously extracted (in-bound) phrases instead of single alignment points.
	 *
	 * To be called with countF-1 instead of countF.
	 *
	 * @param sentence          unused here, kept for a uniform signature
	 * @param modelType         REO_MONO stops after the LEFT test, REO_MSD after the RIGHT test
	 * @param connectedLeftTop, connectedRightTop  word-level connectivity of the diagonal neighbours
	 * @param startF, endF, startE  phrase coordinates in scan direction (endE is unused)
	 * @param countF, zero, unit, ge, lt  scan bounds, step and matching comparison callbacks
	 * @param inBottomRight     corner map searched for a neighbour ending at (startF-unit, startE-unit)
	 * @param inBottomLeft      corner map searched for a neighbour at (endF+unit, startE-unit)
	 * @return LEFT, RIGHT, DRIGHT, DLEFT or UNKNOWN
	 */
	REO_POS getOrientPhraseModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
	                              bool connectedLeftTop, bool connectedRightTop,
	                              int startF, int endF, int startE, int endE, int countF, int zero, int unit,
	                              bool (*ge)(int, int), bool (*lt)(int, int),
	                              const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft)
	{
	  HSentenceVertices::const_iterator it;

	  if ((connectedLeftTop && !connectedRightTop) ||
	      //(startE == 0 && startF == 0) ||
	      //(startE == sentence.target.size()-1 && startF == sentence.source.size()-1) ||
	      ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
	       it->second.find(startF - unit) != it->second.end()))
	    return LEFT;
	  if (modelType == REO_MONO)
	    return UNKNOWN;
	  if ((!connectedLeftTop && connectedRightTop) ||
	      ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
	       it->second.find(endF + unit) != it->second.end()))
	    return RIGHT;
	  if (modelType == REO_MSD)
	    return UNKNOWN;

	  // Discontinuous cases: search for a neighbouring phrase corner two or more
	  // steps away in the adjacent target row.
	  connectedLeftTop = false;
	  for (int indexF = startF - 2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF = indexF - unit)
	    if ((connectedLeftTop = ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
	                             it->second.find(indexF) != it->second.end())))
	      return DRIGHT;
	  connectedRightTop = false;
	  for (int indexF = endF + 2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF = indexF + unit)
	    // The lookup result must be compared with end() of the SAME container
	    // (inBottomLeft); comparing against inBottomRight.end() is undefined.
	    if ((connectedRightTop = ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
	                              it->second.find(indexF) != it->second.end())))
	      return DLEFT;
	  return UNKNOWN;
	}

	/**
	 * Determines the hierarchical orientation of a phrase. Works like
	 * getOrientPhraseModel() but additionally consults the corners of phrases
	 * exceeding the length limit (the "out" maps) and the already computed
	 * phrase-based orientation.
	 *
	 * To be called with countF-1 instead of countF.
	 *
	 * @param sentence          unused here, kept for a uniform signature
	 * @param modelType         REO_MONO stops after the LEFT test, REO_MSD after the RIGHT test
	 * @param connectedLeftTop, connectedRightTop  word-level connectivity of the diagonal neighbours
	 * @param startF, endF, startE  phrase coordinates in scan direction (endE is unused)
	 * @param countF, zero, unit, ge, lt  scan bounds, step and matching comparison callbacks
	 * @param inBottomRight, inBottomLeft    corner maps of in-bound phrases
	 * @param outBottomRight, outBottomLeft  corner maps of out-of-bound phrases
	 * @param phraseOrient      orientation from the phrase model; a LEFT/RIGHT value
	 *                          short-circuits the corresponding test, any other
	 *                          non-UNKNOWN value is returned before the discontinuous scan
	 * @return LEFT, RIGHT, DRIGHT, DLEFT or UNKNOWN
	 */
	REO_POS getOrientHierModel (SentenceAlignment & sentence, REO_MODEL_TYPE modelType,
	                            bool connectedLeftTop, bool connectedRightTop,
	                            int startF, int endF, int startE, int endE, int countF, int zero, int unit,
	                            bool (*ge)(int, int), bool (*lt)(int, int),
	                            const HSentenceVertices & inBottomRight, const HSentenceVertices & inBottomLeft,
	                            const HSentenceVertices & outBottomRight, const HSentenceVertices & outBottomLeft,
	                            REO_POS phraseOrient)
	{
	  HSentenceVertices::const_iterator it;

	  if (phraseOrient == LEFT ||
	      (connectedLeftTop && !connectedRightTop) ||
	      // (startE == 0 && startF == 0) ||
	      //(startE == sentence.target.size()-1 && startF == sentence.source.size()-1) ||
	      ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
	       it->second.find(startF - unit) != it->second.end()) ||
	      ((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
	       it->second.find(startF - unit) != it->second.end()))
	    return LEFT;
	  if (modelType == REO_MONO)
	    return UNKNOWN;
	  if (phraseOrient == RIGHT ||
	      (!connectedLeftTop && connectedRightTop) ||
	      ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
	       it->second.find(endF + unit) != it->second.end()) ||
	      ((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
	       it->second.find(endF + unit) != it->second.end()))
	    return RIGHT;
	  if (modelType == REO_MSD)
	    return UNKNOWN;
	  if (phraseOrient != UNKNOWN)
	    return phraseOrient;

	  // Discontinuous cases: search in-bound and out-of-bound corners two or more
	  // steps away in the adjacent target row.
	  connectedLeftTop = false;
	  for (int indexF = startF - 2*unit; (*ge)(indexF, zero) && !connectedLeftTop; indexF = indexF - unit) {
	    if ((connectedLeftTop = ((it = inBottomRight.find(startE - unit)) != inBottomRight.end() &&
	                             it->second.find(indexF) != it->second.end())) ||
	        (connectedLeftTop = ((it = outBottomRight.find(startE - unit)) != outBottomRight.end() &&
	                             it->second.find(indexF) != it->second.end())))
	      return DRIGHT;
	  }
	  connectedRightTop = false;
	  for (int indexF = endF + 2*unit; (*lt)(indexF, countF) && !connectedRightTop; indexF = indexF + unit) {
	    // Each lookup result is compared with end() of the container it came
	    // from; mixing iterators of different containers is undefined.
	    if ((connectedRightTop = ((it = inBottomLeft.find(startE - unit)) != inBottomLeft.end() &&
	                              it->second.find(indexF) != it->second.end())) ||
	        (connectedRightTop = ((it = outBottomLeft.find(startE - unit)) != outBottomLeft.end() &&
	                              it->second.find(indexF) != it->second.end())))
	      return DLEFT;
	  }
	  return UNKNOWN;
	}

	/**
	 * Records the corner (x, y) in the given corner map: x is added to the set
	 * stored under key y, creating that set first if the key is not present yet.
	 */
	void insertVertex( HSentenceVertices & corners, int x, int y )
	{
	  corners[y].insert(x);
	}

	/**
	 * Registers the four corners of the phrase spanning source [startF, endF]
	 * and target [startE, endE], one corner per map.
	 */
	void insertPhraseVertices(
	  HSentenceVertices & topLeft,
	  HSentenceVertices & topRight,
	  HSentenceVertices & bottomLeft,
	  HSentenceVertices & bottomRight,
	  int startF, int startE, int endF, int endE)
	{
	  // Corners lying on the first target position of the phrase.
	  insertVertex(topLeft,  startF, startE);
	  insertVertex(topRight, endF,   startE);

	  // Corners lying on the last target position of the phrase.
	  insertVertex(bottomLeft,  startF, endE);
	  insertVertex(bottomRight, endF,   endE);
	}

	/**
	 * Maps an orientation value to its textual label.
	 *
	 * @param orient     orientation to convert
	 * @param modelType  only consulted for UNKNOWN, whose label depends on the model:
	 *                   "nomono" (REO_MONO), "other" (REO_MSD), "dright" (REO_MSLR)
	 * @return the label; an empty string for any value not covered by the switch
	 *         (previously control fell off the end of the function, which is
	 *         undefined behaviour)
	 */
	std::string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
	{
	  switch (orient) {
	  case LEFT:
	    return "mono";
	  case RIGHT:
	    return "swap";
	  case DRIGHT:
	    return "dright";
	  case DLEFT:
	    return "dleft";
	  case UNKNOWN:
	    switch (modelType) {
	    case REO_MONO:
	      return "nomono";
	    case REO_MSD:
	      return "other";
	    case REO_MSLR:
	      return "dright";
	    }
	    break;
	  }
	  // Unhandled enumerator or out-of-range value.
	  return "";
	}

	/** Returns true when first is greater than or equal to second. */
	bool ge(int first, int second)
	{
	  const bool isSmaller = first < second;
	  return !isSmaller;
	}

	/** Returns true when first is less than or equal to second. */
	bool le(int first, int second)
	{
	  const bool isGreater = first > second;
	  return !isGreater;
	}

	/** Returns true when first is strictly less than second. */
	bool lt(int first, int second)
	{
	  return second > first;
	}

	/**
	 * Tells whether source position fi is aligned to target position ei.
	 *
	 * Two virtual points are treated as aligned: (-1, -1), just before both
	 * sentences, and (source.size(), target.size()), just past both of them.
	 * Any other position outside either sentence is not aligned.
	 *
	 * @param sentence aligned sentence pair
	 * @param fi       source position
	 * @param ei       target position
	 */
	bool isAligned ( SentenceAlignment &sentence, int fi, int ei )
	{
	  if (ei == -1 && fi == -1)
	    return true;
	  if (ei <= -1 || fi <= -1)
	    return false;

	  // Both indices are non-negative from here on; compare as int to avoid
	  // mixing signed and unsigned operands.
	  const int countE = static_cast<int>(sentence.target.size());
	  const int countF = static_cast<int>(sentence.source.size());

	  if (ei == countE && fi == countF)
	    return true;
	  if (ei >= countE || fi >= countF)
	    return false;

	  for (size_t i = 0; i < sentence.alignedToT[ei].size(); i++)
	    if (sentence.alignedToT[ei][i] == fi)
	      return true;
	  return false;
	}

	//////// END OF untouched Philipp Koehn's code :) //////////////////////////////


	/////// Slightly modified Philipp Koehn's code :) //////////////////////////////

	/**
	 * Extracts all phrase pairs consistent with the word alignment of one
	 * sentence pair and hands each of them to addPhrase().
	 *
	 * For every target span [startE, endE] the minimal source span covering all
	 * aligned source words is computed; the pair is rejected if a source word in
	 * that span is also aligned outside the target span. The source span is then
	 * widened over unaligned source words on both sides.
	 *
	 * When neither the phrase nor the hierarchical reordering model is active,
	 * phrase pairs are added immediately (with word-model orientation info if
	 * requested). Otherwise the phrase pairs within the length limit are buffered,
	 * the corners of all phrases are collected, and orientation info is computed
	 * in a second pass over the buffered phrases.
	 *
	 * Reads the globals maxPhraseLength, orientationFlag, wordModel/phraseModel/
	 * hierModel and the matching model types.
	 *
	 * @param sentence sentence pair with its alignment
	 */
	void extract(SentenceAlignment &sentence) {

	  int countE = sentence.target.size();
	  int countF = sentence.source.size();

	  // Phrases within the length limit, buffered for the second pass.
	  HPhraseVector inboundPhrases;

	  // Corners of phrases within the length limit ...
	  HSentenceVertices inTopLeft;
	  HSentenceVertices inTopRight;
	  HSentenceVertices inBottomLeft;
	  HSentenceVertices inBottomRight;

	  // ... and of phrases exceeding it.
	  HSentenceVertices outTopLeft;
	  HSentenceVertices outTopRight;
	  HSentenceVertices outBottomLeft;
	  HSentenceVertices outBottomRight;

	  // With the hierarchical model the length limit is not applied while
	  // enumerating spans; it only decides between the "in" and "out" corner maps.
	  bool relaxLimit = hierModel;
	  bool buildExtraStructure = phraseModel || hierModel;

	  // check alignments for target phrase startE...endE
	  // loop over extracted phrases which are compatible with the word-alignments
	  for (int startE = 0; startE < countE; startE++) {
	    for (
	      int endE = startE;
	      ((endE < countE) && (relaxLimit || (endE < (startE + maxPhraseLength))));
	      endE++
	    ) {

	      int minF = 9999;
	      int maxF = -1;
	      // Per source word: number of alignment points not yet accounted for by
	      // the current target span.
	      std::vector< int > usedF = sentence.alignedCountS;

	      for (int ei = startE; ei <= endE; ei++) {
	        for (size_t i = 0; i < sentence.alignedToT[ei].size(); i++) {
	          int fi = sentence.alignedToT[ei][i];
	          if (fi < minF) {
	            minF = fi;
	          }
	          if (fi > maxF) {
	            maxF = fi;
	          }
	          usedF[ fi ]--;
	        }
	      }

	      if (maxF >= 0 && // aligned to any source words at all
	          (relaxLimit || maxF-minF < maxPhraseLength)) { // source phrase within limits

	        // check if source words are aligned to out of bound target words
	        bool out_of_bounds = false;

	        for (int fi = minF; fi <= maxF && !out_of_bounds; fi++) {
	          if (usedF[fi] > 0) {
	            // cout << "ouf of bounds: " << fi << "\n";
	            out_of_bounds = true;
	          }
	        }

	        // cout << "doing if for ( " << minF << "-" << maxF << ", " << startE << "," << endE << ")\n";
	        if (!out_of_bounds) {
	          // start point of source phrase may retreat over unaligned
	          for (int startF = minF;
	               (startF >= 0 &&
	                (relaxLimit || startF > maxF-maxPhraseLength) && // within length limit
	                (startF == minF || sentence.alignedCountS[startF] == 0)); // unaligned
	               startF--
	              )
	            // end point of source phrase may advance over unaligned
	            for (int endF = maxF;
	                 (endF < countF &&
	                  (relaxLimit || endF < startF+maxPhraseLength) && // within length limit
	                  (endF == maxF || sentence.alignedCountS[endF] == 0)); // unaligned
	                 endF++
	                ) { // at this point we have extracted a phrase
	              if (buildExtraStructure) { // phrase || hier
	                if (endE-startE < maxPhraseLength && endF-startF < maxPhraseLength) { // within limit
	                  inboundPhrases.push_back(
	                    HPhrase(HPhraseVertex(startF,startE), HPhraseVertex(endF,endE))
	                  );
	                  insertPhraseVertices(
	                    inTopLeft, inTopRight, inBottomLeft, inBottomRight,
	                    startF, startE, endF, endE
	                  );
	                } else {
	                  insertPhraseVertices(
	                    outTopLeft, outTopRight, outBottomLeft, outBottomRight,
	                    startF, startE, endF, endE
	                  );
	                }
	              } else {
	                std::string orientationInfo = "";
	                if (orientationFlag && wordModel) { // Added orientationFlag check.
	                  REO_POS wordPrevOrient, wordNextOrient;
	                  bool connectedLeftTopP  = isAligned( sentence, startF-1, startE-1 );
	                  bool connectedRightTopP = isAligned( sentence, endF+1,   startE-1 );
	                  bool connectedLeftTopN  = isAligned( sentence, endF+1,   endE+1 );
	                  bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
	                  wordPrevOrient = getOrientWordModel(sentence, wordType, connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
	                  wordNextOrient = getOrientWordModel(sentence, wordType, connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
	                  orientationInfo += getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType);
	                }
	                addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
	              }
	            }
	        }
	      }
	    }
	  } // end of main for loop

	  if (buildExtraStructure) { // phrase || hier
	    std::string orientationInfo = "";
	    // Initialised so that no path can read an indeterminate value; each one
	    // is overwritten below whenever its model is active.
	    REO_POS wordPrevOrient = UNKNOWN, wordNextOrient = UNKNOWN;
	    REO_POS phrasePrevOrient = UNKNOWN, phraseNextOrient = UNKNOWN;
	    REO_POS hierPrevOrient = UNKNOWN, hierNextOrient = UNKNOWN;

	    for (size_t i = 0; i < inboundPhrases.size(); i++) {
	      int startF = inboundPhrases[i].first.first;
	      int startE = inboundPhrases[i].first.second;
	      int endF = inboundPhrases[i].second.first;
	      int endE = inboundPhrases[i].second.second;

	      if ( orientationFlag ) { // Added orientationFlag check.

	        bool connectedLeftTopP  = isAligned( sentence, startF-1, startE-1 );
	        bool connectedRightTopP = isAligned( sentence, endF+1,   startE-1 );
	        bool connectedLeftTopN  = isAligned( sentence, endF+1,   endE+1 );
	        bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );

	        // The "next" orientation is obtained by running the same routines
	        // in the opposite direction (unit = -1, swapped bounds/callbacks/maps).
	        if (wordModel) {
	          wordPrevOrient = getOrientWordModel(sentence, wordType,
	                                              connectedLeftTopP, connectedRightTopP,
	                                              startF, endF, startE, endE, countF, 0, 1,
	                                              &ge, &lt);

	          wordNextOrient = getOrientWordModel(sentence, wordType,
	                                              connectedLeftTopN, connectedRightTopN,
	                                              endF, startF, endE, startE, 0, countF, -1,
	                                              &lt, &ge);
	        }
	        if (phraseModel) {
	          phrasePrevOrient = getOrientPhraseModel(sentence, phraseType,
	                             connectedLeftTopP, connectedRightTopP,
	                             startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft);
	          phraseNextOrient = getOrientPhraseModel(sentence, phraseType,
	                             connectedLeftTopN, connectedRightTopN,
	                             endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight);
	        } else {
	          phrasePrevOrient = phraseNextOrient = UNKNOWN;
	        }
	        if (hierModel) {
	          hierPrevOrient = getOrientHierModel(sentence, hierType,
	                                              connectedLeftTopP, connectedRightTopP,
	                                              startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
	          hierNextOrient = getOrientHierModel(sentence, hierType,
	                                              connectedLeftTopN, connectedRightTopN,
	                                              endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
	        }

	        // Three " | "-separated fields: word, phrase and hierarchical model;
	        // the field of an inactive model is left empty.
	        orientationInfo = ((wordModel)? getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType) : "") + " | " +
	                          ((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " +
	                          ((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : "");
	      }

	      addPhrase(sentence, startE, endE, startF, endF, orientationInfo);

	    } // end of for loop through inbound phrases

	  } // end if buildExtraStructure

	} // end of extract()


	/**
	 * Registers one extracted phrase pair.
	 *
	 * In a GET_COUNTS_ONLY build only the per-length counter is incremented.
	 * Otherwise the phrase pair is converted to its indexed form (token indices,
	 * orientation info index and alignment points relative to the phrase start)
	 * and added to the lossy counter selected by the phrase pair length, which
	 * is the larger of the two phrase lengths.
	 *
	 * @param sentence        sentence pair the phrase comes from
	 * @param startE          first target position (inclusive)
	 * @param endE            last target position (inclusive)
	 * @param startF          first source position (inclusive)
	 * @param endF            last source position (inclusive)
	 * @param orientationInfo orientation info string (may be empty)
	 */
	void addPhrase(SentenceAlignment &sentence, int startE, int endE, int startF, int endF, std::string &orientationInfo) {

	#ifdef GET_COUNTS_ONLY
	  // Spans are inclusive, hence the +1 when turning a span into a length.
	  const int spanF = endF - startF;
	  const int spanE = endE - startE;
	  phrasePairsCounters[std::max(spanF, spanE) + 1] += 1;
	#else
	  // Alignment points, shifted so that the phrase starts at (0, 0).
	  alignment_t points;
	  for (int tgtPos = startE; tgtPos <= endE; ++tgtPos) {
	    for (int k = 0; k < sentence.alignedToT[tgtPos].size(); ++k) {
	      const int srcPos = sentence.alignedToT[tgtPos][k];
	      points.push_back(alignment_t::value_type(srcPos - startF, tgtPos - startE));
	    }
	  }

	  // Source tokens first, then target tokens (same order as before).
	  indexed_phrases_pair_t::phrase_t sourceIds;
	  for (int srcPos = startF; srcPos <= endF; ++srcPos) {
	    sourceIds.push_back(strings.put(sentence.source[srcPos].c_str()));
	  }

	  indexed_phrases_pair_t::phrase_t targetIds;
	  for (int tgtPos = startE; tgtPos <= endE; ++tgtPos) {
	    targetIds.push_back(strings.put(sentence.target[tgtPos].c_str()));
	  }

	  // TODO: Allow for switching between min and max here.
	  const size_t lengthClass = std::max(sourceIds.size(), targetIds.size());

	  PhrasePairsLossyCounter& counter = lossyCounters[lengthClass]->lossyCounter;
	  counter.add(indexed_phrases_pair_t(sourceIds, targetIds, orientations.put(orientationInfo.c_str()), points));

	  if ( counter.aboutToPrune() ) {
	    // The next addition triggers pruning; report it on stderr.
	    std::cerr << 'P' << lengthClass << std::flush;
	  }
	#endif
	} // end of addPhrase()


	/////// Lossy Counting related code ////////////////////////////////////////////

	/**
	 * Reads the three parallel inputs line by line and runs extract() on every
	 * sentence pair that SentenceAlignment::create() accepts. Reading stops as
	 * soon as the target-side stream reports end of file. A dot is written to
	 * stderr on every 10000th iteration as a progress indicator.
	 *
	 * @param eFile target-side ("english") sentences
	 * @param fFile source-side ("foreign") sentences
	 * @param aFile word alignments
	 */
	void readInput(std::istream& eFile, std::istream& fFile, std::istream& aFile) {

	  // Line buffers are allocated once, outside of the loop.
	  char englishString[LINE_MAX_LENGTH];
	  char foreignString[LINE_MAX_LENGTH];
	  char alignmentString[LINE_MAX_LENGTH];

	  // lineNo is 1-based and is also passed on to create().
	  for (int lineNo = 1; ; ++lineNo) {
	    if (lineNo % 10000 == 0) {
	      std::cerr << "." << std::flush;
	    }

	    SAFE_GETLINE(eFile, englishString, LINE_MAX_LENGTH, '\n', __FILE__);
	    if (eFile.eof()) {
	      break;
	    }
	    SAFE_GETLINE(fFile, foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
	    SAFE_GETLINE(aFile, alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);

	    SentenceAlignment sentence;
	    if (!sentence.create(englishString, foreignString, alignmentString, lineNo)) {
	      continue;
	    }
	    extract(sentence);
	  }

	}


	/**
	 * Sends all collected phrase pairs to the processor, sorted or unsorted
	 * depending on the sortedOutput flag.
	 */
	void processOutput(OutputProcessor& processor) {
	  if ( !sortedOutput ) {
	    processUnsortedOutput(processor);
	    return;
	  }
	  processSortedOutput(processor);
	}


	/**
	 * Orders two output entries: by the primary phrase side, then by the other
	 * side, then by alignment. The primary side is the target phrase when the
	 * comparator is inverted and the source phrase otherwise.
	 *
	 * @return true if a orders before b
	 */
	bool PhraseComp::operator()(const output_pair_t& a, const output_pair_t& b) {

	  const indexed_phrases_pair_t& lhs = a.first;
	  const indexed_phrases_pair_t& rhs = b.first;

	  const int primary = _inverted
	                      ? comparePhrases(lhs.tgtPhrase(), rhs.tgtPhrase())
	                      : comparePhrases(lhs.srcPhrase(), rhs.srcPhrase());
	  if ( primary != 0 ) {
	    return primary < 0;
	  }

	  // Primary side is equal, decide on the other side.
	  const int secondary = _inverted
	                        ? comparePhrases(lhs.srcPhrase(), rhs.srcPhrase())
	                        : comparePhrases(lhs.tgtPhrase(), rhs.tgtPhrase());
	  if ( secondary != 0 ) {
	    return secondary < 0;
	  }

	  // Both phrases are equal, the alignment decides.
	  return compareAlignments(lhs, rhs);

	}


	/**
	 * Orders two phrase pairs by their alignment points.
	 *
	 * Points are compared one by one. Within a point, the coordinate belonging
	 * to the primary phrase side is compared first (target when inverted, source
	 * otherwise), then the other one. If all common points are equal, the phrase
	 * pair with fewer points orders first.
	 *
	 * NOTE(review): assumes alignmentData() is a flat array holding two entries
	 * (source, target) per point and alignmentLength() is the number of points -
	 * confirm against indexed_phrases_pair_t.
	 *
	 * @return true if the alignment of a orders before the alignment of b
	 */
	bool PhraseComp::compareAlignments(const indexed_phrases_pair_t& a, const indexed_phrases_pair_t& b) {

	  const size_t aSize = a.alignmentLength();
	  const size_t bSize = b.alignmentLength();
	  const size_t min = std::min(aSize, bSize);
	  const indexed_phrases_pair_t::alignment_point_t * aAlignment = a.alignmentData();
	  const indexed_phrases_pair_t::alignment_point_t * bAlignment = b.alignmentData();
	  const size_t pointSize = sizeof(indexed_phrases_pair_t::alignment_point_t);

	  // Important: alignments have to be inverted together with the phrases.
	  // Offset within a point: 0 = source coordinate, 1 = target coordinate.
	  const size_t firstOffset = _inverted ? 1 : 0;
	  const size_t secondOffset = _inverted ? 0 : 1;

	  for ( size_t i = 0; i < min; ++i ) {
	    int cmp = memcmp(aAlignment + i*2 + firstOffset, bAlignment + i*2 + firstOffset, pointSize);
	    if ( cmp == 0 ) {
	      cmp = memcmp(aAlignment + i*2 + secondOffset, bAlignment + i*2 + secondOffset, pointSize);
	    }
	    if ( cmp != 0 ) {
	      return cmp < 0;
	    } // Otherwise continue with the next point.
	  }

	  // Note: LC_ALL=C GNU sort treats shorter item as lesser than longer one.
	  return aSize < bSize;

	}


	/**
	 * Three-way comparison of two phrases given as sequences of token indices.
	 *
	 * Tokens are looked up in the strings storage and compared with strcmp()
	 * position by position. When one phrase is a prefix of the other, the first
	 * extra token of the longer phrase is compared against "|||", i.e. the
	 * shorter phrase is treated as if it were followed by that separator token.
	 *
	 * @return negative if a orders before b, zero if equal, positive otherwise
	 */
	int PhraseComp::comparePhrases(const indexed_phrases_pair_t::phrase_t& a, const indexed_phrases_pair_t::phrase_t& b) {

	  const size_t aSize = a.size();
	  const size_t bSize = b.size();
	  const size_t min = std::min(aSize, bSize);

	  for ( size_t i = 0; i < min; ++i ) {
	    const int cmp = strcmp(strings.get(a[i]), strings.get(b[i]));
	    if ( cmp != 0 ) {
	      return cmp;
	    }
	  }

	  if ( aSize == bSize ) {
	    return 0;
	  }

	  // One phrase is a proper prefix of the other.
	  if ( aSize < bSize ) {
	    return strcmp("|||", strings.get(b[min]));
	  }
	  else {
	    return strcmp(strings.get(a[min]), "|||");
	  }

	}


	/**
	 * Drains every lossy counter into one buffer, then emits the buffer twice:
	 * first sorted by source phrase (mode 1), then sorted by target phrase
	 * (mode -1). Per-counter output statistics are updated while draining.
	 */
	void processSortedOutput(OutputProcessor& processor) {

	  output_vector_t buffer;

	  LossyCountersVector::value_type prev = NULL;

	  for ( size_t i = 1; i < lossyCounters.size(); ++i ) { // Intentionally skip 0.
	    LossyCountersVector::value_type current = lossyCounters[i];
	    if ( current == prev ) {
	      // Same counter as at the previous index, already drained.
	      continue;
	    }

	    PhrasePairsLossyCounter& lossyCounter = current->lossyCounter;
	    PhrasePairsLossyCounter::erasing_iterator phraseIter = lossyCounter.beginErase();
	    while ( phraseIter != lossyCounter.endErase() ) {
	      // Store and...
	      buffer.push_back(std::make_pair(phraseIter.item(), phraseIter.frequency()));
	      // ...update counters.
	      current->outputMass += phraseIter.frequency();
	      current->outputSize += 1;
	      ++phraseIter;
	    }

	    prev = current;
	  }

	  // Pass 0: by source phrase, mode 1. Pass 1: by target phrase, mode -1.
	  for ( int pass = 0; pass < 2; ++pass ) {
	    const bool byTarget = (pass == 1);
	    const int mode = byTarget ? -1 : 1;

	    std::sort(buffer.begin(), buffer.end(), PhraseComp(byTarget));

	    for ( output_vector_t::const_iterator entry = buffer.begin(); entry != buffer.end(); ++entry ) {
	      flushPhrasePair(processor, entry->first, entry->second, mode);
	    }
	  }

	}


	/**
	 * Emits the content of every lossy counter in iteration order (mode 0) and
	 * updates the per-counter output statistics.
	 */
	void processUnsortedOutput(OutputProcessor& processor) {

	  LossyCountersVector::value_type prev = NULL;

	  for ( size_t i = 1; i < lossyCounters.size(); ++i ) { // Intentionally skip 0.

	    LossyCountersVector::value_type current = lossyCounters[i];
	    if ( current == prev ) {
	      // Same counter as at the previous index, already emitted.
	      continue;
	    }

	    const PhrasePairsLossyCounter& lossyCounter = current->lossyCounter;
	    PhrasePairsLossyCounter::const_iterator phraseIter = lossyCounter.begin();
	    while ( phraseIter != lossyCounter.end() ) {
	      // Flush and...
	      flushPhrasePair(processor, phraseIter.item(), phraseIter.frequency(), 0);
	      // ...update counters.
	      current->outputMass += phraseIter.frequency();
	      current->outputSize += 1;
	      ++phraseIter;
	    }

	    prev = current;
	  }

	}


	/**
	 * Converts an indexed phrase pair back to text and passes it to the processor.
	 *
	 * Tokens of each phrase are joined with single spaces. An empty phrase
	 * yields an empty string (previously the trailing-space trim computed
	 * size() - 1 on an empty string, which wraps around).
	 *
	 * @param processor         functor receiving source phrase, target phrase,
	 *                          orientation info, alignment, frequency and mode
	 * @param indexedPhrasePair phrase pair in indexed form
	 * @param frequency         frequency reported for the phrase pair
	 * @param mode              passed through unchanged; callers in this unit use
	 *                          1 (sorted by source), -1 (sorted by target), 0 (unsorted)
	 */
	void flushPhrasePair(OutputProcessor& processor, const indexed_phrases_pair_t& indexedPhrasePair, PhrasePairsLossyCounter::frequency_t frequency, int mode = 0) {

	  // Bound by const reference so that no copy is made when the accessors
	  // return references (a returned temporary would simply have its lifetime extended).
	  const indexed_phrases_pair_t::phrase_t& srcPhraseIndices = indexedPhrasePair.srcPhrase();
	  const indexed_phrases_pair_t::phrase_t& tgtPhraseIndices = indexedPhrasePair.tgtPhrase();

	  std::string srcPhrase, tgtPhrase;

	  for ( indexed_phrases_pair_t::phrase_t::const_iterator indexIter = srcPhraseIndices.begin(); indexIter != srcPhraseIndices.end(); ++indexIter ) {
	    if ( indexIter != srcPhraseIndices.begin() ) {
	      srcPhrase += ' ';
	    }
	    srcPhrase += strings.get(*indexIter);
	  }

	  for ( indexed_phrases_pair_t::phrase_t::const_iterator indexIter = tgtPhraseIndices.begin(); indexIter != tgtPhraseIndices.end(); ++indexIter ) {
	    if ( indexIter != tgtPhraseIndices.begin() ) {
	      tgtPhrase += ' ';
	    }
	    tgtPhrase += strings.get(*indexIter);
	  }

	  // Actual processing is done via call to functor:
	  processor(srcPhrase, tgtPhrase, orientations.get(indexedPhrasePair.orientationInfo()), indexedPhrasePair.alignment(), frequency, mode);
	}


	/**
	 * Prints a table of lossy counting statistics to stderr: one row per
	 * distinct lossy counter (labelled with the range of phrase pair lengths
	 * that share it) followed by a row of totals.
	 *
	 * Relies on outputMass/outputSize having been filled by the output code.
	 * Ratios are computed without a zero check, so a counter (or total) with a
	 * zero input count prints a non-finite percentage.
	 */
	void printStats(void) {

	  // Total counters.
	  size_t outputMass = 0, outputSize = 0, N = 0;

	  const std::string hline = "####################################################################################################################";

	  std::cerr << "Lossy Counting Phrase Extraction statistics:" << std::endl;

	  // Print header: | 3 | 15 | 15 | 15 | 7 | 10 | 10 | 10 |
	  std::cerr
	      << hline << std::endl
	      << "# length # unique out # total out # total in (N) # out/in (%) # pos. thr. # neg. thr. # max. err. #" << std::endl
	      << hline << std::endl;

	  LossyCountersVector::value_type current = NULL, prev = NULL;
	  size_t from = 1, to = 1;

	  for ( size_t i = 1; i <= lossyCounters.size(); ++i ) { // Intentionally skip 0, intentionally increment till == size().

	    // One extra iteration with current == NULL flushes the last counter.
	    current = (i < lossyCounters.size()) ? lossyCounters[i] : NULL;

	    if ( current != prev ) {

	      // A row is printed only for an existing previous counter. Without
	      // this check prev was dereferenced while NULL when the vector held
	      // nothing but the skipped slot 0 (or contained a NULL entry).
	      if ( prev != NULL ) {
	        // Time to print.
	        to = i-1;

	        // Increment overall stats.
	        outputMass += prev->outputMass;
	        outputSize += prev->outputSize;
	        N += prev->lossyCounter.count();

	        // Print.
	        if ( from == to ) {
	          std::cerr << "# " << std::setw(6) << to << " # ";
	        }
	        else {
	          std::stringstream strStr;
	          strStr << from << "-" << to;
	          std::cerr << "# " << std::setw(6) << strStr.str() << " # ";
	        }
	        // Print the rest of record.
	        std::cerr
	            << std::setw(15) << prev->outputSize << " # "
	            << std::setw(15) << prev->outputMass << " # "
	            << std::setw(15) << prev->lossyCounter.count() << " # "
	            << std::setw(10) << std::setprecision(4) << (static_cast<double>(prev->outputMass) / static_cast<double>(prev->lossyCounter.count())) * 100 << " # "
	            << std::setw(10) << prev->lossyCounter.threshold(true) << " # "
	            << std::setw(10) << prev->lossyCounter.threshold() << " # "
	            << std::setw(10) << prev->lossyCounter.maxError() << " #"
	            << std::endl << hline << std::endl;
	      }

	      // A new counter (if any) starts at this index.
	      from = i;
	    }

	    prev = current;

	  }

	  // Print summary:
	  std::cerr
	      << "# TOTAL # "
	      << std::setw(15) << outputSize << " # "
	      << std::setw(15) << outputMass << " # "
	      << std::setw(15) << N << " # "
	      << std::setw(10) << std::setprecision(4) << (static_cast<double>(outputMass) / static_cast<double>(N)) * 100 << " #"
	      << std::endl
	      << "#############################################################################" << std::endl;

	}