| #include "SpanLengthPhraseProperty.h" |
| #include "moses/Util.h" |
| #include "util/exception.hh" |
|
|
| using namespace std; |
|
|
| namespace Moses |
| { |
| SpanLengthPhraseProperty::SpanLengthPhraseProperty() |
| { |
| } |
|
|
| void SpanLengthPhraseProperty::ProcessValue(const std::string &value) |
| { |
| vector<string> toks; |
| Tokenize(toks, value); |
|
|
| set< vector<string> > indices; |
|
|
| for (size_t i = 0; i < toks.size(); ++i) { |
| const string &span = toks[i]; |
|
|
| |
| vector<string> toks; |
| Tokenize<string>(toks, span, ","); |
| UTIL_THROW_IF2(toks.size() != 1 && toks.size() != 3, "Incorrect format for SpanLength: " << span); |
|
|
| if (toks.size() == 1) { |
| float count = Scan<float>(toks[0]); |
| Populate(indices, count); |
|
|
| indices.clear(); |
| } else { |
| indices.insert(toks); |
| } |
| } |
|
|
| |
| CalcTotals(m_source); |
| CalcTotals(m_target); |
| } |
|
|
| void SpanLengthPhraseProperty::Populate(const set< vector<string> > &indices, float count) |
| { |
| set< vector<string> >::const_iterator iter; |
| for (iter = indices.begin(); iter != indices.end(); ++iter) { |
| const vector<string> &toksStr = *iter; |
| vector<size_t> toks = Scan<size_t>(toksStr); |
| UTIL_THROW_IF2(toks.size() != 3, "Incorrect format for SpanLength. Size is " << toks.size()); |
|
|
| Populate(toks, count); |
| } |
| } |
|
|
| void SpanLengthPhraseProperty::Populate(const std::vector<size_t> &toks, float count) |
| { |
| size_t ntInd = toks[0]; |
| size_t sourceLength = toks[1]; |
| size_t targetLength = toks[2]; |
| if (ntInd >= m_source.size() ) { |
| m_source.resize(ntInd + 1); |
| m_target.resize(ntInd + 1); |
| } |
|
|
| Map &sourceMap = m_source[ntInd].first; |
| Map &targetMap = m_target[ntInd].first; |
| Populate(sourceMap, sourceLength, count); |
| Populate(targetMap, targetLength, count); |
| } |
|
|
| void SpanLengthPhraseProperty::Populate(Map &map, size_t span, float count) |
| { |
| Map::iterator iter; |
| iter = map.find(span); |
| if (iter != map.end()) { |
| float &value = iter->second; |
| value += count; |
| } else { |
| map[span] = count; |
| } |
| } |
|
|
| void SpanLengthPhraseProperty::CalcTotals(Vec &vec) |
| { |
| for (size_t i = 0; i < vec.size(); ++i) { |
| float total = 0; |
|
|
| const Map &map = vec[i].first; |
| Map::const_iterator iter; |
| for (iter = map.begin(); iter != map.end(); ++iter) { |
| float count = iter->second; |
| total += count; |
| } |
|
|
| vec[i].second = total; |
| } |
| } |
|
|
| float SpanLengthPhraseProperty::GetProb(size_t ntInd, size_t sourceWidth, float smoothing) const |
| { |
| float count; |
|
|
| const std::pair<Map, float> &data = m_source[ntInd]; |
| const Map &map = data.first; |
|
|
| if (map.size() == 0) { |
| |
| return 1.0f; |
| } |
|
|
| Map::const_iterator iter = map.find(sourceWidth); |
| if (iter == map.end()) { |
| count = 0; |
| } else { |
| count = iter->second; |
| } |
| count += smoothing; |
|
|
| float total = data.second + smoothing * (float) map.size(); |
| float ret = count / total; |
| return ret; |
| } |
|
|
| } |
|
|