|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef MERT_HYPERGRAPH_H |
|
#define MERT_HYPERGRAPH_H |
|
|
|
#include <string> |
|
|
|
#include <boost/noncopyable.hpp> |
|
#include <boost/scoped_array.hpp> |
|
#include <boost/shared_ptr.hpp> |
|
#include <boost/functional/hash/hash.hpp> |
|
#include <boost/unordered_map.hpp> |
|
|
|
|
|
#include "util/exception.hh" |
|
#include "util/file_piece.hh" |
|
#include "util/murmur_hash.hh" |
|
#include "util/pool.hh" |
|
#include "util/string_piece.hh" |
|
|
|
#include "FeatureStats.h" |
|
|
|
namespace MosesTuning |
|
{ |
|
|
|
typedef unsigned int WordIndex; |
|
const WordIndex kMaxWordIndex = UINT_MAX; |
|
const FeatureStatsType kMinScore = -1e10; |
|
|
|
template <class T> class FixedAllocator : boost::noncopyable |
|
{ |
|
public: |
|
FixedAllocator() : current_(NULL), end_(NULL) {} |
|
|
|
void Init(std::size_t count) { |
|
assert(!current_); |
|
array_.reset(new T[count]); |
|
current_ = array_.get(); |
|
end_ = current_ + count; |
|
} |
|
|
|
T &operator[](std::size_t idx) { |
|
return array_.get()[idx]; |
|
} |
|
const T &operator[](std::size_t idx) const { |
|
return array_.get()[idx]; |
|
} |
|
|
|
T *New() { |
|
T *ret = current_++; |
|
UTIL_THROW_IF(ret >= end_, util::Exception, "Allocating past end"); |
|
return ret; |
|
} |
|
|
|
std::size_t Capacity() const { |
|
return end_ - array_.get(); |
|
} |
|
|
|
std::size_t Size() const { |
|
return current_ - array_.get(); |
|
} |
|
|
|
private: |
|
boost::scoped_array<T> array_; |
|
T *current_, *end_; |
|
}; |
|
|
|
|
|
class Vocab |
|
{ |
|
public: |
|
Vocab(); |
|
|
|
typedef std::pair<const char *const, WordIndex> Entry; |
|
|
|
const Entry &FindOrAdd(const StringPiece &str); |
|
|
|
const Entry& Bos() const { |
|
return bos_; |
|
} |
|
|
|
const Entry& Eos() const { |
|
return eos_; |
|
} |
|
|
|
private: |
|
util::Pool piece_backing_; |
|
|
|
struct Hash : public std::unary_function<const char *, std::size_t> { |
|
std::size_t operator()(StringPiece str) const { |
|
return util::MurmurHashNative(str.data(), str.size()); |
|
} |
|
}; |
|
|
|
struct Equals : public std::binary_function<const char *, const char *, bool> { |
|
bool operator()(StringPiece first, StringPiece second) const { |
|
return first == second; |
|
} |
|
}; |
|
|
|
typedef boost::unordered_map<const char *, WordIndex, Hash, Equals> Map; |
|
Map map_; |
|
Entry eos_; |
|
Entry bos_; |
|
|
|
}; |
|
|
|
typedef std::vector<const Vocab::Entry*> WordVec; |
|
|
|
class Vertex; |
|
|
|
|
|
typedef boost::shared_ptr<SparseVector> FeaturePtr; |
|
|
|
|
|
|
|
|
|
class Edge |
|
{ |
|
public: |
|
Edge() { |
|
features_.reset(new SparseVector()); |
|
} |
|
|
|
void AddWord(const Vocab::Entry *word) { |
|
words_.push_back(word); |
|
} |
|
|
|
void AddChild(size_t child) { |
|
children_.push_back(child); |
|
} |
|
|
|
void AddFeature(const StringPiece& name, FeatureStatsType value) { |
|
|
|
features_->set(name.as_string(),value); |
|
} |
|
|
|
|
|
const WordVec &Words() const { |
|
return words_; |
|
} |
|
|
|
const FeaturePtr& Features() const { |
|
return features_; |
|
} |
|
|
|
void SetFeatures(const FeaturePtr& features) { |
|
features_ = features; |
|
} |
|
|
|
const std::vector<size_t>& Children() const { |
|
return children_; |
|
} |
|
|
|
FeatureStatsType GetScore(const SparseVector& weights) const { |
|
return inner_product(*(features_.get()), weights); |
|
} |
|
|
|
private: |
|
|
|
std::vector<const Vocab::Entry*> words_; |
|
std::vector<size_t> children_; |
|
boost::shared_ptr<SparseVector> features_; |
|
}; |
|
|
|
|
|
|
|
|
|
class Vertex |
|
{ |
|
public: |
|
Vertex() : sourceCovered_(0) {} |
|
|
|
void AddEdge(const Edge* edge) { |
|
incoming_.push_back(edge); |
|
} |
|
|
|
void SetSourceCovered(size_t sourceCovered) { |
|
sourceCovered_ = sourceCovered; |
|
} |
|
|
|
const std::vector<const Edge*>& GetIncoming() const { |
|
return incoming_; |
|
} |
|
|
|
size_t SourceCovered() const { |
|
return sourceCovered_; |
|
} |
|
|
|
private: |
|
std::vector<const Edge*> incoming_; |
|
size_t sourceCovered_; |
|
}; |
|
|
|
|
|
class Graph : boost::noncopyable |
|
{ |
|
public: |
|
Graph(Vocab& vocab) : vocab_(vocab) {} |
|
|
|
void SetCounts(std::size_t vertices, std::size_t edges) { |
|
vertices_.Init(vertices); |
|
edges_.Init(edges); |
|
} |
|
|
|
Vocab &MutableVocab() { |
|
return vocab_; |
|
} |
|
|
|
Edge *NewEdge() { |
|
return edges_.New(); |
|
} |
|
|
|
Vertex *NewVertex() { |
|
return vertices_.New(); |
|
} |
|
|
|
const Vertex &GetVertex(std::size_t index) const { |
|
return vertices_[index]; |
|
} |
|
|
|
Edge &GetEdge(std::size_t index) { |
|
return edges_[index]; |
|
} |
|
|
|
|
|
|
|
|
|
void Prune(Graph* newGraph, const SparseVector& weights, size_t minEdgeCount) const; |
|
|
|
std::size_t VertexSize() const { |
|
return vertices_.Size(); |
|
} |
|
std::size_t EdgeSize() const { |
|
return edges_.Size(); |
|
} |
|
|
|
bool IsBoundary(const Vocab::Entry* word) const { |
|
return word->second == vocab_.Bos().second || word->second == vocab_.Eos().second; |
|
} |
|
|
|
private: |
|
FixedAllocator<Edge> edges_; |
|
FixedAllocator<Vertex> vertices_; |
|
Vocab& vocab_; |
|
}; |
|
|
|
class HypergraphException : public util::Exception |
|
{ |
|
public: |
|
HypergraphException() {} |
|
~HypergraphException() throw() {} |
|
}; |
|
|
|
|
|
void ReadGraph(util::FilePiece &from, Graph &graph); |
|
|
|
|
|
}; |
|
|
|
#endif |
|
|