File size: 1,952 Bytes
158b61b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
#pragma once
#include <boost/unordered_set.hpp>
#include <boost/unordered_map.hpp>
#include <cstdio>
#include <sstream>
#include <fstream>
#include <iostream>
#include <string>
#include <queue>
#include <sys/stat.h> //mkdir
#include "hash.h" //Includes line_splitter
#include "probing_hash_utils.h"
#include "vocabid.h"
#include "util/file_piece.hh"
#include "util/file.hh"
namespace probingpt
{
// A source phrase stored as an ordered sequence of 64-bit ids.
// NOTE(review): presumably one vocabulary id per token (cf. vocabid.h) — confirm.
typedef std::vector<uint64_t> SourcePhrase;
// A tree node that owns its children by value in a hash map keyed by a
// 64-bit id. Add() and Write() are only declared here; their definitions are
// not visible in this header.
class Node
{
  typedef boost::unordered_map<uint64_t, Node> Children;
  Children m_children; // child nodes, keyed by uint64_t

public:
  uint64_t key; // 64-bit key of this node; 0 until something assigns it
  bool done;    // false on construction

  // Gives every field a defined value. Previously only `done` was set, so
  // reading `key` on a freshly constructed node (for example one that was
  // default-inserted into m_children) read an indeterminate value.
  Node()
    :key(0)
    ,done(false)
  {}

  // NOTE(review): presumably inserts sourcePhrase into this subtree, starting
  // at element `pos` (defaults to 0) — confirm against the definition.
  void Add(Table &table, const SourcePhrase &sourcePhrase, size_t pos = 0);

  // NOTE(review): presumably writes this node (and/or its children) into
  // `table` — confirm against the definition.
  void Write(Table &table);
};
// Declaration only; the definition is not visible in this header.
// NOTE(review): the parameter meanings below are inferred from the names alone
// and must be confirmed against the definition:
//   phrasetable_path - presumably the input phrase table to read
//   basepath         - presumably the output location for the generated files
//   num_scores       - presumably the number of scores per entry
//   num_lex_scores   - presumably the number of lexicalised scores per entry
//   log_prob         - presumably whether scores are stored as log-probabilities
//   max_cache_size   - presumably an upper bound on the number of cached items
//   scfg             - presumably selects a synchronous-CFG style table
void createProbingPT(const std::string &phrasetable_path,
const std::string &basepath, int num_scores, int num_lex_scores,
bool log_prob, int max_cache_size, bool scfg);
// Declaration only: maps a sequence of 64-bit ids to a single 64-bit value.
// NOTE(review): how the ids are combined is not visible here — see the definition.
uint64_t getKey(const std::vector<uint64_t> &source_phrase);
// Declaration only: returns a new id sequence derived from vocabid_source and endPos.
// NOTE(review): presumably the leading elements of vocabid_source up to endPos;
// whether endPos is inclusive is not visible here — confirm against the definition.
std::vector<uint64_t> CreatePrefix(const std::vector<uint64_t> &vocabid_source, size_t endPos);
// Renders the elements of vec in order via operator<<, writing a single space
// after every element. A non-empty vector therefore yields a string with a
// trailing space; an empty vector yields "".
template<typename T>
std::string Debug(const std::vector<T> &vec)
{
  typedef typename std::vector<T>::const_iterator ConstIter;

  std::ostringstream out;
  for (ConstIter it = vec.begin(); it != vec.end(); ++it) {
    out << *it << ' ';
  }
  return out.str();
}
// Declaration only: returns a count computed from the file at `path`.
// NOTE(review): presumably the number of distinct source phrases in that
// file — confirm against the definition.
size_t countUniqueSource(const std::string &path);
// A source string together with its 64-bit key and a count.
// Note that the ordering is reversed with respect to count: x < y holds
// exactly when x.count is greater than y.count.
class CacheItem
{
public:
  std::string source;
  uint64_t sourceKey;
  float count;

  // Copies the three values into the corresponding members.
  CacheItem(const std::string &vSource, uint64_t vSourceKey, float vCount)
    : source(vSource), sourceKey(vSourceKey), count(vCount)
  {}

  // True when this item has the strictly larger count.
  bool operator<(const CacheItem &other) const
  {
    return other.count < count;
  }
};
class CacheItemOrderer
{
public:
bool operator()(const CacheItem* a, const CacheItem* b) const {
return (*a) < (*b);
}
};
// Declaration only; the definition is not visible in this header.
// Takes a priority queue of CacheItem pointers (ordered by CacheItemOrderer)
// by non-const reference, so the definition may modify the queue.
// NOTE(review): presumably writes the queued items to the file at `path`,
// using totalSourceCount to normalise the counts — confirm against the
// definition, including who owns/frees the CacheItem pointers.
void serialize_cache(
std::priority_queue<CacheItem*, std::vector<CacheItem*>, CacheItemOrderer> &cache,
const std::string &path, float totalSourceCount);
}
|