| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #ifndef moses_MonotonicVector_h |
| | #define moses_MonotonicVector_h |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include <vector> |
| | #include <limits> |
| | #include <algorithm> |
| | #include <cstdio> |
| | #include <cassert> |
| |
|
| | #include "ThrowingFwrite.h" |
| | #include "ListCoders.h" |
| | #include "MmapAllocator.h" |
| |
|
| | namespace Moses |
| | { |
| |
|
| | template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32, |
| | template <typename> class Allocator = std::allocator> |
| | class MonotonicVector |
| | { |
| | private: |
| | typedef std::vector<NumT, Allocator<NumT> > Anchors; |
| | typedef std::vector<unsigned int, Allocator<unsigned int> > Diffs; |
| |
|
| | Anchors m_anchors; |
| | Diffs m_diffs; |
| | std::vector<unsigned int> m_tempDiffs; |
| |
|
| | size_t m_size; |
| | PosT m_last; |
| | bool m_final; |
| |
|
| | public: |
| | typedef PosT value_type; |
| |
|
| | MonotonicVector() : m_size(0), m_last(0), m_final(false) {} |
| |
|
| | size_t size() const { |
| | return m_size + m_tempDiffs.size(); |
| | } |
| |
|
| | PosT at(size_t i) const { |
| | PosT s = stepSize; |
| | PosT j = m_anchors[i / s]; |
| | PosT r = i % s; |
| |
|
| | typename Diffs::const_iterator it = m_diffs.begin() + j; |
| |
|
| | PosT k = 0; |
| | k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1); |
| | if(i < m_size) |
| | k += Simple9::DecodeAndSum(it, m_diffs.end(), r); |
| | else if(i < m_size + m_tempDiffs.size()) |
| | for(size_t l = 0; l < r; l++) |
| | k += m_tempDiffs[l]; |
| |
|
| | return k; |
| | } |
| |
|
| | PosT operator[](PosT i) const { |
| | return at(i); |
| | } |
| |
|
| | PosT back() const { |
| | return at(size()-1); |
| | } |
| |
|
| | void push_back(PosT i) { |
| | assert(m_final != true); |
| |
|
| | if(m_anchors.size() == 0 && m_tempDiffs.size() == 0) { |
| | m_anchors.push_back(0); |
| | VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs)); |
| | m_last = i; |
| | m_size++; |
| |
|
| | return; |
| | } |
| |
|
| | if(m_tempDiffs.size() == stepSize-1) { |
| | Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), |
| | std::back_inserter(m_diffs)); |
| | m_anchors.push_back(m_diffs.size()); |
| | VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs)); |
| |
|
| | m_size += m_tempDiffs.size() + 1; |
| | m_tempDiffs.clear(); |
| | } else { |
| | PosT last = m_last; |
| | PosT diff = i - last; |
| | m_tempDiffs.push_back(diff); |
| | } |
| | m_last = i; |
| | } |
| |
|
| | void commit() { |
| | assert(m_final != true); |
| | Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(), |
| | std::back_inserter(m_diffs)); |
| | m_size += m_tempDiffs.size(); |
| | m_tempDiffs.clear(); |
| | m_final = true; |
| | } |
| |
|
| | size_t usage() { |
| | return m_diffs.size() * sizeof(unsigned int) |
| | + m_anchors.size() * sizeof(NumT); |
| | } |
| |
|
| | size_t load(std::FILE* in, bool map = false) { |
| | size_t byteSize = 0; |
| |
|
| | byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool); |
| | byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t); |
| | byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT); |
| |
|
| | byteSize += loadVector(m_diffs, in, map); |
| | byteSize += loadVector(m_anchors, in, map); |
| |
|
| | return byteSize; |
| | } |
| |
|
| | template <typename ValueT> |
| | size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v, |
| | std::FILE* in, bool map = false) { |
| | |
| | assert(map == false); |
| |
|
| | size_t byteSize = 0; |
| |
|
| | size_t valSize; |
| | byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); |
| |
|
| | v.resize(valSize, 0); |
| | byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); |
| |
|
| | return byteSize; |
| | } |
| |
|
| | template <typename ValueT> |
| | size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v, |
| | std::FILE* in, bool map = false) { |
| | size_t byteSize = 0; |
| |
|
| | size_t valSize; |
| | byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t); |
| |
|
| | if(map == false) { |
| | |
| | |
| |
|
| | v.resize(valSize, 0); |
| | byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT); |
| | } else { |
| | |
| | |
| |
|
| | size_t valPos = std::ftell(in); |
| |
|
| | Allocator<ValueT> alloc(in, valPos); |
| | std::vector<ValueT, Allocator<ValueT> > vTemp(alloc); |
| | vTemp.resize(valSize); |
| | v.swap(vTemp); |
| |
|
| | std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR); |
| | byteSize += valSize * sizeof(ValueT); |
| | } |
| |
|
| | return byteSize; |
| | } |
| |
|
| | size_t save(std::FILE* out) { |
| | if(!m_final) |
| | commit(); |
| |
|
| | bool byteSize = 0; |
| | byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool); |
| | byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) * sizeof(size_t); |
| | byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT); |
| |
|
| | size_t size = m_diffs.size(); |
| | byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); |
| | byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) * sizeof(unsigned int); |
| |
|
| | size = m_anchors.size(); |
| | byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t); |
| | byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) * sizeof(NumT); |
| |
|
| | return byteSize; |
| | } |
| |
|
| | void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv) { |
| | if(!m_final) |
| | commit(); |
| |
|
| | m_diffs.swap(mv.m_diffs); |
| | m_anchors.swap(mv.m_anchors); |
| | } |
| | }; |
| |
|
| | } |
| | #endif |
| |
|