#ifndef LM_FILTER_FORMAT_H
#define LM_FILTER_FORMAT_H

#include "arpa_io.hh"
#include "count_io.hh"

#include <boost/lexical_cast.hpp>
#include <boost/ptr_container/ptr_vector.hpp>

#include <iosfwd>

namespace lm {

template <class Single> class MultipleOutput {
  private:
    typedef boost::ptr_vector<Single> Singles;
    typedef typename Singles::iterator SinglesIterator;

  public:
    MultipleOutput(const char *prefix, size_t number) {
      files_.reserve(number);
      std::string tmp;
      for (unsigned int i = 0; i < number; ++i) {
        tmp = prefix;
        tmp += boost::lexical_cast<std::string>(i);
        files_.push_back(new Single(tmp.c_str()));
      }
    }

    void AddNGram(const StringPiece &line) {
      for (SinglesIterator i = files_.begin(); i != files_.end(); ++i)
        i->AddNGram(line);
    }

    template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
      for (SinglesIterator i = files_.begin(); i != files_.end(); ++i)
        i->AddNGram(begin, end, line);
    }

    void SingleAddNGram(size_t offset, const StringPiece &line) {
      files_[offset].AddNGram(line);
    }

    template <class Iterator> void SingleAddNGram(size_t offset, const Iterator &begin, const Iterator &end, const StringPiece &line) {
      files_[offset].AddNGram(begin, end, line);
    }

  protected:
    Singles files_;
};

/* MultipleOutput specialised for ARPAOutput: in addition to n-grams, the
 * ARPA structural calls are broadcast to every underlying output.
 */
class MultipleARPAOutput : public MultipleOutput<ARPAOutput> {
  private:
    typedef MultipleOutput<ARPAOutput> Base;
    typedef boost::ptr_vector<ARPAOutput>::iterator OutputIterator;

  public:
    MultipleARPAOutput(const char *prefix, size_t number) : Base(prefix, number) {}

    // Calls ReserveForCounts(reserve) on each output.
    void ReserveForCounts(std::streampos reserve) {
      for (OutputIterator out = files_.begin(), stop = files_.end(); out != stop; ++out) {
        out->ReserveForCounts(reserve);
      }
    }

    // Calls BeginLength(length) on each output.
    void BeginLength(unsigned int length) {
      for (OutputIterator out = files_.begin(), stop = files_.end(); out != stop; ++out) {
        out->BeginLength(length);
      }
    }

    // Calls EndLength(length) on each output.
    void EndLength(unsigned int length) {
      for (OutputIterator out = files_.begin(), stop = files_.end(); out != stop; ++out) {
        out->EndLength(length);
      }
    }

    // Calls Finish() on each output.
    void Finish() {
      for (OutputIterator out = files_.begin(), stop = files_.end(); out != stop; ++out) {
        out->Finish();
      }
    }
};

/* Adapter handed to a reader: every n-gram the reader reports is passed on
 * to the filter together with the output bound at construction.
 */
template <class Filter, class Output> class DispatchInput {
  public:
    // Both arguments are kept by reference; nothing is copied.
    DispatchInput(Filter &filter, Output &output)
      : filter_(filter),
        output_(output) {}

/*    template <class Iterator> void AddNGram(const Iterator &begin, const Iterator &end, const StringPiece &line) {
      filter_.AddNGram(begin, end, line, output_);
    }*/

    // Forwards ngram and line to the filter, supplying the bound output.
    void AddNGram(const StringPiece &ngram, const StringPiece &line) {
      filter_.AddNGram(ngram, line, output_);
    }

  protected:
    Filter &filter_;
    Output &output_;
};

/* DispatchInput extended with the ARPA structural calls.  These go straight
 * to the output; EndLength additionally flushes the filter first.
 */
template <class Filter, class Output> class DispatchARPAInput : public DispatchInput<Filter, Output> {
  private:
    typedef DispatchInput<Filter, Output> Parent;

  public:
    DispatchARPAInput(Filter &filter, Output &output) : Parent(filter, output) {}

    void ReserveForCounts(std::streampos reserve) {
      this->output_.ReserveForCounts(reserve);
    }

    void BeginLength(unsigned int length) {
      this->output_.BeginLength(length);
    }

    // The filter is flushed before the output is told the length has ended.
    void EndLength(unsigned int length) {
      this->filter_.Flush();
      this->output_.EndLength(length);
    }

    void Finish() {
      this->output_.Finish();
    }
};

struct ARPAFormat {
  typedef ARPAOutput Output;
  typedef MultipleARPAOutput Multiple;
  static void Copy(util::FilePiece &in, Output &out) {
    ReadARPA(in, out);
  }
  template <class Filter, class Out> static void RunFilter(util::FilePiece &in, Filter &filter, Out &output) {
    DispatchARPAInput<Filter, Out> dispatcher(filter, output);
    ReadARPA(in, dispatcher);
  }
};

struct CountFormat {
  typedef CountOutput Output;
  typedef MultipleOutput<Output> Multiple;
  static void Copy(util::FilePiece &in, Output &out) {
    ReadCount(in, out);
  }
  template <class Filter, class Out> static void RunFilter(util::FilePiece &in, Filter &filter, Out &output) {
    DispatchInput<Filter, Out> dispatcher(filter, output);
    ReadCount(in, dispatcher);
  }
};

/* For multithreading, the buffer classes hold batches of filter inputs and
 * outputs in memory.  The strings get reused a lot, so keep them around
 * instead of clearing each time.
 */
class InputBuffer {
  public:
    InputBuffer() : actual_(0) {}

    void Reserve(size_t size) { lines_.reserve(size); }

    template <class Output> void AddNGram(const StringPiece &ngram, const StringPiece &line, Output &output) {
      if (lines_.size() == actual_) lines_.resize(lines_.size() + 1);
      // TODO avoid this copy.
      std::string &copied = lines_[actual_].line;
      copied.assign(line.data(), line.size());
      lines_[actual_].ngram.set(copied.data() + (ngram.data() - line.data()), ngram.size());
      ++actual_;
    }

    template <class Filter, class Output> void CallFilter(Filter &filter, Output &output) const {
      for (std::vector<Line>::const_iterator i = lines_.begin(); i != lines_.begin() + actual_; ++i) {
        filter.AddNGram(i->ngram, i->line, output);
      }
    }

    void Clear() { actual_ = 0; }
    bool Empty() { return actual_ == 0; }
    size_t Size() { return actual_; }

  private:
    struct Line {
      std::string line;
      StringPiece ngram;
    };

    size_t actual_;

    std::vector<Line> lines_;
};

class BinaryOutputBuffer {
  public:
    BinaryOutputBuffer() {}

    void Reserve(size_t size) {
      lines_.reserve(size);
    }

    void AddNGram(const StringPiece &line) {
      lines_.push_back(line);
    }

    template <class Output> void Flush(Output &output) {
      for (std::vector<StringPiece>::const_iterator i = lines_.begin(); i != lines_.end(); ++i) {
        output.AddNGram(*i);
      }
      lines_.clear();
    }

  private:
    std::vector<StringPiece> lines_;
};

/* Collects the lines a filter emits for a multiple-output destination.  Each
 * buffered line carries the list of systems it is destined for; an empty
 * list means "all systems".  Consecutive SingleAddNGram calls for the same
 * line (same data pointer and length) are merged into one entry.
 */
class MultipleOutputBuffer {
  public:
    MultipleOutputBuffer() : last_(NULL) {}

    // Pre-allocates room for size buffered lines.
    void Reserve(size_t size) {
      annotated_.reserve(size);
    }

    // Buffers line for delivery to every system.
    void AddNGram(const StringPiece &line) {
      annotated_.resize(annotated_.size() + 1);
      annotated_.back().line = line;
    }

    // Buffers line for delivery to the system at index offset.
    void SingleAddNGram(size_t offset, const StringPiece &line) {
      // Extend the newest entry only when it exists, is itself a targeted
      // entry, and refers to the very same line.  The first two checks
      // matter because last_ survives Flush() and AddNGram(): without them
      // a stale match would call back() on an empty vector or turn a
      // broadcast entry into a single-system one.
      const bool extends_previous =
          !annotated_.empty() &&
          !annotated_.back().systems.empty() &&
          (line.data() == last_.data()) &&
          (line.length() == last_.length());
      if (!extends_previous) {
        annotated_.resize(annotated_.size() + 1);
        annotated_.back().line = line;
        last_ = line;
      }
      annotated_.back().systems.push_back(offset);
    }

    // Delivers every buffered line to output in insertion order, either to
    // all systems or to each listed system, then empties the buffer.
    template <class Output> void Flush(Output &output) {
      for (std::vector<Annotated>::const_iterator i = annotated_.begin(); i != annotated_.end(); ++i) {
        if (i->systems.empty()) {
          output.AddNGram(i->line);
        } else {
          for (std::vector<size_t>::const_iterator j = i->systems.begin(); j != i->systems.end(); ++j) {
            output.SingleAddNGram(*j, i->line);
          }
        }
      }
      annotated_.clear();
    }

  private:
    struct Annotated {
      // If this is empty, send to all systems.
      // A filter should never both send a line to all systems and send it
      // to a single one.
      std::vector<size_t> systems;
      StringPiece line;
    };

    // Line passed to the most recent SingleAddNGram call that started a new
    // entry; only compared by pointer and length, never dereferenced.
    StringPiece last_;

    std::vector<Annotated> annotated_;
};

} // namespace lm

#endif // LM_FILTER_FORMAT_H