|
#ifndef UTIL_FILE_PIECE_H |
|
#define UTIL_FILE_PIECE_H |
|
|
|
#include "ersatz_progress.hh" |
|
#include "exception.hh" |
|
#include "file.hh" |
|
#include "mmap.hh" |
|
#include "read_compressed.hh" |
|
#include "spaces.hh" |
|
#include "string_piece.hh" |
|
|
|
#include <cstddef> |
|
#include <iosfwd> |
|
#include <string> |
|
#include <cassert> |
|
#include <stdint.h> |
|
|
|
namespace util { |
|
|
|
class ParseNumberException : public Exception { |
|
public: |
|
explicit ParseNumberException(StringPiece value) throw(); |
|
~ParseNumberException() throw() {} |
|
}; |
|
|
|
class FilePiece; |
|
|
|
|
|
|
|
|
|
|
|
class LineIterator { |
|
public: |
|
LineIterator() : backing_(NULL) {} |
|
|
|
explicit LineIterator(FilePiece &f, char delim = '\n') : backing_(&f), delim_(delim) { |
|
++*this; |
|
} |
|
|
|
LineIterator &operator++(); |
|
|
|
bool operator==(const LineIterator &other) const { |
|
return backing_ == other.backing_; |
|
} |
|
|
|
bool operator!=(const LineIterator &other) const { |
|
return backing_ != other.backing_; |
|
} |
|
|
|
operator bool() const { return backing_ != NULL; } |
|
|
|
StringPiece operator*() const { return line_; } |
|
const StringPiece *operator->() const { return &line_; } |
|
|
|
private: |
|
FilePiece *backing_; |
|
StringPiece line_; |
|
char delim_; |
|
}; |
|
|
|
|
|
class FilePiece { |
|
public: |
|
|
|
explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); |
|
|
|
explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); |
|
|
|
|
|
|
|
|
|
|
|
|
|
explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576); |
|
|
|
LineIterator begin() { |
|
return LineIterator(*this); |
|
} |
|
|
|
LineIterator end() { |
|
return LineIterator(); |
|
} |
|
|
|
char peek() { |
|
if (position_ == position_end_) { |
|
Shift(); |
|
if (at_end_) throw EndOfFileException(); |
|
} |
|
return *position_; |
|
} |
|
|
|
char get() { |
|
char ret = peek(); |
|
++position_; |
|
return ret; |
|
} |
|
|
|
|
|
StringPiece ReadDelimited(const bool *delim = kSpaces) { |
|
SkipSpaces(delim); |
|
return Consume(FindDelimiterOrEOF(delim)); |
|
} |
|
|
|
|
|
bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) { |
|
assert(delim[static_cast<unsigned char>('\n')]); |
|
|
|
for (; ; ++position_) { |
|
if (position_ == position_end_) { |
|
try { |
|
Shift(); |
|
} catch (const util::EndOfFileException &) { return false; } |
|
|
|
if (position_ == position_end_) return false; |
|
} |
|
if (!delim[static_cast<unsigned char>(*position_)]) break; |
|
if (*position_ == '\n') return false; |
|
} |
|
|
|
to = Consume(FindDelimiterOrEOF(delim)); |
|
return true; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
StringPiece ReadLine(char delim = '\n', bool strip_cr = true); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool ReadLineOrEOF(StringPiece &to, char delim = '\n', bool strip_cr = true); |
|
|
|
float ReadFloat(); |
|
double ReadDouble(); |
|
long int ReadLong(); |
|
unsigned long int ReadULong(); |
|
|
|
|
|
void SkipSpaces(const bool *delim = kSpaces) { |
|
assert(position_ <= position_end_); |
|
for (; ; ++position_) { |
|
if (position_ == position_end_) { |
|
Shift(); |
|
|
|
if (position_ == position_end_) return; |
|
} |
|
assert(position_ < position_end_); |
|
if (!delim[static_cast<unsigned char>(*position_)]) return; |
|
} |
|
} |
|
|
|
uint64_t Offset() const { |
|
return position_ - data_.begin() + mapped_offset_; |
|
} |
|
|
|
const std::string &FileName() const { return file_name_; } |
|
|
|
|
|
void UpdateProgress(); |
|
|
|
private: |
|
void InitializeNoRead(const char *name, std::size_t min_buffer); |
|
|
|
void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer); |
|
|
|
template <class T> T ReadNumber(); |
|
|
|
StringPiece Consume(const char *to) { |
|
assert(to >= position_); |
|
StringPiece ret(position_, to - position_); |
|
position_ = to; |
|
return ret; |
|
} |
|
|
|
const char *FindDelimiterOrEOF(const bool *delim = kSpaces); |
|
|
|
void Shift(); |
|
|
|
void MMapShift(uint64_t desired_begin); |
|
|
|
void TransitionToRead(); |
|
void ReadShift(); |
|
|
|
const char *position_, *last_space_, *position_end_; |
|
|
|
scoped_fd file_; |
|
const uint64_t total_size_; |
|
|
|
std::size_t default_map_size_; |
|
uint64_t mapped_offset_; |
|
|
|
|
|
scoped_memory data_; |
|
|
|
bool at_end_; |
|
bool fallback_to_read_; |
|
|
|
ErsatzProgress progress_; |
|
|
|
std::string file_name_; |
|
|
|
ReadCompressed fell_back_; |
|
}; |
|
|
|
} |
|
|
|
#endif |
|
|