|
#ifndef UTIL_FILE_PIECE_H |
|
#define UTIL_FILE_PIECE_H |
|
|
|
#include "util/ersatz_progress.hh" |
|
#include "util/exception.hh" |
|
#include "util/file.hh" |
|
#include "util/mmap.hh" |
|
#include "util/read_compressed.hh" |
|
#include "util/string_piece.hh" |
|
|
|
#include <cstddef> |
|
#include <iosfwd> |
|
#include <string> |
|
|
|
#include <assert.h> |
|
#include <stdint.h> |
|
|
|
namespace util { |
|
|
|
class ParseNumberException : public Exception { |
|
public: |
|
explicit ParseNumberException(StringPiece value) throw(); |
|
~ParseNumberException() throw() {} |
|
}; |
|
|
|
// Lookup table with one entry per possible unsigned char value (256 entries).
// It is the default `delim` argument of FilePiece::ReadDelimited,
// FilePiece::ReadWordSameLine and FilePiece::SkipSpaces, where a true entry
// marks that character as a delimiter.  Defined in another translation unit.
// NOTE(review): presumably true for the whitespace characters -- confirm
// against the definition.
extern const bool kSpaces[256]; |
|
|
|
|
|
class FilePiece { |
|
public: |
|
|
|
explicit FilePiece(const char *file, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); |
|
|
|
explicit FilePiece(int fd, const char *name = NULL, std::ostream *show_progress = NULL, std::size_t min_buffer = 1048576); |
|
|
|
|
|
|
|
|
|
|
|
|
|
explicit FilePiece(std::istream &stream, const char *name = NULL, std::size_t min_buffer = 1048576); |
|
|
|
~FilePiece(); |
|
|
|
char get() { |
|
if (position_ == position_end_) { |
|
Shift(); |
|
if (at_end_) throw EndOfFileException(); |
|
} |
|
return *(position_++); |
|
} |
|
|
|
|
|
StringPiece ReadDelimited(const bool *delim = kSpaces) { |
|
SkipSpaces(delim); |
|
return Consume(FindDelimiterOrEOF(delim)); |
|
} |
|
|
|
|
|
bool ReadWordSameLine(StringPiece &to, const bool *delim = kSpaces) { |
|
assert(delim[static_cast<unsigned char>('\n')]); |
|
|
|
for (; ; ++position_) { |
|
if (position_ == position_end_) { |
|
try { |
|
Shift(); |
|
} catch (const util::EndOfFileException &e) { return false; } |
|
|
|
if (position_ == position_end_) return false; |
|
} |
|
if (!delim[static_cast<unsigned char>(*position_)]) break; |
|
if (*position_ == '\n') return false; |
|
} |
|
|
|
to = Consume(FindDelimiterOrEOF(delim)); |
|
return true; |
|
} |
|
|
|
|
|
|
|
StringPiece ReadLine(char delim = '\n'); |
|
|
|
|
|
bool ReadLineOrEOF(StringPiece &to, char delim = '\n'); |
|
|
|
float ReadFloat(); |
|
double ReadDouble(); |
|
long int ReadLong(); |
|
unsigned long int ReadULong(); |
|
|
|
|
|
void SkipSpaces(const bool *delim = kSpaces) { |
|
assert(position_ <= position_end_); |
|
for (; ; ++position_) { |
|
if (position_ == position_end_) { |
|
Shift(); |
|
|
|
if (position_ == position_end_) return; |
|
} |
|
assert(position_ < position_end_); |
|
if (!delim[static_cast<unsigned char>(*position_)]) return; |
|
} |
|
} |
|
|
|
uint64_t Offset() const { |
|
return position_ - data_.begin() + mapped_offset_; |
|
} |
|
|
|
const std::string &FileName() const { return file_name_; } |
|
|
|
private: |
|
void InitializeNoRead(const char *name, std::size_t min_buffer); |
|
|
|
void Initialize(const char *name, std::ostream *show_progress, std::size_t min_buffer); |
|
|
|
template <class T> T ReadNumber(); |
|
|
|
StringPiece Consume(const char *to) { |
|
assert(to >= position_); |
|
StringPiece ret(position_, to - position_); |
|
position_ = to; |
|
return ret; |
|
} |
|
|
|
const char *FindDelimiterOrEOF(const bool *delim = kSpaces); |
|
|
|
void Shift(); |
|
|
|
void MMapShift(uint64_t desired_begin); |
|
|
|
void TransitionToRead(); |
|
void ReadShift(); |
|
|
|
const char *position_, *last_space_, *position_end_; |
|
|
|
scoped_fd file_; |
|
const uint64_t total_size_; |
|
const uint64_t page_; |
|
|
|
std::size_t default_map_size_; |
|
uint64_t mapped_offset_; |
|
|
|
|
|
scoped_memory data_; |
|
|
|
bool at_end_; |
|
bool fallback_to_read_; |
|
|
|
ErsatzProgress progress_; |
|
|
|
std::string file_name_; |
|
|
|
ReadCompressed fell_back_; |
|
}; |
|
|
|
} |
|
|
|
#endif |
|
|