Spaces:
Sleeping
Sleeping
| /* | |
| * Copyright 2021 Google LLC | |
| * | |
| * Licensed under the Apache License, Version 2.0 (the "License"); | |
| * you may not use this file except in compliance with the License. | |
| * You may obtain a copy of the License at | |
| * | |
| * http://www.apache.org/licenses/LICENSE-2.0 | |
| * | |
| * Unless required by applicable law or agreed to in writing, software | |
| * distributed under the License is distributed on an "AS IS" BASIS, | |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| * See the License for the specific language governing permissions and | |
| * limitations under the License. | |
| */ | |
| // The GZIP header (see RFC 1952): | |
| // +---+---+---+---+---+---+---+---+---+---+ | |
| // |ID1|ID2|CM |FLG| MTIME |XFL|OS | | |
| // +---+---+---+---+---+---+---+---+---+---+ | |
| // ID1 \037 | |
| // ID2 \213 | |
| // CM \010 (compression method == DEFLATE) | |
| // FLG \000 (special flags that we do not support) | |
| // MTIME Unix format modification time (0 means not available) | |
| // XFL 2-4? DEFLATE flags | |
| // OS ???? Operating system indicator (255 means unknown) | |
| // Header value we generate: | |
| // We use a #define so sizeof() works correctly | |
| namespace csrblocksparse { | |
// When this flag is true we tolerate several kinds of malformed gzip footers.
// Some web servers send pages with the defects listed below, and IE
// tolerates them:
// - Extra bytes after gzip footer (see bug 69126)
// - No gzip footer (see bug 72896)
// - Incomplete gzip footer (see bug 71871706)
| bool ZLib::should_be_flexible_with_gzip_footer_ = false; | |
| // Initialize the ZLib class | |
| ZLib::ZLib() | |
| : comp_init_(false), uncomp_init_(false), gzip_header_(new GZipHeader) { | |
| Reinit(); | |
| init_settings_ = settings_; | |
| } | |
| ZLib::~ZLib() { | |
| if (comp_init_) { | |
| deflateEnd(&comp_stream_); | |
| } | |
| if (uncomp_init_) { | |
| inflateEnd(&uncomp_stream_); | |
| } | |
| delete gzip_header_; | |
| } | |
| void ZLib::Reinit() { | |
| settings_.dictionary_ = nullptr; | |
| settings_.dict_len_ = 0; | |
| settings_.compression_level_ = Z_DEFAULT_COMPRESSION; | |
| settings_.window_bits_ = MAX_WBITS; | |
| settings_.mem_level_ = 8; // DEF_MEM_LEVEL | |
| settings_.no_header_mode_ = false; | |
| settings_.gzip_header_mode_ = false; | |
| settings_.dont_hide_zstream_end_ = false; | |
| if (comp_init_) { | |
| int err = deflateReset(&comp_stream_); | |
| if (err != Z_OK) { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| } | |
| if (uncomp_init_) { | |
| // Use negative window bits size to indicate bare stream with no header. | |
| int wbits = (settings_.no_header_mode_ ? -MAX_WBITS : MAX_WBITS); | |
| int err = inflateReset2(&uncomp_stream_, wbits); | |
| if (err == Z_OK) { | |
| init_settings_.no_header_mode_ = settings_.no_header_mode_; | |
| } else { | |
| inflateEnd(&uncomp_stream_); | |
| uncomp_init_ = false; | |
| } | |
| } | |
| crc_ = 0; | |
| uncompressed_size_ = 0; | |
| gzip_header_->Reset(); | |
| gzip_footer_bytes_ = -1; | |
| first_chunk_ = true; | |
| } | |
| void ZLib::Reset() { | |
| first_chunk_ = true; | |
| gzip_header_->Reset(); | |
| } | |
| void ZLib::CheckValidParams() { | |
| if (settings_.dictionary_ != nullptr && | |
| (settings_.no_header_mode_ || settings_.gzip_header_mode_)) { | |
| LOG(FATAL) | |
| << "Incompatible params: require zlib headers with preset dictionary"; | |
| } | |
| } | |
| void ZLib::SetNoHeaderMode(bool no_header_mode) { | |
| settings_.no_header_mode_ = no_header_mode; | |
| if (init_settings_.no_header_mode_ != settings_.no_header_mode_) { | |
| // Once the header mode changes, we have to reinitialize all our streams | |
| if (comp_init_) { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| if (uncomp_init_) { | |
| inflateEnd(&uncomp_stream_); | |
| uncomp_init_ = false; | |
| } | |
| } else { | |
| // Mode hasn't changed, but treat this as a reset request nevertheless | |
| Reset(); | |
| } | |
| CheckValidParams(); | |
| } | |
| void ZLib::SetGzipHeaderMode() { | |
| settings_.gzip_header_mode_ = true; | |
| SetNoHeaderMode(true); // we use gzip headers, not zlib headers | |
| CheckValidParams(); | |
| } | |
| void ZLib::SetDictionary(const char* initial_dict, unsigned int dict_len) { | |
| settings_.dictionary_ = (Bytef*)initial_dict; // NOLINT | |
| settings_.dict_len_ = dict_len; | |
| CheckValidParams(); | |
| } | |
| void ZLib::SetDontHideStreamEnd() { settings_.dont_hide_zstream_end_ = true; } | |
| int ZLib::MinFooterSize() const { | |
| int min_footer_size = 2; // Room for empty chunk. | |
| if (settings_.gzip_header_mode_) { | |
| min_footer_size += 8; // Room for actual footer. | |
| } | |
| return min_footer_size; | |
| } | |
| // --------- COMPRESS MODE | |
| // Initialization method to be called if we hit an error while | |
| // compressing. On hitting an error, call this method before returning | |
| // the error. | |
| void ZLib::CompressErrorInit() { | |
| if (comp_init_) { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| Reset(); | |
| } | |
| // These probably return Z_OK, but may return Z_BUF_ERROR if outbuf is full | |
| int ZLib::WriteGzipHeader() { | |
| if (comp_stream_.avail_out < sizeof(GZIP_HEADER)) return Z_BUF_ERROR; | |
| memcpy(comp_stream_.next_out, GZIP_HEADER, sizeof(GZIP_HEADER) - 1); | |
| comp_stream_.next_out += sizeof(GZIP_HEADER) - 1; | |
| comp_stream_.avail_out -= sizeof(GZIP_HEADER) - 1; | |
| return Z_OK; | |
| } | |
| int ZLib::WriteGzipFooter(Bytef* dest, uLongf destLen) { | |
| if (destLen < 8) // not enough space for footer | |
| return Z_BUF_ERROR; | |
| *dest++ = (crc_ >> 0) & 255; | |
| *dest++ = (crc_ >> 8) & 255; | |
| *dest++ = (crc_ >> 16) & 255; | |
| *dest++ = (crc_ >> 24) & 255; | |
| *dest++ = (uncompressed_size_ >> 0) & 255; | |
| *dest++ = (uncompressed_size_ >> 8) & 255; | |
| *dest++ = (uncompressed_size_ >> 16) & 255; | |
| *dest++ = (uncompressed_size_ >> 24) & 255; | |
| return Z_OK; | |
| } | |
| int ZLib::DeflateInit() { | |
| int err = | |
| deflateInit2(&comp_stream_, settings_.compression_level_, Z_DEFLATED, | |
| (settings_.no_header_mode_ ? -settings_.window_bits_ | |
| : settings_.window_bits_), | |
| settings_.mem_level_, Z_DEFAULT_STRATEGY); | |
| if (err == Z_OK) { | |
| // Save parameters for later reusability checks | |
| init_settings_.compression_level_ = settings_.compression_level_; | |
| init_settings_.window_bits_ = settings_.window_bits_; | |
| init_settings_.mem_level_ = settings_.mem_level_; | |
| init_settings_.no_header_mode_ = settings_.no_header_mode_; | |
| } | |
| return err; | |
| } | |
| int ZLib::CompressInit(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong* sourceLen) { | |
| int err; | |
| comp_stream_.next_in = (Bytef*)source; // NOLINT | |
| comp_stream_.avail_in = (uInt)*sourceLen; | |
| // Check for sourceLen (unsigned long) to fit into avail_in (unsigned int). | |
| if ((uLong)comp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; | |
| comp_stream_.next_out = dest; | |
| comp_stream_.avail_out = (uInt)*destLen; | |
| // Check for destLen (unsigned long) to fit into avail_out (unsigned int). | |
| if ((uLong)comp_stream_.avail_out != *destLen) return Z_BUF_ERROR; | |
| if (!first_chunk_) // only need to set up stream the first time through | |
| return Z_OK; | |
| // Force full reinit if properties have changed in a way we can't adjust. | |
| if (comp_init_ && | |
| (init_settings_.dictionary_ != settings_.dictionary_ || | |
| init_settings_.dict_len_ != settings_.dict_len_ || | |
| init_settings_.window_bits_ != settings_.window_bits_ || | |
| init_settings_.mem_level_ != settings_.mem_level_ || | |
| init_settings_.no_header_mode_ != settings_.no_header_mode_)) { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| // Reuse if we've already initted the object. | |
| if (comp_init_) { // we've already initted it | |
| err = deflateReset(&comp_stream_); | |
| if (err != Z_OK) { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| } | |
| // If compression level has changed, try to reconfigure instead of reinit | |
| if (comp_init_ && | |
| init_settings_.compression_level_ != settings_.compression_level_) { | |
| err = deflateParams(&comp_stream_, settings_.compression_level_, | |
| Z_DEFAULT_STRATEGY); | |
| if (err == Z_OK) { | |
| init_settings_.compression_level_ = settings_.compression_level_; | |
| } else { | |
| deflateEnd(&comp_stream_); | |
| comp_init_ = false; | |
| } | |
| } | |
| // First use or previous state was not reusable with current settings. | |
| if (!comp_init_) { | |
| comp_stream_.zalloc = (alloc_func)0; | |
| comp_stream_.zfree = (free_func)0; | |
| comp_stream_.opaque = (voidpf)0; | |
| err = DeflateInit(); | |
| if (err != Z_OK) return err; | |
| comp_init_ = true; | |
| } | |
| return Z_OK; | |
| } | |
| // In a perfect world we'd always have the full buffer to compress | |
| // when the time came, and we could just call Compress(). Alas, we | |
| // want to do chunked compression on our webserver. In this | |
| // application, we compress the header, send it off, then compress the | |
| // results, send them off, then compress the footer. Thus we need to | |
| // use the chunked compression features of zlib. | |
| int ZLib::CompressAtMostOrAll(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong* sourceLen, | |
| int flush_mode) { // Z_FULL_FLUSH or Z_FINISH | |
| int err; | |
| if ((err = CompressInit(dest, destLen, source, sourceLen)) != Z_OK) | |
| return err; | |
| // This is used to figure out how many bytes we wrote *this chunk* | |
| int compressed_size = comp_stream_.total_out; | |
| // Some setup happens only for the first chunk we compress in a run | |
| if (first_chunk_) { | |
| // Append the gzip header before we start compressing | |
| if (settings_.gzip_header_mode_) { | |
| if ((err = WriteGzipHeader()) != Z_OK) return err; | |
| compressed_size -= sizeof(GZIP_HEADER) - 1; // -= is right: adds to size | |
| crc_ = crc32(0, nullptr, 0); // initialize | |
| } | |
| // Initialize the dictionary just before we start compressing | |
| if (settings_.dictionary_) { | |
| err = deflateSetDictionary(&comp_stream_, settings_.dictionary_, | |
| settings_.dict_len_); | |
| if (err != Z_OK) return err; | |
| init_settings_.dictionary_ = settings_.dictionary_; | |
| init_settings_.dict_len_ = settings_.dict_len_; | |
| } | |
| uncompressed_size_ = 0; | |
| first_chunk_ = false; // so we don't do this again | |
| } | |
| // flush_mode is Z_FINISH for all mode, Z_SYNC_FLUSH for incremental | |
| // compression. | |
| err = deflate(&comp_stream_, flush_mode); | |
| const uLong source_bytes_consumed = *sourceLen - comp_stream_.avail_in; | |
| *sourceLen = comp_stream_.avail_in; | |
| if ((err == Z_STREAM_END || err == Z_OK) && comp_stream_.avail_in == 0 && | |
| comp_stream_.avail_out != 0) { | |
| // we processed everything ok and the output buffer was large enough. | |
| {} | |
| } else if (err == Z_STREAM_END && comp_stream_.avail_in > 0) { | |
| return Z_BUF_ERROR; // should never happen | |
| } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { | |
| // an error happened | |
| CompressErrorInit(); | |
| return err; | |
| } else if (comp_stream_.avail_out == 0) { // not enough space | |
| err = Z_BUF_ERROR; | |
| } | |
| assert(err == Z_OK || err == Z_STREAM_END || err == Z_BUF_ERROR); | |
| if (err == Z_STREAM_END) err = Z_OK; | |
| // update the crc and other metadata | |
| uncompressed_size_ += source_bytes_consumed; | |
| compressed_size = comp_stream_.total_out - compressed_size; // delta | |
| *destLen = compressed_size; | |
| if (settings_.gzip_header_mode_) // don't bother with crc else | |
| crc_ = crc32(crc_, source, source_bytes_consumed); | |
| return err; | |
| } | |
| int ZLib::CompressChunkOrAll(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong sourceLen, | |
| int flush_mode) { // Z_FULL_FLUSH or Z_FINISH | |
| const int ret = | |
| CompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); | |
| if (ret == Z_BUF_ERROR) CompressErrorInit(); | |
| return ret; | |
| } | |
| int ZLib::CompressChunk(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong sourceLen) { | |
| return CompressChunkOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); | |
| } | |
| int ZLib::CompressAtMost(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong* sourceLen) { | |
| return CompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); | |
| } | |
| // This writes the gzip footer info, if necessary. | |
| // No matter what, we call Reset() so we can compress Chunks again. | |
| int ZLib::CompressChunkDone(Bytef* dest, uLongf* destLen) { | |
| // Make sure our buffer is of reasonable size. | |
| if (*destLen < MinFooterSize()) { | |
| *destLen = 0; | |
| return Z_BUF_ERROR; | |
| } | |
| // The underlying zlib library requires a non-nullptr source pointer, even if | |
| // the source length is zero, otherwise it will generate an (incorrect) zero- | |
| // valued CRC checksum. | |
| char dummy = '\0'; | |
| int err; | |
| assert(!first_chunk_ && comp_init_); | |
| const uLongf orig_destLen = *destLen; | |
| // NOLINTNEXTLINE | |
| if ((err = CompressChunkOrAll(dest, destLen, (const Bytef*)&dummy, 0, | |
| Z_FINISH)) != Z_OK) { | |
| Reset(); // we assume they won't retry on error | |
| return err; | |
| } | |
| // Make sure that when we exit, we can start a new round of chunks later | |
| // (This must be set after the call to CompressChunkOrAll() above.) | |
| Reset(); | |
| // Write gzip footer if necessary. They're explicitly in little-endian order | |
| if (settings_.gzip_header_mode_) { | |
| if ((err = WriteGzipFooter(dest + *destLen, orig_destLen - *destLen)) != | |
| Z_OK) | |
| return err; | |
| *destLen += 8; // zlib footer took up another 8 bytes | |
| } | |
| return Z_OK; // stream_end is ok | |
| } | |
| // This routine only initializes the compression stream once. Thereafter, it | |
| // just does a deflateReset on the stream, which should be faster. | |
| int ZLib::Compress(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong sourceLen) { | |
| int err; | |
| const uLongf orig_destLen = *destLen; | |
| if ((err = CompressChunkOrAll(dest, destLen, source, sourceLen, Z_FINISH)) != | |
| Z_OK) | |
| return err; | |
| Reset(); // reset for next call to Compress | |
| if (settings_.gzip_header_mode_) { | |
| if ((err = WriteGzipFooter(dest + *destLen, orig_destLen - *destLen)) != | |
| Z_OK) | |
| return err; | |
| *destLen += 8; // zlib footer took up another 8 bytes | |
| } | |
| return Z_OK; | |
| } | |
| // --------- UNCOMPRESS MODE | |
| int ZLib::InflateInit() { | |
| // Use negative window bits size to indicate bare stream with no header. | |
| int wbits = (settings_.no_header_mode_ ? -MAX_WBITS : MAX_WBITS); | |
| int err = inflateInit2(&uncomp_stream_, wbits); | |
| if (err == Z_OK) { | |
| init_settings_.no_header_mode_ = settings_.no_header_mode_; | |
| } | |
| return err; | |
| } | |
| // Initialization method to be called if we hit an error while | |
| // uncompressing. On hitting an error, call this method before | |
| // returning the error. | |
| void ZLib::UncompressErrorInit() { | |
| if (uncomp_init_) { | |
| inflateEnd(&uncomp_stream_); | |
| uncomp_init_ = false; | |
| } | |
| Reset(); | |
| } | |
| int ZLib::UncompressInit(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong* sourceLen) { | |
| int err; | |
| uncomp_stream_.next_in = (Bytef*)source; // NOLINT | |
| uncomp_stream_.avail_in = (uInt)*sourceLen; | |
| // Check for sourceLen (unsigned long) to fit into avail_in (unsigned int). | |
| if ((uLong)uncomp_stream_.avail_in != *sourceLen) return Z_BUF_ERROR; | |
| uncomp_stream_.next_out = dest; | |
| uncomp_stream_.avail_out = (uInt)*destLen; | |
| // Check for destLen (unsigned long) to fit into avail_out (unsigned int). | |
| if ((uLong)uncomp_stream_.avail_out != *destLen) return Z_BUF_ERROR; | |
| if (!first_chunk_) // only need to set up stream the first time through | |
| return Z_OK; | |
| // Force full reinit if properties have changed in a way we can't adjust. | |
| if (uncomp_init_ && (init_settings_.dictionary_ != settings_.dictionary_ || | |
| init_settings_.dict_len_ != settings_.dict_len_)) { | |
| inflateEnd(&uncomp_stream_); | |
| uncomp_init_ = false; | |
| } | |
| // Reuse if we've already initted the object. | |
| if (uncomp_init_) { | |
| // Use negative window bits size to indicate bare stream with no header. | |
| int wbits = (settings_.no_header_mode_ ? -MAX_WBITS : MAX_WBITS); | |
| err = inflateReset2(&uncomp_stream_, wbits); | |
| if (err == Z_OK) { | |
| init_settings_.no_header_mode_ = settings_.no_header_mode_; | |
| } else { | |
| UncompressErrorInit(); | |
| } | |
| } | |
| // First use or previous state was not reusable with current settings. | |
| if (!uncomp_init_) { | |
| uncomp_stream_.zalloc = (alloc_func)0; | |
| uncomp_stream_.zfree = (free_func)0; | |
| uncomp_stream_.opaque = (voidpf)0; | |
| err = InflateInit(); | |
| if (err != Z_OK) return err; | |
| uncomp_init_ = true; | |
| } | |
| return Z_OK; | |
| } | |
| // If you compressed your data a chunk at a time, with CompressChunk, | |
| // you can uncompress it a chunk at a time with UncompressChunk. | |
| // Only difference bewteen chunked and unchunked uncompression | |
| // is the flush mode we use: Z_SYNC_FLUSH (chunked) or Z_FINISH (unchunked). | |
| int ZLib::UncompressAtMostOrAll(Bytef* dest, uLongf* destLen, | |
| const Bytef* source, uLong* sourceLen, | |
| int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH | |
| int err = Z_OK; | |
| if (first_chunk_) { | |
| gzip_footer_bytes_ = -1; | |
| if (settings_.gzip_header_mode_) { | |
| // If we haven't read our first chunk of actual compressed data, | |
| // and we're expecting gzip headers, then parse some more bytes | |
| // from the gzip headers. | |
| const Bytef* bodyBegin = nullptr; | |
| GZipHeader::Status status = gzip_header_->ReadMore( | |
| reinterpret_cast<const char*>(source), *sourceLen, | |
| reinterpret_cast<const char**>(&bodyBegin)); | |
| switch (status) { | |
| case GZipHeader::INCOMPLETE_HEADER: // don't have the complete header | |
| *destLen = 0; | |
| *sourceLen = 0; // GZipHeader used all the input | |
| return Z_OK; | |
| case GZipHeader::INVALID_HEADER: // bogus header | |
| Reset(); | |
| return Z_DATA_ERROR; | |
| case GZipHeader::COMPLETE_HEADER: // we have the full header | |
| *sourceLen -= (bodyBegin - source); // skip past header bytes | |
| source = bodyBegin; | |
| crc_ = crc32(0, nullptr, 0); // initialize CRC | |
| break; | |
| default: | |
| LOG(FATAL) << "Unexpected gzip header parsing result: " << status; | |
| } | |
| } | |
| } else if (gzip_footer_bytes_ >= 0) { | |
| // We're now just reading the gzip footer. We already read all the data. | |
| if (gzip_footer_bytes_ + *sourceLen > sizeof(gzip_footer_) && | |
| // When this flag is true, we allow some extra bytes after the | |
| // gzip footer. | |
| !should_be_flexible_with_gzip_footer_) { | |
| VLOG(1) << "UncompressChunkOrAll: Received " | |
| << (gzip_footer_bytes_ + *sourceLen - sizeof(gzip_footer_)) | |
| << " extra bytes after gzip footer: " | |
| << std::string(reinterpret_cast<const char*>(source), | |
| std::min(*sourceLen, 20UL)); | |
| Reset(); | |
| return Z_DATA_ERROR; | |
| } | |
| uLong len = sizeof(gzip_footer_) - gzip_footer_bytes_; | |
| if (len > *sourceLen) len = *sourceLen; | |
| if (len > 0) { | |
| memcpy(gzip_footer_ + gzip_footer_bytes_, source, len); | |
| gzip_footer_bytes_ += len; | |
| } | |
| *sourceLen -= len; | |
| *destLen = 0; | |
| return Z_OK; | |
| } | |
| if ((err = UncompressInit(dest, destLen, source, sourceLen)) != Z_OK) { | |
| LOG(WARNING) << "ZLib: UncompressInit: Error: " << err | |
| << "SourceLen: " << *sourceLen; | |
| return err; | |
| } | |
| // This is used to figure out how many output bytes we wrote *this chunk*: | |
| const uLong old_total_out = uncomp_stream_.total_out; | |
| // This is used to figure out how many input bytes we read *this chunk*: | |
| const uLong old_total_in = uncomp_stream_.total_in; | |
| // Some setup happens only for the first chunk we compress in a run | |
| if (first_chunk_) { | |
| // Initialize the dictionary just before we start compressing | |
| if (settings_.gzip_header_mode_ || settings_.no_header_mode_) { | |
| // In no_header_mode, we can just set the dictionary, since no | |
| // checking is done to advance past header bits to get us in the | |
| // dictionary setting mode. In settings_.gzip_header_mode_ we've already | |
| // removed headers, so this code works too. | |
| if (settings_.dictionary_) { | |
| err = inflateSetDictionary(&uncomp_stream_, settings_.dictionary_, | |
| settings_.dict_len_); | |
| if (err != Z_OK) { | |
| LOG(WARNING) << "inflateSetDictionary: Error: " << err | |
| << " dict_len: " << settings_.dict_len_; | |
| UncompressErrorInit(); | |
| return err; | |
| } | |
| init_settings_.dictionary_ = settings_.dictionary_; | |
| init_settings_.dict_len_ = settings_.dict_len_; | |
| } | |
| } | |
| first_chunk_ = false; // so we don't do this again | |
| // For the first chunk *only* (to avoid infinite troubles), we let | |
| // there be no actual data to uncompress. This sometimes triggers | |
| // when the input is only the gzip header, say. | |
| if (*sourceLen == 0) { | |
| *destLen = 0; | |
| return Z_OK; | |
| } | |
| } | |
| // We'll uncompress as much as we can. If we end OK great, otherwise | |
| // if we get an error that seems to be the gzip footer, we store the | |
| // gzip footer and return OK, otherwise we return the error. | |
| // flush_mode is Z_SYNC_FLUSH for chunked mode, Z_FINISH for all mode. | |
| err = inflate(&uncomp_stream_, flush_mode); | |
| if (settings_.dictionary_ && err == Z_NEED_DICT) { | |
| err = inflateSetDictionary(&uncomp_stream_, settings_.dictionary_, | |
| settings_.dict_len_); | |
| if (err != Z_OK) { | |
| LOG(WARNING) << "UncompressChunkOrAll: failed in inflateSetDictionary : " | |
| << err; | |
| UncompressErrorInit(); | |
| return err; | |
| } | |
| init_settings_.dictionary_ = settings_.dictionary_; | |
| init_settings_.dict_len_ = settings_.dict_len_; | |
| err = inflate(&uncomp_stream_, flush_mode); | |
| } | |
| // Figure out how many bytes of the input zlib slurped up: | |
| const uLong bytes_read = uncomp_stream_.total_in - old_total_in; | |
| CHECK_LE(source + bytes_read, source + *sourceLen); | |
| *sourceLen = uncomp_stream_.avail_in; | |
| // Next we look at the footer, if any. Note that we might currently | |
| // have just part of the footer (eg, if this data is arriving over a | |
| // socket). After looking for a footer, log a warning if there is | |
| // extra cruft. | |
| if ((err == Z_STREAM_END) && | |
| ((gzip_footer_bytes_ == -1) || | |
| (gzip_footer_bytes_ < sizeof(gzip_footer_))) && | |
| (uncomp_stream_.avail_in <= sizeof(gzip_footer_) || | |
| // When this flag is true, we allow some extra bytes after the | |
| // zlib footer. | |
| should_be_flexible_with_gzip_footer_)) { | |
| // Due to a bug in old versions of zlibwrapper, we appended the gzip | |
| // footer even in non-gzip mode. Thus we always allow a gzip footer | |
| // even if we're not in gzip mode, so we can continue to uncompress | |
| // the old data. :-( | |
| // Store gzip footer bytes so we can check for footer consistency | |
| // in UncompressChunkDone(). (If we have the whole footer, we | |
| // could do the checking here, but we don't to keep consistency | |
| // with CompressChunkDone().) | |
| gzip_footer_bytes_ = std::min(static_cast<size_t>(uncomp_stream_.avail_in), | |
| sizeof(gzip_footer_)); | |
| memcpy(gzip_footer_, source + bytes_read, gzip_footer_bytes_); | |
| *sourceLen -= gzip_footer_bytes_; | |
| } else if ((err == Z_STREAM_END || err == Z_OK) // everything went ok | |
| && uncomp_stream_.avail_in == 0) { // and we read it all | |
| {} | |
| } else if (err == Z_STREAM_END && uncomp_stream_.avail_in > 0) { | |
| VLOG(1) << "UncompressChunkOrAll: Received some extra data, bytes total: " | |
| << uncomp_stream_.avail_in << " bytes: " | |
| << std::string( | |
| reinterpret_cast<const char*>(uncomp_stream_.next_in), | |
| std::min(static_cast<int>(uncomp_stream_.avail_in), 20)); | |
| UncompressErrorInit(); | |
| return Z_DATA_ERROR; // what's the extra data for? | |
| } else if (err != Z_OK && err != Z_STREAM_END && err != Z_BUF_ERROR) { | |
| // an error happened | |
| VLOG(1) << "UncompressChunkOrAll: Error: " << err | |
| << " avail_out: " << uncomp_stream_.avail_out; | |
| UncompressErrorInit(); | |
| return err; | |
| } else if (uncomp_stream_.avail_out == 0) { | |
| err = Z_BUF_ERROR; | |
| } | |
| assert(err == Z_OK || err == Z_BUF_ERROR || err == Z_STREAM_END); | |
| if (err == Z_STREAM_END && !settings_.dont_hide_zstream_end_) err = Z_OK; | |
| // update the crc and other metadata | |
| uncompressed_size_ = uncomp_stream_.total_out; | |
| *destLen = uncomp_stream_.total_out - old_total_out; // size for this call | |
| if (settings_.gzip_header_mode_) crc_ = crc32(crc_, dest, *destLen); | |
| return err; | |
| } | |
| int ZLib::UncompressChunkOrAll(Bytef* dest, uLongf* destLen, | |
| const Bytef* source, uLong sourceLen, | |
| int flush_mode) { // Z_SYNC_FLUSH or Z_FINISH | |
| const int ret = | |
| UncompressAtMostOrAll(dest, destLen, source, &sourceLen, flush_mode); | |
| if (ret == Z_BUF_ERROR) UncompressErrorInit(); | |
| return ret; | |
| } | |
| int ZLib::UncompressAtMost(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong* sourceLen) { | |
| return UncompressAtMostOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); | |
| } | |
| int ZLib::UncompressChunk(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong sourceLen) { | |
| return UncompressChunkOrAll(dest, destLen, source, sourceLen, Z_SYNC_FLUSH); | |
| } | |
| // We make sure we've uncompressed everything, that is, the current | |
| // uncompress stream is at a compressed-buffer-EOF boundary. In gzip | |
| // mode, we also check the gzip footer to make sure we pass the gzip | |
| // consistency checks. We RETURN true iff both types of checks pass. | |
| bool ZLib::UncompressChunkDone() { | |
| if (first_chunk_ || !uncomp_init_) { | |
| return false; | |
| } | |
| // Make sure we're at the end-of-compressed-data point. This means | |
| // if we call inflate with Z_FINISH we won't consume any input or | |
| // write any output | |
| Bytef dummyin, dummyout; | |
| uLongf dummylen = 0; | |
| if (UncompressChunkOrAll(&dummyout, &dummylen, &dummyin, 0, Z_FINISH) != | |
| Z_OK) { | |
| return false; | |
| } | |
| // Make sure that when we exit, we can start a new round of chunks later | |
| Reset(); | |
| // We don't need to check footer when this flag is true. | |
| if (should_be_flexible_with_gzip_footer_) { | |
| return true; | |
| } | |
| // Whether we were hoping for a gzip footer or not, we allow a gzip | |
| // footer. (See the note above about bugs in old zlibwrappers.) But | |
| // by the time we've seen all the input, it has to be either a | |
| // complete gzip footer, or no footer at all. | |
| if ((gzip_footer_bytes_ != -1) && (gzip_footer_bytes_ != 0) && | |
| (gzip_footer_bytes_ != sizeof(gzip_footer_))) | |
| return false; | |
| if (!settings_.gzip_header_mode_) return true; | |
| return IsGzipFooterValid(); | |
| } | |
| bool ZLib::IsGzipFooterValid() const { | |
| // If we were expecting a gzip footer, and didn't get a full one, | |
| // that's an error. | |
| if (gzip_footer_bytes_ == -1 || gzip_footer_bytes_ < sizeof(gzip_footer_)) | |
| return false; | |
| // The footer holds the lower four bytes of the length. | |
| uLong uncompressed_size = 0; | |
| uncompressed_size += static_cast<uLong>(gzip_footer_[7]) << 24; | |
| uncompressed_size += gzip_footer_[6] << 16; | |
| uncompressed_size += gzip_footer_[5] << 8; | |
| uncompressed_size += gzip_footer_[4] << 0; | |
| if (uncompressed_size != (uncompressed_size_ & 0xffffffff)) { | |
| return false; | |
| } | |
| uLong checksum = 0; | |
| checksum += static_cast<uLong>(gzip_footer_[3]) << 24; | |
| checksum += gzip_footer_[2] << 16; | |
| checksum += gzip_footer_[1] << 8; | |
| checksum += gzip_footer_[0] << 0; | |
| if (crc_ != checksum) return false; | |
| return true; | |
| } | |
| // Uncompresses the source buffer into the destination buffer. | |
| // The destination buffer must be long enough to hold the entire | |
| // decompressed contents. | |
| // | |
| // We only initialize the uncomp_stream once. Thereafter, we use | |
| // inflateReset2, which should be faster. | |
| // | |
| // Returns Z_OK on success, otherwise, it returns a zlib error code. | |
| int ZLib::Uncompress(Bytef* dest, uLongf* destLen, const Bytef* source, | |
| uLong sourceLen) { | |
| int err; | |
| if ((err = UncompressChunkOrAll(dest, destLen, source, sourceLen, | |
| Z_FINISH)) != Z_OK) { | |
| Reset(); // let us try to compress again | |
| return err; | |
| } | |
| if (!UncompressChunkDone()) // calls Reset() | |
| return Z_DATA_ERROR; | |
| return Z_OK; // stream_end is ok | |
| } | |
| // read uncompress length from gzip footer | |
| uLongf ZLib::GzipUncompressedLength(const Bytef* source, uLong len) { | |
| if (len <= 4) return 0; // malformed data. | |
| return (static_cast<uLongf>(source[len - 1]) << 24) + | |
| (static_cast<uLongf>(source[len - 2]) << 16) + | |
| (static_cast<uLongf>(source[len - 3]) << 8) + | |
| (static_cast<uLongf>(source[len - 4]) << 0); | |
| } | |
| int ZLib::UncompressGzipAndAllocate(Bytef** dest, uLongf* destLen, | |
| const Bytef* source, uLong sourceLen) { | |
| *dest = nullptr; // until we successfully allocate | |
| if (!settings_.gzip_header_mode_) return Z_VERSION_ERROR; // *shrug* | |
| uLongf uncompress_length = GzipUncompressedLength(source, sourceLen); | |
| // Do not trust the uncompress size reported by the compressed buffer. | |
| if (uncompress_length > *destLen) { | |
| if (!HasGzipHeader(reinterpret_cast<const char*>(source), sourceLen)) { | |
| VLOG(1) << "Attempted to un-gzip data that is not gzipped."; | |
| return Z_DATA_ERROR; | |
| } | |
| VLOG(1) << "Uncompressed size " << uncompress_length | |
| << " exceeds maximum expected size " << *destLen; | |
| return Z_MEM_ERROR; // probably a corrupted gzip buffer | |
| } | |
| *destLen = uncompress_length; | |
| *dest = (Bytef*)malloc(*destLen); // NOLINT | |
| if (*dest == nullptr) // probably a corrupted gzip buffer | |
| return Z_MEM_ERROR; | |
| const int retval = Uncompress(*dest, destLen, source, sourceLen); | |
| if (retval != Z_OK) { // just to make life easier for them | |
| free(*dest); | |
| *dest = nullptr; | |
| } | |
| return retval; | |
| } | |
| // Convenience method to check if a bytestream has a header. This | |
| // is intended as a quick test: "Is this likely a GZip file?" | |
| bool ZLib::HasGzipHeader(const char* source, int sourceLen) { | |
| GZipHeader gzh; | |
| const char* ptr = nullptr; | |
| return gzh.ReadMore(source, sourceLen, &ptr) == GZipHeader::COMPLETE_HEADER; | |
| } | |
| } // namespace csrblocksparse | |