package org.apache.tika.parser.pkg;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import java.util.Set;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;
import org.apache.commons.compress.PasswordRequiredException;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveException;
import org.apache.commons.compress.archivers.ArchiveInputStream;
import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.commons.compress.archivers.StreamingNotSupportedException;
import org.apache.commons.compress.archivers.ar.ArArchiveInputStream;
import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
import org.apache.commons.compress.archivers.dump.DumpArchiveInputStream;
import org.apache.commons.compress.archivers.jar.JarArchiveInputStream;
import org.apache.commons.compress.archivers.sevenz.SevenZFile;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.io.input.CloseShieldInputStream;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.extractor.EmbeddedDocumentUtil;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.PasswordProvider;
import org.apache.tika.sax.XHTMLContentHandler;
import org.codehaus.plexus.util.LineOrientedInterpolatingReader;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: input_file:org/apache/tika/parser/pkg/PackageParser.class */
public class PackageParser extends AbstractParser {
    private static final long serialVersionUID = -5331043266963888708L;

    // Media types of the individual archive formats this parser recognises.
    private static final MediaType ZIP = MediaType.APPLICATION_ZIP;
    private static final MediaType JAR = MediaType.application("java-archive");
    private static final MediaType AR = MediaType.application("x-archive");
    private static final MediaType CPIO = MediaType.application("x-cpio");
    private static final MediaType DUMP = MediaType.application("x-tika-unix-dump");
    private static final MediaType TAR = MediaType.application("x-tar");
    private static final MediaType SEVENZ = MediaType.application("x-7z-compressed");

    // The set handed out by getSupportedTypes(); it is built from every format constant above.
    private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(ZIP, JAR, AR, CPIO, DUMP, TAR, SEVENZ);

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/tika/parser/pkg/PackageParser$SevenZWrapper.class */
    /**
     * Adapter that exposes a {@link SevenZFile} through the {@link ArchiveInputStream} API.
     *
     * <p>Every operation is forwarded unchanged to the wrapped {@code SevenZFile}: the three
     * {@code read} variants, entry iteration, and {@code close()}.
     */
    public static class SevenZWrapper extends ArchiveInputStream {
        /** The 7z archive all calls are delegated to. */
        private final SevenZFile archive;

        private SevenZWrapper(SevenZFile archive) {
            this.archive = archive;
        }

        /** Reads a single byte of the current entry from the wrapped archive. */
        @Override // org.apache.commons.compress.archivers.ArchiveInputStream, java.io.InputStream
        public int read() throws IOException {
            return archive.read();
        }

        /** Fills {@code buffer} from the current entry, returning whatever the wrapped archive reports. */
        @Override // java.io.InputStream
        public int read(byte[] buffer) throws IOException {
            return archive.read(buffer);
        }

        /** Reads up to {@code length} bytes into {@code buffer} starting at {@code offset}. */
        @Override // java.io.InputStream
        public int read(byte[] buffer, int offset, int length) throws IOException {
            return archive.read(buffer, offset, length);
        }

        /** Advances the wrapped archive to its next entry and returns it. */
        @Override // org.apache.commons.compress.archivers.ArchiveInputStream
        public ArchiveEntry getNextEntry() throws IOException {
            return archive.getNextEntry();
        }

        /** Closes the wrapped {@link SevenZFile}. */
        @Override // java.io.InputStream, java.io.Closeable, java.lang.AutoCloseable
        public void close() throws IOException {
            archive.close();
        }
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Maps an open archive stream to the media type of its format, based on the stream's
     * runtime class.
     *
     * @param archiveInputStream the archive stream to classify
     * @return the matching archive media type, or {@code MediaType.OCTET_STREAM} when the
     *         stream is none of the recognised implementations
     */
    public static MediaType getMediaType(ArchiveInputStream archiveInputStream) {
        // The JAR test intentionally stays ahead of the ZIP test, as in the original chain.
        // NOTE(review): presumably JarArchiveInputStream is a ZipArchiveInputStream subtype,
        // which would make this order significant - confirm before reordering.
        if (archiveInputStream instanceof JarArchiveInputStream) {
            return JAR;
        }
        if (archiveInputStream instanceof ZipArchiveInputStream) {
            return ZIP;
        }
        if (archiveInputStream instanceof ArArchiveInputStream) {
            return AR;
        }
        if (archiveInputStream instanceof CpioArchiveInputStream) {
            return CPIO;
        }
        if (archiveInputStream instanceof DumpArchiveInputStream) {
            return DUMP;
        }
        if (archiveInputStream instanceof TarArchiveInputStream) {
            return TAR;
        }
        if (archiveInputStream instanceof SevenZWrapper) {
            return SEVENZ;
        }
        return MediaType.OCTET_STREAM;
    }

    /* JADX INFO: Access modifiers changed from: package-private */
    /**
     * Tells whether the given media type is one of the ZIP-based formats (plain ZIP or JAR).
     *
     * @param mediaType the type to test; must not be {@code null}
     * @return {@code true} when {@code mediaType} equals the ZIP or the JAR media type
     */
    public static boolean isZipArchive(MediaType mediaType) {
        if (mediaType.equals(ZIP)) {
            return true;
        }
        return mediaType.equals(JAR);
    }

    /**
     * Returns the archive media types this parser handles.
     *
     * @param parseContext not consulted by this implementation
     * @return the shared {@code SUPPORTED_TYPES} set (the same instance on every call)
     */
    @Override // org.apache.tika.parser.Parser
    public Set<MediaType> getSupportedTypes(ParseContext parseContext) {
        return SUPPORTED_TYPES;
    }

    /**
     * Unpacks an archive and hands every non-directory entry to the embedded document extractor.
     *
     * <p>The stream is wrapped in a {@link BufferedInputStream} when it does not support
     * mark/reset. If the detected format is recognised, its media type is written to
     * {@code metadata} under {@code "Content-Type"}. The archive stream and the temporary
     * resources are released exactly once, in a single {@code finally}, on every exit path.
     *
     * @param inputStream the raw archive data
     * @param contentHandler receiver of the generated XHTML SAX events
     * @param metadata document metadata; updated with the detected content type
     * @param parseContext supplies the optional {@link ArchiveStreamFactory},
     *        {@link PasswordProvider} and embedded document extractor
     * @throws IOException if reading the archive or closing a resource fails
     * @throws SAXException if the content handler rejects an event
     * @throws TikaException if the archive cannot be opened; an
     *         {@link EncryptedDocumentException} is thrown when a password is required or a
     *         ZIP entry uses unsupported encryption
     */
    @Override // org.apache.tika.parser.Parser
    public void parse(InputStream inputStream, ContentHandler contentHandler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException {
        if (!inputStream.markSupported()) {
            inputStream = new BufferedInputStream(inputStream);
        }
        TemporaryResources temporaryResources = new TemporaryResources();
        ArchiveInputStream archive = null;
        XHTMLContentHandler xhtml;
        try {
            archive = openArchive(inputStream, metadata, parseContext, temporaryResources);

            MediaType mediaType = getMediaType(archive);
            if (!mediaType.equals(MediaType.OCTET_STREAM)) {
                metadata.set("Content-Type", mediaType.toString());
            }

            EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(parseContext);
            xhtml = new XHTMLContentHandler(contentHandler, metadata);
            xhtml.startDocument();
            try {
                for (ArchiveEntry entry = archive.getNextEntry(); entry != null; entry = archive.getNextEntry()) {
                    if (!entry.isDirectory()) {
                        parseEntry(archive, entry, extractor, xhtml);
                    }
                }
            } catch (PasswordRequiredException e) {
                throw new EncryptedDocumentException(e);
            } catch (UnsupportedZipFeatureException e) {
                if (e.getFeature() == UnsupportedZipFeatureException.Feature.ENCRYPTION) {
                    throw new EncryptedDocumentException(e);
                }
                // Any other unsupported ZIP feature is deliberately tolerated: iteration stops
                // and the document is finished with the entries handled so far.
            }
        } finally {
            // Nested finally so the temporary resources are released even if closing the
            // archive itself fails.
            try {
                if (archive != null) {
                    archive.close();
                }
            } finally {
                temporaryResources.close();
            }
        }
        // Emitted only after the archive has been closed, and only on the non-failing path.
        xhtml.endDocument();
    }

    /**
     * Opens the archive, preferring a streaming reader and falling back to file-based access
     * for 7z. Cleanup of {@code temporaryResources} is left to the caller.
     */
    private static ArchiveInputStream openArchive(InputStream inputStream, Metadata metadata, ParseContext parseContext, TemporaryResources temporaryResources) throws IOException, TikaException {
        ArchiveStreamFactory factory = (ArchiveStreamFactory) parseContext.get(ArchiveStreamFactory.class, new ArchiveStreamFactory());
        try {
            // Shielded so that closing the archive stream does not close the caller's stream.
            return factory.createArchiveInputStream(new CloseShieldInputStream(inputStream));
        } catch (StreamingNotSupportedException e) {
            // Constant-first comparison: a null format is reported as unknown instead of
            // failing with a NullPointerException.
            if (!ArchiveStreamFactory.SEVEN_Z.equals(e.getFormat())) {
                throw new TikaException("Unknown non-streaming format " + e.getFormat(), e);
            }
            // 7z cannot be streamed: rewind and access the data as a file instead.
            inputStream.reset();
            TikaInputStream tikaInputStream = TikaInputStream.get(inputStream, temporaryResources);
            PasswordProvider passwordProvider = (PasswordProvider) parseContext.get(PasswordProvider.class);
            String password = passwordProvider != null ? passwordProvider.getPassword(metadata) : null;
            SevenZFile sevenZFile;
            if (password == null) {
                sevenZFile = new SevenZFile(tikaInputStream.getFile());
            } else {
                // NOTE(review): "UnicodeLittleUnmarked" is presumably the JDK alias for
                // UTF-16LE without a byte-order mark - confirm before changing the charset.
                sevenZFile = new SevenZFile(tikaInputStream.getFile(), password.getBytes("UnicodeLittleUnmarked"));
            }
            return new SevenZWrapper(sevenZFile);
        } catch (ArchiveException e) {
            throw new TikaException("Unable to unpack document stream", e);
        }
    }

    /**
     * Processes a single archive entry.
     *
     * <p>If the entry's data cannot be read, only its name (when non-empty) is written as a
     * {@code <p>} element. Otherwise entry metadata is built via {@code handleEntryMetadata}
     * and, if the extractor accepts it, the entry content is passed to the extractor.
     *
     * @param archiveInputStream the archive, positioned at {@code archiveEntry}
     * @param archiveEntry the entry to process
     * @param embeddedDocumentExtractor decides whether, and performs, the embedded parse
     * @param xHTMLContentHandler receiver of the generated XHTML events
     * @throws SAXException if the content handler rejects an event
     * @throws IOException if reading the entry fails
     * @throws TikaException if the embedded parse or temporary-resource disposal fails
     */
    private void parseEntry(ArchiveInputStream archiveInputStream, ArchiveEntry archiveEntry, EmbeddedDocumentExtractor embeddedDocumentExtractor, XHTMLContentHandler xHTMLContentHandler) throws SAXException, IOException, TikaException {
        String name = archiveEntry.getName();
        if (!archiveInputStream.canReadEntryData(archiveEntry)) {
            if (name != null && name.length() > 0) {
                // Plain XHTML paragraph tag; the literal replaces an unrelated OpenNLP constant
                // that merely happened to hold the same value.
                xHTMLContentHandler.element("p", name);
            }
            return;
        }
        Metadata entryMetadata = handleEntryMetadata(name, null, archiveEntry.getLastModifiedDate(), Long.valueOf(archiveEntry.getSize()), xHTMLContentHandler);
        if (!embeddedDocumentExtractor.shouldParseEmbedded(entryMetadata)) {
            return;
        }
        TemporaryResources entryResources = new TemporaryResources();
        try {
            embeddedDocumentExtractor.parseEmbedded(TikaInputStream.get(archiveInputStream, entryResources), xHTMLContentHandler, entryMetadata, true);
        } finally {
            // Single disposal point for both the success and the failure path.
            entryResources.dispose();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Builds the metadata for one archive entry and, when the entry has a name, emits an
     * empty {@code <div class="embedded" id="...">} marker for it.
     *
     * @param str the entry name, may be {@code null} or empty; backslashes are converted to
     *        forward slashes before the name is used
     * @param date creation date, stored as {@code TikaCoreProperties.CREATED} when non-null
     * @param date2 modification date, stored as {@code TikaCoreProperties.MODIFIED} when non-null
     * @param l entry size, stored under {@code "Content-Length"} when non-null
     * @param xHTMLContentHandler receiver of the marker element
     * @return a new {@link Metadata} instance carrying the values above
     * @throws SAXException if the content handler rejects the marker element
     * @throws IOException declared for callers; not raised by the visible code
     * @throws TikaException declared for callers; not raised by the visible code
     */
    public static Metadata handleEntryMetadata(String str, Date date, Date date2, Long l, XHTMLContentHandler xHTMLContentHandler) throws SAXException, IOException, TikaException {
        Metadata metadata = new Metadata();
        if (date != null) {
            metadata.set(TikaCoreProperties.CREATED, date);
        }
        if (date2 != null) {
            metadata.set(TikaCoreProperties.MODIFIED, date2);
        }
        if (l != null) {
            metadata.set("Content-Length", Long.toString(l.longValue()));
        }
        if (str != null && str.length() > 0) {
            // Normalise path separators. The literal backslash replaces an unrelated
            // plexus-utils constant that merely happened to hold the same value.
            String normalizedName = str.replace("\\", "/");
            metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, normalizedName);

            AttributesImpl attributes = new AttributesImpl();
            attributes.addAttribute("", "class", "class", "CDATA", "embedded");
            attributes.addAttribute("", "id", "id", "CDATA", normalizedName);
            xHTMLContentHandler.startElement("div", attributes);
            xHTMLContentHandler.endElement("div");

            metadata.set(TikaMetadataKeys.EMBEDDED_RELATIONSHIP_ID, normalizedName);
        }
        return metadata;
    }
}
