|
# Base image: Ubuntu 22.04.
FROM ubuntu:22.04

# Unprivileged account with a home directory and bash as its login shell.
RUN useradd --create-home --shell /bin/bash myuser

# HOME is set in its own ENV instruction because PDF2JSON_HOME below expands
# it; a variable defined and referenced inside the same ENV instruction would
# not see the new value.
ENV HOME=/home/myuser

# JAVA_TOOL_OPTIONS passes -Dhttps.protocols=TLSv1.2 to JVMs started in the
# image; PDF2JSON_HOME is where s2orc-doc2json is copied later in this file.
ENV JAVA_TOOL_OPTIONS="-Dhttps.protocols=TLSv1.2" \
    PDF2JSON_HOME=${HOME}/app/src/s2orc-doc2json
|
|
|
|
|
# System packages: build toolchain, download tools and JDK 8.
# - `unzip` is listed explicitly: a later step in this file calls it, and it
#   was previously only pulled in as a "Recommends" of `zip`.
# - DEBIAN_FRONTEND is set inline (not via ENV) so package configuration never
#   waits for input during the build and the setting does not leak into the
#   runtime environment.
# - The apt package lists are removed in the same layer so they do not end up
#   in the image.
RUN apt-get -yqq update \
 && DEBIAN_FRONTEND=noninteractive apt-get -yqq install \
        build-essential \
        curl \
        gcc \
        git \
        openjdk-8-jdk \
        screen \
        software-properties-common \
        unzip \
        wget \
        zip \
 && rm -rf /var/lib/apt/lists/*
|
|
|
# Switch to the unprivileged user and work from its home directory.
USER myuser
WORKDIR ${HOME}

# Installer file name on repo.anaconda.com. The default keeps the previous
# behaviour ("latest"); override with --build-arg to pin a dated installer
# for reproducible builds.
ARG MINICONDA_INSTALLER=Miniconda3-latest-Linux-x86_64.sh

# Download, run in batch mode (-b) into $HOME/miniconda, and delete the
# installer in the same layer.
# curl -f makes an HTTP error fail the build instead of saving the error page
# and handing it to bash; -sS hides the progress meter but still prints errors.
RUN curl -fsSLO "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}" \
 && bash "${MINICONDA_INSTALLER}" -b -p "${HOME}/miniconda" \
 && rm "${MINICONDA_INSTALLER}"

# Put conda and its tools first on PATH for all following instructions and at
# runtime.
ENV PATH=${HOME}/miniconda/bin:${PATH}
|
|
|
|
|
# Create the Python 3.10 environment used by the rest of the build.
# --yes answers the confirmation prompt explicitly instead of relying on the
# prompt's default when stdin is empty; `conda clean` removes downloaded
# package archives and caches in the same layer to keep the image smaller.
RUN conda create --yes -n my_env python=3.10 \
 && conda clean --all --force-pkgs-dirs --yes

# From here on every shell-form RUN executes inside my_env via `conda run`.
SHELL ["conda", "run", "-n", "my_env", "/bin/bash", "-c"]
|
|
|
# Install Python dependencies before copying the rest of the sources so this
# layer stays cached until requirements.txt changes.
WORKDIR ${HOME}/app/src

# COPY does not honour USER; --chown keeps the file owned by the app user.
COPY --chown=myuser:myuser requirements.txt ./

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt
|
|
|
# Copy the s2orc-doc2json sources into PDF2JSON_HOME.
WORKDIR ${PDF2JSON_HOME}
COPY s2orc-doc2json/ ./

# COPY writes files as root regardless of USER, so hand the whole app tree to
# myuser. Permissions are owner read/write, with read and directory-traverse
# for everyone else, instead of the previous world-writable 777.
# NOTE(review): if the image must run under an arbitrary UID (e.g. `docker run
# --user`), grant group write instead of reverting to 777 — confirm with the
# deployment setup.
USER root
RUN chown -R myuser:myuser "${HOME}/app" \
 && chmod -R u=rwX,go=rX "${HOME}/app"
USER myuser

# Install s2orc-doc2json in development mode from the copied sources.
RUN python setup.py develop
|
|
|
# Fetch the GROBID 0.6.1 source archive into the home directory, unpack it and
# delete the zip within the same layer.
WORKDIR ${HOME}
RUN wget https://github.com/kermitt2/grobid/archive/0.6.1.zip \
 && unzip 0.6.1.zip \
 && rm 0.6.1.zip

# Build GROBID with its Gradle wrapper, then copy the config.yaml and
# grobid.properties shipped inside s2orc-doc2json into the GROBID tree.
# Destinations are relative to the WORKDIR set just above.
WORKDIR ${HOME}/grobid-0.6.1
RUN ./gradlew clean install \
 && cp "${PDF2JSON_HOME}/doc2json/grobid2json/grobid/config.yaml" grobid-service/config/config.yaml \
 && cp "${PDF2JSON_HOME}/doc2json/grobid2json/grobid/grobid.properties" grobid-home/config/grobid.properties
|
|
|
# Download the two Hugging Face snapshots into ./word_embedding and
# ./memsum_arxiv under the models directory at build time.
# The continuation lines are deliberately not indented: the Dockerfile parser
# joins them verbatim, so the string passed to `python -c` stays a single line.
WORKDIR ${HOME}/app/models
RUN python -c "from huggingface_hub import snapshot_download; \
model_folder = './'; \
snapshot_download('nianlong/memsum-word-embedding', local_dir = model_folder + 'word_embedding'); \
snapshot_download('nianlong/memsum-arxiv-summarization', local_dir = model_folder + 'memsum_arxiv' )"
|
|
|
# Keep a copy of this Dockerfile next to the sources inside the image.
# COPY ignores USER, and the recursive chown earlier in this file ran before
# these copies, so --chown is needed to keep the files owned by myuser like
# the rest of the app tree.
WORKDIR ${HOME}/app/src
COPY --chown=myuser:myuser Dockerfile ./

WORKDIR ${HOME}/app/src/services

# Shallow clone: fetch only the latest MemSum commit, not the full history.
# NOTE(review): assumes nothing at runtime needs MemSum's git history, and the
# clone still tracks the default branch head — consider pinning a commit.
RUN git clone --depth 1 https://github.com/nianlonggu/MemSum

# Service sources go in last so edits to them invalidate only this layer.
COPY --chown=myuser:myuser services/ ./
|
|
|
|
|
|
|
# Default command: run start_service.sh with bash; the relative path resolves
# against the final WORKDIR. Exec-form CMD is not passed through the SHELL
# instruction, so it is not wrapped in `conda run`.
# NOTE(review): presumably start_service.sh selects the conda environment
# itself — confirm against the script.
CMD ["bash", "./start_service.sh"]