FROM ubuntu:22.04

# Unprivileged account that owns the application and is used at runtime.
RUN useradd -m -s /bin/bash myuser

# These stay as separate ENV instructions on purpose: PDF2JSON_HOME expands
# $HOME, and a variable set in an ENV instruction is only visible to
# *later* instructions, not to other keys of the same ENV.
ENV HOME=/home/myuser
ENV JAVA_TOOL_OPTIONS="-Dhttps.protocols=TLSv1.2"
ENV PDF2JSON_HOME=$HOME/app/src/s2orc-doc2json

# Install system-wide build tools, download utilities and the Java 8 JDK
# (the GROBID build further down runs ./gradlew).
# - unzip is listed explicitly because the GROBID step calls it; it should not
#   depend on being pulled in as a side effect of another package.
# - DEBIAN_FRONTEND is set inline so it never leaks into the runtime env.
# - The apt lists are removed in the same layer so they do not bloat the image.
RUN apt-get -yqq update && \
    DEBIAN_FRONTEND=noninteractive apt-get -yqq install \
        build-essential \
        curl \
        gcc \
        git \
        openjdk-8-jdk \
        screen \
        software-properties-common \
        unzip \
        wget \
        zip \
    && rm -rf /var/lib/apt/lists/*

USER myuser

# Install Miniconda into $HOME/miniconda (batch mode, no prompts).
# -f makes curl fail on an HTTP error instead of saving an error page as the
# installer script.
# NOTE(review): "Miniconda3-latest" is unpinned, so rebuilds may pick up a
# different conda release -- consider pinning a specific installer + checksum.
WORKDIR $HOME
RUN curl -fsSLO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3-latest-Linux-x86_64.sh -p "$HOME/miniconda" -b && \
    rm Miniconda3-latest-Linux-x86_64.sh
ENV PATH=$HOME/miniconda/bin:${PATH}

# Create a Python 3.10 environment (-y: never wait for confirmation).
RUN conda create -y -n my_env python=3.10

# From here on, every shell-form RUN is executed inside the my_env environment.
SHELL ["conda", "run", "-n", "my_env", "/bin/bash", "-c"]

# Python dependencies are installed before the sources are copied so this
# layer is reused as long as requirements.txt is unchanged.
WORKDIR $HOME/app/src
COPY ./requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install the bundled s2orc-doc2json package in development mode.
WORKDIR $PDF2JSON_HOME
COPY ./s2orc-doc2json/ .
# COPY (without --chown) writes files as root, so hand the tree over to myuser
# before building as that user.
# NOTE(review): chmod -R 777 is broader than least-privilege; kept unchanged
# because the runtime may rely on it -- confirm whether it can be tightened.
USER root
RUN chown -R myuser:myuser "$HOME/app" && chmod -R 777 "$HOME/app"
USER myuser
RUN python setup.py develop

# Download and build GROBID 0.6.1, then overlay the configuration files
# shipped with doc2json.
WORKDIR $HOME
RUN wget https://github.com/kermitt2/grobid/archive/0.6.1.zip && \
    unzip 0.6.1.zip && \
    rm 0.6.1.zip
WORKDIR $HOME/grobid-0.6.1
RUN ./gradlew clean install && \
    cp "$PDF2JSON_HOME/doc2json/grobid2json/grobid/config.yaml" "$HOME/grobid-0.6.1/grobid-service/config/config.yaml" && \
    cp "$PDF2JSON_HOME/doc2json/grobid2json/grobid/grobid.properties" "$HOME/grobid-0.6.1/grobid-home/config/grobid.properties"

# Download the necessary model checkpoints into $HOME/app/models
# (word_embedding/ and memsum_arxiv/).
WORKDIR $HOME/app/models/
RUN python -c "from huggingface_hub import snapshot_download; \
model_folder = './'; \
snapshot_download('nianlong/memsum-word-embedding', local_dir = model_folder + 'word_embedding'); \
snapshot_download('nianlong/memsum-arxiv-summarization', local_dir = model_folder + 'memsum_arxiv')"

# Keep a copy of this Dockerfile inside the image.
# --chown: this COPY runs after the recursive chown above, so without it the
# file would be owned by root.
WORKDIR $HOME/app/src
COPY --chown=myuser:myuser ./Dockerfile .

# Service code: clone MemSum first, then lay the local services/ directory
# over the same folder (most frequently changing input, hence copied last).
WORKDIR $HOME/app/src/services
RUN git clone https://github.com/nianlonggu/MemSum
COPY --chown=myuser:myuser ./services/ .
# Start the app.
# CMD is written in exec form, so it launches plain bash directly and does NOT
# go through the `conda run` wrapper configured by the SHELL instruction
# earlier in this file. The script path is relative to the final WORKDIR.
CMD ["bash", "./start_service.sh"]