# Image for the text-retrieval inference app: NVIDIA PyTorch base plus
# faiss-cpu, nmslib, an Oracle JDK and a locally built Anserini.
FROM nvcr.io/nvidia/pytorch:22.01-py3

# Locale and home directory used by every later step and at runtime.
ENV HOME=/root \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8

# Build-time only: keeps apt from prompting; not persisted into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

# APP_PATH: assumed to be volume mounted with the host.
ENV APP_PATH=$HOME/np_app_text_retrieval_inference
ENV PYTHONPATH=$APP_PATH

# EXTERNAL_PATH: assumed to hold system-specific files; not volume mounted with the host.
# The directory itself is created by the `WORKDIR $EXTERNAL_PATH` instruction below.
ENV EXTERNAL_PATH=/workspace/external

### ubuntu packages
# update + install share one layer so a cached package index is never reused
# with a newer install list; the index is removed in the same layer.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise not pull it in alongside git/wget.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        ffmpeg \
        git \
        libsm6 \
        libxext6 \
        unzip \
        vim \
        wget && \
    rm -rf /var/lib/apt/lists/*

### conda
# Note: the retriever (based on pyserini) supports only the CPU build of faiss.
# -y answers the confirmation prompt, which cannot be answered during a build.
RUN conda install -y -c conda-forge faiss-cpu && \
    conda clean -afy

### application source
# Copied after the OS/conda layers so that source edits do not invalidate them.
WORKDIR $APP_PATH
COPY . $APP_PATH/

### pip
# Install the app in editable mode; nmslib is built from source (--no-binary).
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --no-binary :all: nmslib && \
    pip install --no-cache-dir -e .

### java (version >= 11 is required)
# download_oracle_jdk.sh ships with the app source; it is run from
# EXTERNAL_PATH and is expected to leave the JDK in jdk-20.0.2/ there.
WORKDIR $EXTERNAL_PATH
ENV JAVA_HOME=${EXTERNAL_PATH}/jdk-20.0.2
RUN cp $APP_PATH/download_oracle_jdk.sh $EXTERNAL_PATH/ && \
    sh download_oracle_jdk.sh && \
    ln -sf ${JAVA_HOME}/bin/java /opt/conda/bin/java

### anserini: needed for sparse retrieval/indexing
# `mvn -v` is a sanity check that maven resolves and sees the JDK.
RUN conda install -y -c conda-forge maven && \
    conda clean -afy && \
    mvn -v

# NOTE(review): the clone tracks the default branch and is not pinned to a
# tag/commit, so rebuilds may pick up a different Anserini — consider pinning.
RUN git clone https://github.com/castorini/anserini.git

WORKDIR $EXTERNAL_PATH/anserini
RUN mvn clean -Dmaven.test.skip=true package appassembler:assemble

# Copying the fatjar from anserini into the app is done at entrypoint time:
#COPY $EXTERNAL_PATH/anserini/target/anserini-*-fatjar.jar $APP_PATH/pyserini/resources/jars

## entrypoint: entrypoint.sh is called instead of a CMD defined here
WORKDIR $APP_PATH
#CMD ["bash"]