zhijian's picture
Create Dockerfile
a5403db
raw
history blame
No virus
483 Bytes
# Image that launches the Data-Juicer "tool_dataset_splitting_by_language"
# Streamlit demo as the unprivileged user `dj`.
FROM python:3.8

# System packages are installed while the build is still running as root.
# update + install + list cleanup share one layer so the apt package index
# never persists in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        default-jdk \
        default-jre \
        git \
    && rm -rf /var/lib/apt/lists/*
ENV JAVA_HOME=/usr/lib/jvm/default-java

# Install Data-Juicer from the head of its upstream repository.
RUN pip install --no-cache-dir git+https://github.com/alibaba/data-juicer.git
# Deliberately a separate step that runs after the Data-Juicer install, so the
# fsspec pin is applied on top of whatever that install pulled in.
RUN pip install --no-cache-dir fsspec==2023.3.0 jsonlines

# Create the runtime user (UID 1000, with a home directory) and drop root
# privileges for every remaining instruction and for the running container.
RUN useradd -m -u 1000 dj
USER dj
ENV HOME=/home/dj

# The clone runs as `dj`, so the checkout is already owned by that user and
# no follow-up chown is required.
RUN git clone https://github.com/alibaba/data-juicer.git "$HOME/data-juicer"
WORKDIR $HOME/data-juicer/demos/tool_dataset_splitting_by_language

# Exec form: streamlit is started directly rather than through a shell.
CMD ["streamlit", "run", "app.py"]