File size: 497 Bytes
a5403db
 
 
6a12082
 
a5403db
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
# Base image: the official Python 3.8 image.
FROM python:3.8

# System dependencies: a Java JDK/JRE plus git (git is needed by the
# `pip install ... @ git+https://...` step and the repository clone below).
# - `apt-get` is used instead of `apt`, whose CLI is not meant for scripts.
# - update + install share one layer so a stale package index is never cached.
# - `--no-install-recommends` and removing the apt lists keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        default-jdk \
        default-jre \
        git \
    && rm -rf /var/lib/apt/lists/*

# Point Java-aware tooling at the JVM path used by the default-java packages.
ENV JAVA_HOME=/usr/lib/jvm/default-java
# Install Data-Juicer with all optional extras straight from the upstream
# repository. `--no-cache-dir` keeps pip's download cache out of the image.
RUN pip install --no-cache-dir "py-data-juicer[all] @ git+https://github.com/alibaba/data-juicer.git"

# Pin fsspec AFTER the main install so this exact version replaces whatever
# the dependency resolution above selected.
RUN pip install --no-cache-dir fsspec==2023.3.0
# Create an unprivileged user with a fixed UID and a home directory, then
# run every following instruction (and the container itself) as that user.
RUN useradd -m -u 1000 dj
USER dj
ENV HOME=/home/dj

# Clone the Data-Juicer sources into the user's home directory.
# WORKDIR replaces the former `cd $HOME/ && ... && cd ../` chain. Because the
# clone runs as `dj`, the checkout is already owned by that user, so the former
# trailing `chown -R dj $HOME/` (a no-op that only added a layer) is dropped.
WORKDIR $HOME
RUN git clone https://github.com/alibaba/data-juicer.git

# Launch the demo from its own directory so the relative `app.py` resolves.
WORKDIR $HOME/data-juicer/demos/tool_dataset_splitting_by_language
CMD ["streamlit", "run", "app.py"]