nianlonggu committed on
Commit
f8d844a
1 Parent(s): 6f85b00
Files changed (2) hide show
  1. Dockerfile +13 -11
  2. services/app.py +2 -2
Dockerfile CHANGED
@@ -1,27 +1,29 @@
1
  FROM ubuntu:22.04
2
 
3
- # Set Environment Variable
4
- USER root
5
- ENV HOME="/root"
6
  ENV JAVA_TOOL_OPTIONS="-Dhttps.protocols=TLSv1.2"
7
- ENV PDF2JSON_HOME="/app/src/s2orc-doc2json"
8
 
9
  # install system-wide deps for python and node
10
  RUN apt-get -yqq update && \
11
  apt-get -yqq install software-properties-common curl wget zip screen git gcc build-essential openjdk-8-jdk
12
 
 
13
  # Install Miniconda
 
14
  RUN curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
15
- bash Miniconda3-latest-Linux-x86_64.sh -p /miniconda -b && \
16
  rm Miniconda3-latest-Linux-x86_64.sh
17
- ENV PATH=/miniconda/bin:${PATH}
18
 
19
  # Create a Python 3.10 environment
20
  RUN conda create -n my_env python=3.10
21
 
22
  SHELL ["conda", "run", "-n", "my_env", "/bin/bash", "-c"]
23
 
24
- WORKDIR /app/src
25
  COPY ./requirements.txt .
26
  RUN pip install -r requirements.txt
27
 
@@ -39,14 +41,14 @@ RUN ./gradlew clean install && \
39
  cp $PDF2JSON_HOME/doc2json/grobid2json/grobid/config.yaml $HOME/grobid-0.6.1/grobid-service/config/config.yaml && \
40
  cp $PDF2JSON_HOME/doc2json/grobid2json/grobid/grobid.properties $HOME/grobid-0.6.1/grobid-home/config/grobid.properties
41
 
42
- WORKDIR /app/models/
43
  # Download necessary model checkpoint
44
- RUN python -c "from huggingface_hub import snapshot_download; model_folder = '/app/models/'; snapshot_download('nianlong/memsum-word-embedding', local_dir = model_folder + 'word_embedding'); snapshot_download('nianlong/memsum-arxiv-summarization', local_dir = model_folder + 'memsum_arxiv' )"
45
 
46
- WORKDIR /app/src
47
  COPY ./Dockerfile .
48
 
49
- WORKDIR /app/src/services
50
  RUN git clone https://github.com/nianlonggu/MemSum
51
 
52
  COPY ./services/ .
 
1
  FROM ubuntu:22.04
2
 
3
+ RUN useradd -m -s /bin/bash myuser
4
+ ENV HOME=/home/myuser
5
+
6
  ENV JAVA_TOOL_OPTIONS="-Dhttps.protocols=TLSv1.2"
7
+ ENV PDF2JSON_HOME=$HOME/app/src/s2orc-doc2json
8
 
9
  # install system-wide deps for python and node
10
  RUN apt-get -yqq update && \
11
  apt-get -yqq install software-properties-common curl wget zip screen git gcc build-essential openjdk-8-jdk
12
 
13
+ USER myuser
14
  # Install Miniconda
15
+ WORKDIR $HOME
16
  RUN curl -LO https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
17
+ bash Miniconda3-latest-Linux-x86_64.sh -p $HOME/miniconda -b && \
18
  rm Miniconda3-latest-Linux-x86_64.sh
19
+ ENV PATH=$HOME/miniconda/bin:${PATH}
20
 
21
  # Create a Python 3.10 environment
22
  RUN conda create -n my_env python=3.10
23
 
24
  SHELL ["conda", "run", "-n", "my_env", "/bin/bash", "-c"]
25
 
26
+ WORKDIR $HOME/app/src
27
  COPY ./requirements.txt .
28
  RUN pip install -r requirements.txt
29
 
 
41
  cp $PDF2JSON_HOME/doc2json/grobid2json/grobid/config.yaml $HOME/grobid-0.6.1/grobid-service/config/config.yaml && \
42
  cp $PDF2JSON_HOME/doc2json/grobid2json/grobid/grobid.properties $HOME/grobid-0.6.1/grobid-home/config/grobid.properties
43
 
44
+ WORKDIR $HOME/app/models/
45
  # Download necessary model checkpoint
46
+ RUN python -c "from huggingface_hub import snapshot_download; model_folder = './'; snapshot_download('nianlong/memsum-word-embedding', local_dir = model_folder + 'word_embedding'); snapshot_download('nianlong/memsum-arxiv-summarization', local_dir = model_folder + 'memsum_arxiv' )"
47
 
48
+ WORKDIR $HOME/app/src
49
  COPY ./Dockerfile .
50
 
51
+ WORKDIR $HOME/app/src/services
52
  RUN git clone https://github.com/nianlonggu/MemSum
53
 
54
  COPY ./services/ .
services/app.py CHANGED
@@ -45,8 +45,8 @@ def summarize_paper(memsum, paper_info):
45
 
46
  @st.cache_resource
47
  def load_models():
48
- memsum = MemSum( "/app/models/memsum_arxiv/model.pt",
49
- "/app/models/word_embedding/vocabulary_200dim.pkl",
50
  gpu = None, max_doc_len = 500 )
51
  return memsum
52
 
 
45
 
46
  @st.cache_resource
47
  def load_models():
48
+ memsum = MemSum( "/home/myuser/app/models/memsum_arxiv/model.pt",
49
+ "/home/myuser/app/models/word_embedding/vocabulary_200dim.pkl",
50
  gpu = None, max_doc_len = 500 )
51
  return memsum
52