zhichyu Kevin Hu committed on
Commit
a694851
·
1 Parent(s): 809dc5c

Added doc on dev-slim (#2627)

Browse files

Added doc on dev-slim

### Type of change

- [x] Documentation Update
- [x] Refactoring

---------

Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>

Dockerfile.scratch → Dockerfile.slim RENAMED
@@ -2,6 +2,8 @@
2
  FROM ubuntu:24.04 AS base
3
  USER root
4
 
 
 
5
  WORKDIR /ragflow
6
 
7
  RUN rm -f /etc/apt/apt.conf.d/docker-clean \
@@ -43,7 +45,11 @@ RUN cd web && npm i --force && npm run build
43
  COPY pyproject.toml poetry.toml poetry.lock ./
44
 
45
  RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
46
- /root/.local/bin/poetry install --sync --no-cache --no-root
 
 
 
 
47
 
48
  # production stage
49
  FROM base AS production
@@ -77,9 +83,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
77
  # Download nltk data
78
  RUN python3 -m nltk.downloader wordnet punkt punkt_tab
79
 
80
- # Copy models downloaded via download_deps.sh
81
- # COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
82
-
83
  ENV PYTHONPATH=/ragflow/
84
 
85
  COPY docker/entrypoint.sh ./entrypoint.sh
 
2
  FROM ubuntu:24.04 AS base
3
  USER root
4
 
5
+ ENV LIGHTEN=1
6
+
7
  WORKDIR /ragflow
8
 
9
  RUN rm -f /etc/apt/apt.conf.d/docker-clean \
 
45
  COPY pyproject.toml poetry.toml poetry.lock ./
46
 
47
  RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
48
+ if [ "$LIGHTEN" -eq 0 ]; then \
49
+ /root/.local/bin/poetry install --sync --no-cache --no-root --with=full; \
50
+ else \
51
+ /root/.local/bin/poetry install --sync --no-cache --no-root; \
52
+ fi
53
 
54
  # production stage
55
  FROM base AS production
 
83
  # Download nltk data
84
  RUN python3 -m nltk.downloader wordnet punkt punkt_tab
85
 
 
 
 
86
  ENV PYTHONPATH=/ragflow/
87
 
88
  COPY docker/entrypoint.sh ./entrypoint.sh
docs/guides/develop/build_docker_image.md CHANGED
@@ -31,13 +31,22 @@ To build a RAGFlow Docker image from source code:
31
 
32
  ```bash
33
  git clone https://github.com/infiniflow/ragflow.git
 
34
  ```
35
 
36
  ### Build the Docker Image
37
 
38
  Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference.
39
 
 
 
 
 
 
 
 
40
  ```bash
41
  cd ragflow/
42
- docker build -f Dockerfile.scratch -t infiniflow/ragflow:dev .
43
- ```
 
 
31
 
32
  ```bash
33
  git clone https://github.com/infiniflow/ragflow.git
34
+ cd ragflow
35
  ```
36
 
37
  ### Build the Docker Image
38
 
39
  Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference.
40
 
41
+ #### Build image `ragflow:dev-slim`
42
+ ```bash
43
+ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
44
+ ```
45
+ This image's size is about 1GB. It relies on external LLM services since it doesn't contain embedding models.
46
+
47
+ #### Build image `ragflow:dev`
48
  ```bash
49
  cd ragflow/
50
+ docker build -f Dockerfile -t infiniflow/ragflow:dev .
51
+ ```
52
+ This image's size is about 11GB. It contains embedding models, and can run inference on a local CPU/GPU or through external LLM services.
poetry.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pyproject.toml CHANGED
@@ -17,7 +17,6 @@ azure-storage-file-datalake = "12.16.0"
17
  anthropic = "=0.34.1"
18
  arxiv = "2.1.3"
19
  aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
20
- bcembedding = "0.1.3"
21
  bio = "1.7.1"
22
  boto3 = "1.34.140"
23
  botocore = "1.34.140"
@@ -34,10 +33,8 @@ editdistance = "0.8.1"
34
  elastic-transport = "8.12.0"
35
  elasticsearch = "8.12.1"
36
  elasticsearch-dsl = "8.12.0"
37
- fastembed = "^0.3.6"
38
  fasttext = "0.9.3"
39
  filelock = "3.15.4"
40
- flagembedding = "1.2.10"
41
  flask = "3.0.3"
42
  flask-cors = "5.0.0"
43
  flask-login = "0.6.3"
@@ -58,7 +55,6 @@ nltk = "3.9.1"
58
  numpy = "1.26.4"
59
  ollama = "0.2.1"
60
  onnxruntime = "1.17.3"
61
- onnxruntime-gpu = { version = "^1.17.1", markers = "platform_machine == 'x86_64'" }
62
  openai = "1.12.0"
63
  opencv-python = "4.9.0.80"
64
  opencv-python-headless = "4.9.0.80"
@@ -97,8 +93,6 @@ tabulate = "0.9.0"
97
  tencentcloud-sdk-python = "3.0.1215"
98
  tika = "2.6.0"
99
  tiktoken = "0.6.0"
100
- torch = "2.3.0"
101
- transformers = "4.38.1"
102
  umap_learn = "0.5.6"
103
  vertexai = "1.64.0"
104
  volcengine = "1.0.146"
@@ -107,7 +101,7 @@ webdriver-manager = "4.0.1"
107
  werkzeug = "3.0.3"
108
  wikipedia = "1.4.0"
109
  word2number = "1.1"
110
- xgboost = "2.1.0"
111
  xpinyin = "0.7.6"
112
  yfinance = "0.1.96"
113
  zhipuai = "2.0.1"
@@ -117,12 +111,18 @@ python-docx = "^1.1.2"
117
  pypdf2 = "^3.0.1"
118
  graspologic = "^3.4.1"
119
  pymysql = "^1.1.1"
120
- mini-racer = "^0.12.4"
121
 
122
- [[tool.poetry.source]]
123
- name = "tsinghua"
124
- url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
125
- priority = "primary"
 
 
 
 
 
 
 
126
 
127
  [build-system]
128
  requires = ["poetry-core"]
 
17
  anthropic = "=0.34.1"
18
  arxiv = "2.1.3"
19
  aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
 
20
  bio = "1.7.1"
21
  boto3 = "1.34.140"
22
  botocore = "1.34.140"
 
33
  elastic-transport = "8.12.0"
34
  elasticsearch = "8.12.1"
35
  elasticsearch-dsl = "8.12.0"
 
36
  fasttext = "0.9.3"
37
  filelock = "3.15.4"
 
38
  flask = "3.0.3"
39
  flask-cors = "5.0.0"
40
  flask-login = "0.6.3"
 
55
  numpy = "1.26.4"
56
  ollama = "0.2.1"
57
  onnxruntime = "1.17.3"
 
58
  openai = "1.12.0"
59
  opencv-python = "4.9.0.80"
60
  opencv-python-headless = "4.9.0.80"
 
93
  tencentcloud-sdk-python = "3.0.1215"
94
  tika = "2.6.0"
95
  tiktoken = "0.6.0"
 
 
96
  umap_learn = "0.5.6"
97
  vertexai = "1.64.0"
98
  volcengine = "1.0.146"
 
101
  werkzeug = "3.0.3"
102
  wikipedia = "1.4.0"
103
  word2number = "1.1"
104
+ xgboost = "1.5.0"
105
  xpinyin = "0.7.6"
106
  yfinance = "0.1.96"
107
  zhipuai = "2.0.1"
 
111
  pypdf2 = "^3.0.1"
112
  graspologic = "^3.4.1"
113
  pymysql = "^1.1.1"
 
114
 
115
+
116
+ [tool.poetry.group.full]
117
+ optional = true
118
+
119
+ [tool.poetry.group.full.dependencies]
120
+ bcembedding = "0.1.3"
121
+ fastembed = "^0.3.6"
122
+ flagembedding = "1.2.10"
123
+ mini-racer = "^0.12.4"
124
+ torch = "2.3.0"
125
+ transformers = "4.38.1"
126
 
127
  [build-system]
128
  requires = ["poetry-core"]
rag/settings.py CHANGED
@@ -14,6 +14,7 @@
14
  # limitations under the License.
15
  #
16
  import os
 
17
  from api.utils import get_base_config, decrypt_database_config
18
  from api.utils.file_utils import get_project_base_directory
19
  from api.utils.log_utils import LoggerFactory, getLogger
@@ -48,10 +49,16 @@ minio_logger = getLogger("minio")
48
  s3_logger = getLogger("s3")
49
  azure_logger = getLogger("azure")
50
  cron_logger = getLogger("cron_logger")
51
- cron_logger.setLevel(20)
52
  chunk_logger = getLogger("chunk_logger")
53
  database_logger = getLogger("database")
54
 
 
 
 
 
 
 
 
55
  SVR_QUEUE_NAME = "rag_flow_svr_queue"
56
  SVR_QUEUE_RETENTION = 60*60
57
  SVR_QUEUE_MAX_LEN = 1024
 
14
  # limitations under the License.
15
  #
16
  import os
17
+ import logging
18
  from api.utils import get_base_config, decrypt_database_config
19
  from api.utils.file_utils import get_project_base_directory
20
  from api.utils.log_utils import LoggerFactory, getLogger
 
49
  s3_logger = getLogger("s3")
50
  azure_logger = getLogger("azure")
51
  cron_logger = getLogger("cron_logger")
 
52
  chunk_logger = getLogger("chunk_logger")
53
  database_logger = getLogger("database")
54
 
55
+ for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
56
+ logger.basicConfig(
57
+ level=logging.INFO,
58
+ format="%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s",
59
+ )
60
+
61
+
62
  SVR_QUEUE_NAME = "rag_flow_svr_queue"
63
  SVR_QUEUE_RETENTION = 60*60
64
  SVR_QUEUE_MAX_LEN = 1024