Added doc on dev-slim (#2627)
Browse filesAdded doc on dev-slim
### Type of change
- [x] Documentation Update
- [x] Refactoring
---------
Co-authored-by: Kevin Hu <kevinhu.sh@gmail.com>
- Dockerfile.scratch → Dockerfile.slim +7 -4
- docs/guides/develop/build_docker_image.md +11 -2
- poetry.lock +0 -0
- pyproject.toml +12 -12
- rag/settings.py +8 -1
Dockerfile.scratch → Dockerfile.slim
RENAMED
@@ -2,6 +2,8 @@
|
|
2 |
FROM ubuntu:24.04 AS base
|
3 |
USER root
|
4 |
|
|
|
|
|
5 |
WORKDIR /ragflow
|
6 |
|
7 |
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
|
@@ -43,7 +45,11 @@ RUN cd web && npm i --force && npm run build
|
|
43 |
COPY pyproject.toml poetry.toml poetry.lock ./
|
44 |
|
45 |
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
|
46 |
-
|
|
|
|
|
|
|
|
|
47 |
|
48 |
# production stage
|
49 |
FROM base AS production
|
@@ -77,9 +83,6 @@ ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"
|
|
77 |
# Download nltk data
|
78 |
RUN python3 -m nltk.downloader wordnet punkt punkt_tab
|
79 |
|
80 |
-
# Copy models downloaded via download_deps.sh
|
81 |
-
# COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/
|
82 |
-
|
83 |
ENV PYTHONPATH=/ragflow/
|
84 |
|
85 |
COPY docker/entrypoint.sh ./entrypoint.sh
|
|
|
2 |
FROM ubuntu:24.04 AS base
|
3 |
USER root
|
4 |
|
5 |
+
ENV LIGHTEN=1
|
6 |
+
|
7 |
WORKDIR /ragflow
|
8 |
|
9 |
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
|
|
|
45 |
COPY pyproject.toml poetry.toml poetry.lock ./
|
46 |
|
47 |
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
|
48 |
+
if [ "$LIGHTEN" -eq 0 ]; then \
|
49 |
+
/root/.local/bin/poetry install --sync --no-cache --no-root --with=full; \
|
50 |
+
else \
|
51 |
+
/root/.local/bin/poetry install --sync --no-cache --no-root; \
|
52 |
+
fi
|
53 |
|
54 |
# production stage
|
55 |
FROM base AS production
|
|
|
83 |
# Download nltk data
|
84 |
RUN python3 -m nltk.downloader wordnet punkt punkt_tab
|
85 |
|
|
|
|
|
|
|
86 |
ENV PYTHONPATH=/ragflow/
|
87 |
|
88 |
COPY docker/entrypoint.sh ./entrypoint.sh
|
docs/guides/develop/build_docker_image.md
CHANGED
@@ -31,13 +31,22 @@ To build a RAGFlow Docker image from source code:
|
|
31 |
|
32 |
```bash
|
33 |
git clone https://github.com/infiniflow/ragflow.git
|
|
|
34 |
```
|
35 |
|
36 |
### Build the Docker Image
|
37 |
|
38 |
Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference purposes.
|
39 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
```bash
|
41 |
cd ragflow/
|
42 |
-
docker build -f Dockerfile
|
43 |
-
```
|
|
|
|
31 |
|
32 |
```bash
|
33 |
git clone https://github.com/infiniflow/ragflow.git
|
34 |
+
cd ragflow
|
35 |
```
|
36 |
|
37 |
### Build the Docker Image
|
38 |
|
39 |
Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference purposes.
|
40 |
|
41 |
+
#### Build image `ragflow:dev-slim`
|
42 |
+
```bash
|
43 |
+
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
|
44 |
+
```
|
45 |
+
This image is about 1 GB in size. It relies on external LLM services because it does not contain embedding models.
|
46 |
+
|
47 |
+
#### Build image `ragflow:dev`
|
48 |
```bash
|
49 |
cd ragflow/
|
50 |
+
docker build -f Dockerfile -t infiniflow/ragflow:dev .
|
51 |
+
```
|
52 |
+
This image is about 11 GB in size. It contains embedding models and can run inference on a local CPU/GPU or through external LLM services.
|
poetry.lock
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
CHANGED
@@ -17,7 +17,6 @@ azure-storage-file-datalake = "12.16.0"
|
|
17 |
anthropic = "=0.34.1"
|
18 |
arxiv = "2.1.3"
|
19 |
aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
|
20 |
-
bcembedding = "0.1.3"
|
21 |
bio = "1.7.1"
|
22 |
boto3 = "1.34.140"
|
23 |
botocore = "1.34.140"
|
@@ -34,10 +33,8 @@ editdistance = "0.8.1"
|
|
34 |
elastic-transport = "8.12.0"
|
35 |
elasticsearch = "8.12.1"
|
36 |
elasticsearch-dsl = "8.12.0"
|
37 |
-
fastembed = "^0.3.6"
|
38 |
fasttext = "0.9.3"
|
39 |
filelock = "3.15.4"
|
40 |
-
flagembedding = "1.2.10"
|
41 |
flask = "3.0.3"
|
42 |
flask-cors = "5.0.0"
|
43 |
flask-login = "0.6.3"
|
@@ -58,7 +55,6 @@ nltk = "3.9.1"
|
|
58 |
numpy = "1.26.4"
|
59 |
ollama = "0.2.1"
|
60 |
onnxruntime = "1.17.3"
|
61 |
-
onnxruntime-gpu = { version = "^1.17.1", markers = "platform_machine == 'x86_64'" }
|
62 |
openai = "1.12.0"
|
63 |
opencv-python = "4.9.0.80"
|
64 |
opencv-python-headless = "4.9.0.80"
|
@@ -97,8 +93,6 @@ tabulate = "0.9.0"
|
|
97 |
tencentcloud-sdk-python = "3.0.1215"
|
98 |
tika = "2.6.0"
|
99 |
tiktoken = "0.6.0"
|
100 |
-
torch = "2.3.0"
|
101 |
-
transformers = "4.38.1"
|
102 |
umap_learn = "0.5.6"
|
103 |
vertexai = "1.64.0"
|
104 |
volcengine = "1.0.146"
|
@@ -107,7 +101,7 @@ webdriver-manager = "4.0.1"
|
|
107 |
werkzeug = "3.0.3"
|
108 |
wikipedia = "1.4.0"
|
109 |
word2number = "1.1"
|
110 |
-
xgboost = "
|
111 |
xpinyin = "0.7.6"
|
112 |
yfinance = "0.1.96"
|
113 |
zhipuai = "2.0.1"
|
@@ -117,12 +111,18 @@ python-docx = "^1.1.2"
|
|
117 |
pypdf2 = "^3.0.1"
|
118 |
graspologic = "^3.4.1"
|
119 |
pymysql = "^1.1.1"
|
120 |
-
mini-racer = "^0.12.4"
|
121 |
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
[build-system]
|
128 |
requires = ["poetry-core"]
|
|
|
17 |
anthropic = "=0.34.1"
|
18 |
arxiv = "2.1.3"
|
19 |
aspose-slides = { version = "^24.9.0", markers = "platform_machine == 'x86_64'" }
|
|
|
20 |
bio = "1.7.1"
|
21 |
boto3 = "1.34.140"
|
22 |
botocore = "1.34.140"
|
|
|
33 |
elastic-transport = "8.12.0"
|
34 |
elasticsearch = "8.12.1"
|
35 |
elasticsearch-dsl = "8.12.0"
|
|
|
36 |
fasttext = "0.9.3"
|
37 |
filelock = "3.15.4"
|
|
|
38 |
flask = "3.0.3"
|
39 |
flask-cors = "5.0.0"
|
40 |
flask-login = "0.6.3"
|
|
|
55 |
numpy = "1.26.4"
|
56 |
ollama = "0.2.1"
|
57 |
onnxruntime = "1.17.3"
|
|
|
58 |
openai = "1.12.0"
|
59 |
opencv-python = "4.9.0.80"
|
60 |
opencv-python-headless = "4.9.0.80"
|
|
|
93 |
tencentcloud-sdk-python = "3.0.1215"
|
94 |
tika = "2.6.0"
|
95 |
tiktoken = "0.6.0"
|
|
|
|
|
96 |
umap_learn = "0.5.6"
|
97 |
vertexai = "1.64.0"
|
98 |
volcengine = "1.0.146"
|
|
|
101 |
werkzeug = "3.0.3"
|
102 |
wikipedia = "1.4.0"
|
103 |
word2number = "1.1"
|
104 |
+
xgboost = "1.5.0"
|
105 |
xpinyin = "0.7.6"
|
106 |
yfinance = "0.1.96"
|
107 |
zhipuai = "2.0.1"
|
|
|
111 |
pypdf2 = "^3.0.1"
|
112 |
graspologic = "^3.4.1"
|
113 |
pymysql = "^1.1.1"
|
|
|
114 |
|
115 |
+
|
116 |
+
[tool.poetry.group.full]
|
117 |
+
optional = true
|
118 |
+
|
119 |
+
[tool.poetry.group.full.dependencies]
|
120 |
+
bcembedding = "0.1.3"
|
121 |
+
fastembed = "^0.3.6"
|
122 |
+
flagembedding = "1.2.10"
|
123 |
+
mini-racer = "^0.12.4"
|
124 |
+
torch = "2.3.0"
|
125 |
+
transformers = "4.38.1"
|
126 |
|
127 |
[build-system]
|
128 |
requires = ["poetry-core"]
|
rag/settings.py
CHANGED
@@ -14,6 +14,7 @@
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import os
|
|
|
17 |
from api.utils import get_base_config, decrypt_database_config
|
18 |
from api.utils.file_utils import get_project_base_directory
|
19 |
from api.utils.log_utils import LoggerFactory, getLogger
|
@@ -48,10 +49,16 @@ minio_logger = getLogger("minio")
|
|
48 |
s3_logger = getLogger("s3")
|
49 |
azure_logger = getLogger("azure")
|
50 |
cron_logger = getLogger("cron_logger")
|
51 |
-
cron_logger.setLevel(20)
|
52 |
chunk_logger = getLogger("chunk_logger")
|
53 |
database_logger = getLogger("database")
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
SVR_QUEUE_NAME = "rag_flow_svr_queue"
|
56 |
SVR_QUEUE_RETENTION = 60*60
|
57 |
SVR_QUEUE_MAX_LEN = 1024
|
|
|
14 |
# limitations under the License.
|
15 |
#
|
16 |
import os
|
17 |
+
import logging
|
18 |
from api.utils import get_base_config, decrypt_database_config
|
19 |
from api.utils.file_utils import get_project_base_directory
|
20 |
from api.utils.log_utils import LoggerFactory, getLogger
|
|
|
49 |
s3_logger = getLogger("s3")
|
50 |
azure_logger = getLogger("azure")
|
51 |
cron_logger = getLogger("cron_logger")
|
|
|
52 |
chunk_logger = getLogger("chunk_logger")
|
53 |
database_logger = getLogger("database")
|
54 |
|
55 |
+
for logger in [es_logger, minio_logger, s3_logger, azure_logger, cron_logger, chunk_logger, database_logger]:
|
56 |
+
logger.basicConfig(
|
57 |
+
level=logging.INFO,
|
58 |
+
format="%(asctime)-15s %(levelname)-8s (%(process)d) %(message)s",
|
59 |
+
)
|
60 |
+
|
61 |
+
|
62 |
SVR_QUEUE_NAME = "rag_flow_svr_queue"
|
63 |
SVR_QUEUE_RETENTION = 60*60
|
64 |
SVR_QUEUE_MAX_LEN = 1024
|