XuBailing committed on
Commit
feed15f
1 Parent(s): 5b331e7

Upload 17 files

Browse files
Files changed (17) hide show
  1. .gitignore +177 -0
  2. CONTRIBUTING.md +22 -0
  3. Dockerfile +36 -0
  4. Dockerfile-cuda +14 -0
  5. LICENSE +201 -0
  6. README-lang.md +261 -0
  7. README_lang-en.md +247 -0
  8. Xubailing Log.txt +16 -0
  9. api.py +552 -0
  10. cli.bat +2 -0
  11. cli.py +88 -0
  12. cli.sh +2 -0
  13. cli_demo.py +88 -0
  14. release.py +50 -0
  15. requirements.txt +40 -0
  16. webui.py +562 -0
  17. webui_st.py +386 -0
.gitignore ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ */**/__pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/#use-with-ide
111
+ .pdm.toml
112
+
113
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114
+ __pypackages__/
115
+
116
+ # Celery stuff
117
+ celerybeat-schedule
118
+ celerybeat.pid
119
+
120
+ # SageMath parsed files
121
+ *.sage.py
122
+
123
+ # Environments
124
+ .env
125
+ .venv
126
+ env/
127
+ venv/
128
+ ENV/
129
+ env.bak/
130
+ venv.bak/
131
+
132
+ # Spyder project settings
133
+ .spyderproject
134
+ .spyproject
135
+
136
+ # Rope project settings
137
+ .ropeproject
138
+
139
+ # mkdocs documentation
140
+ /site
141
+
142
+ # mypy
143
+ .mypy_cache/
144
+ .dmypy.json
145
+ dmypy.json
146
+
147
+ # Pyre type checker
148
+ .pyre/
149
+
150
+ # pytype static type analyzer
151
+ .pytype/
152
+
153
+ # Cython debug symbols
154
+ cython_debug/
155
+
156
+ # PyCharm
157
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
160
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161
+ .idea/
162
+
163
+ # Other files
164
+ output/*
165
+ log/*
166
+ .chroma
167
+ vector_store/*
168
+ content/*
169
+ api_content/*
170
+ knowledge_base/*
171
+
172
+ llm/*
173
+ embedding/*
174
+
175
+ pyrightconfig.json
176
+ loader/tmp_files
177
+ flagged/*
CONTRIBUTING.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 贡献指南
2
+
3
+ 欢迎!我们是一个非常友好的社区,非常高兴您想要帮助我们让这个应用程序变得更好。但是,请您遵循一些通用准则以保持组织有序。
4
+
5
+ 1. 确保为您要修复的错误或要添加的功能创建了一个[问题](https://github.com/imClumsyPanda/langchain-ChatGLM/issues),尽可能保持它们小。
6
+ 2. 请使用 `git pull --rebase` 来拉取和衍合上游的更新。
7
+ 3. 将提交合并为格式良好的提交。在提交说明中单独一行提到要解决的问题,如`Fix #<bug>`(有关更多可以使用的关键字,请参见[将拉取请求链接到问题](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue))。
8
+ 4. 推送到`dev`。在说明中提到正在解决的问题。
9
+
10
+ ---
11
+
12
+ # Contribution Guide
13
+
14
+ Welcome! We're a pretty friendly community, and we're thrilled that you want to help make this app even better. However, we ask that you follow some general guidelines to keep things organized around here.
15
+
16
+ 1. Make sure an [issue](https://github.com/imClumsyPanda/langchain-ChatGLM/issues) is created for the bug you're about to fix, or feature you're about to add. Keep them as small as possible.
17
+
18
+ 2. Please use `git pull --rebase` to fetch upstream updates and rebase your local commits on top of them.
19
+
20
+ 3. Rebase commits into well-formatted commits. Mention the issue being resolved in the commit message on a line all by itself like `Fixes #<bug>` (refer to [Linking a pull request to an issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) for more keywords you can use).
21
+
22
+ 4. Push into `dev`. Mention which bug is being resolved in the description.
Dockerfile ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.8
2
+
3
+ MAINTAINER "chatGLM"
4
+
5
+ COPY agent /chatGLM/agent
6
+
7
+ COPY chains /chatGLM/chains
8
+
9
+ COPY configs /chatGLM/configs
10
+
11
+ COPY content /chatGLM/content
12
+
13
+ COPY models /chatGLM/models
14
+
15
+ COPY nltk_data /chatGLM/content
16
+
17
+ COPY requirements.txt /chatGLM/
18
+
19
+ COPY cli_demo.py /chatGLM/
20
+
21
+ COPY textsplitter /chatGLM/
22
+
23
+ COPY webui.py /chatGLM/
24
+
25
+ WORKDIR /chatGLM
26
+
27
+ RUN pip install --user torch torchvision tensorboard cython -i https://pypi.tuna.tsinghua.edu.cn/simple
28
+ # RUN pip install --user 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
29
+
30
+ # RUN pip install --user 'git+https://github.com/facebookresearch/fvcore'
31
+ # install detectron2
32
+ # RUN git clone https://github.com/facebookresearch/detectron2
33
+
34
+ RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple/ --trusted-host pypi.tuna.tsinghua.edu.cn
35
+
36
+ CMD ["python","-u", "webui.py"]
Dockerfile-cuda ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
2
+ LABEL MAINTAINER="chatGLM"
3
+
4
+ COPY . /chatGLM/
5
+
6
+ WORKDIR /chatGLM
7
+
8
+ RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && echo "Asia/Shanghai" > /etc/timezone
9
+ RUN apt-get update -y && apt-get install python3 python3-pip curl libgl1 libglib2.0-0 -y && apt-get clean
10
+ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py
11
+
12
+ RUN pip3 install -r requirements.txt -i https://pypi.mirrors.ustc.edu.cn/simple/ && rm -rf `pip3 cache dir`
13
+
14
+ CMD ["python3","-u", "webui.py"]
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README-lang.md ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 基于本地知识库的 ChatGLM 等大语言模型应用实现
2
+
3
+ ## 介绍
4
+
5
+ 🌍 [_READ THIS IN ENGLISH_](README_lang-en.md)
6
+
7
+ 🤖️ 一种利用 [langchain](https://github.com/hwchase17/langchain) 思想实现的基于本地知识库的问答应用,目标是建立一套对中文场景与开源模型支持友好、可离线运行的知识库问答解决方案。
8
+
9
+ 💡 受 [GanymedeNil](https://github.com/GanymedeNil) 的项目 [document.ai](https://github.com/GanymedeNil/document.ai) 和 [AlexZhangji](https://github.com/AlexZhangji) 创建的 [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216) 启发,建立了全流程可使用开源模型实现的本地知识库问答应用。现已支持使用 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) 等大语言模型直接接入,或通过 [fastchat](https://github.com/lm-sys/FastChat) api 形式接入 Vicuna, Alpaca, LLaMA, Koala, RWKV 等模型。
10
+
11
+ ✅ 本项目中 Embedding 默认选用的是 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main),LLM 默认选用的是 [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B)。依托上述模型,本项目可实现全部使用**开源**模型**离线私有部署**。
12
+
13
+ ⛓️ 本项目实现原理如下图所示,过程包括加载文件 -> 读取文本 -> 文本分割 -> 文本向量化 -> 问句向量化 -> 在文本向量中匹配出与问句向量最相似的`top k`个 -> 匹配出的文本作为上下文和问题一起添加到`prompt`中 -> 提交给`LLM`生成回答。
14
+
15
+ 📺 [原理介绍视频](https://www.bilibili.com/video/BV13M4y1e7cN/?share_source=copy_web&vd_source=e6c5aafe684f30fbe41925d61ca6d514)
16
+
17
+ ![实现原理图](img/langchain+chatglm.png)
18
+
19
+ 从文档处理角度来看,实现流程如下:
20
+
21
+ ![实现原理图2](img/langchain+chatglm2.png)
22
+
23
+
24
+ 🚩 本项目未涉及微调、训练过程,但可利用微调或训练对本项目效果进行优化。
25
+
26
+ 🐳 Docker镜像:registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0 (感谢 @InkSong🌲 )
27
+
28
+ 💻 运行方式:docker run -d -p 80:7860 --gpus all registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0 
29
+
30
+ 🌐 [AutoDL 镜像](https://www.codewithgpu.com/i/imClumsyPanda/langchain-ChatGLM/langchain-ChatGLM)
31
+
32
+ 📓 [ModelWhale 在线运行项目](https://www.heywhale.com/mw/project/643977aa446c45f4592a1e59)
33
+
34
+ ## 变更日志
35
+
36
+ 参见 [版本更新日志](https://github.com/imClumsyPanda/langchain-ChatGLM/releases)。
37
+
38
+ ## 硬件需求
39
+
40
+ - ChatGLM-6B 模型硬件需求
41
+
42
+ 注:如未将模型下载至本地,请执行前检查`$HOME/.cache/huggingface/`文件夹剩余空间,模型文件下载至本地需要 15 GB 存储空间。
43
+ 注:一些其它的可选启动项见[项目启动选项](docs/StartOption.md)
44
+ 模型下载方法可参考 [常见问题](docs/FAQ.md) 中 Q8。
45
+
46
+ | **量化等级** | **最低 GPU 显存**(推理) | **最低 GPU 显存**(高效参数微调) |
47
+ | -------------- | ------------------------- | --------------------------------- |
48
+ | FP16(无量化) | 13 GB | 14 GB |
49
+ | INT8 | 8 GB | 9 GB |
50
+ | INT4 | 6 GB | 7 GB |
51
+
52
+ - MOSS 模型硬件需求
53
+
54
+ 注:如未将模型下载至本地,请执行前检查`$HOME/.cache/huggingface/`文件夹剩余空间,模型文件下载至本地需要 70 GB 存储空间
55
+
56
+ 模型下载方法可参考 [常见问题](docs/FAQ.md) 中 Q8。
57
+
58
+ | **量化等级** | **最低 GPU 显存**(推理) | **最低 GPU 显存**(高效参数微调) |
59
+ |-------------------|-----------------------| --------------------------------- |
60
+ | FP16(无量化) | 68 GB | - |
61
+ | INT8 | 20 GB | - |
62
+
63
+ - Embedding 模型硬件需求
64
+
65
+ 本项目中默认选用的 Embedding 模型 [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) 约占用显存 3GB,也可修改为在 CPU 中运行。
66
+
67
+ ## Docker 整合包
68
+ 🐳 Docker镜像地址:`registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0 `🌲
69
+
70
+ 💻 一行命令运行:
71
+ ```shell
72
+ docker run -d -p 80:7860 --gpus all registry.cn-beijing.aliyuncs.com/isafetech/chatmydata:1.0
73
+ ```
74
+
75
+ - 该版本镜像大小`25.2G`,使用[v0.1.16](https://github.com/imClumsyPanda/langchain-ChatGLM/releases/tag/v0.1.16),以`nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04`为基础镜像
76
+ - 该版本内置两个`embedding`模型:`m3e-base`,`text2vec-large-chinese`,内置`fastchat+chatglm-6b`
77
+ - 该版本目标为方便一键部署使用,请确保您已经在Linux发行版上安装了NVIDIA驱动程序
78
+ - 请注意,您不需要在主机系统上安装CUDA工具包,但需要安装`NVIDIA Driver`以及`NVIDIA Container Toolkit`,请参考[安装指南](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)
79
+ - 首次拉取和启动均需要一定时间,首次启动时请参照下图使用`docker logs -f <container id>`查看日志
80
+ - 如遇到启动过程卡在`Waiting..`步骤,建议使用`docker exec -it <container id> bash`进入`/logs/`目录查看对应阶段日志
81
+ ![](img/docker_logs.png)
82
+
83
+
84
+ ## Docker 部署
85
+ 为了能让容器使用主机GPU资源,需要在主机上安装 [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-container-toolkit)。具体安装步骤如下:
86
+ ```shell
87
+ sudo apt-get update
88
+ sudo apt-get install -y nvidia-container-toolkit-base
89
+ sudo systemctl daemon-reload
90
+ sudo systemctl restart docker
91
+ ```
92
+ 安装完成后,可以使用以下命令编译镜像和启动容器:
93
+ ```
94
+ docker build -f Dockerfile-cuda -t chatglm-cuda:latest .
95
+ docker run --gpus all -d --name chatglm -p 7860:7860 chatglm-cuda:latest
96
+
97
+ #若要使用离线模型,请配置好模型路径,然后将此repo挂载到Container
98
+ docker run --gpus all -d --name chatglm -p 7860:7860 -v ~/github/langchain-ChatGLM:/chatGLM chatglm-cuda:latest
99
+ ```
100
+
101
+
102
+ ## 开发部署
103
+
104
+ ### 软件需求
105
+
106
+ 本项目已在 Python 3.8.1 - 3.10,CUDA 11.7 环境下完成测试。已在 Windows、ARM 架构的 macOS、Linux 系统中完成测试。
107
+
108
+ vue前端需要node18环境
109
+
110
+ ### 从本地加载模型
111
+
112
+ 请参考 [THUDM/ChatGLM-6B#从本地加载模型](https://github.com/THUDM/ChatGLM-6B#从本地加载模型)
113
+
114
+ ### 1. 安装环境
115
+
116
+ 参见 [安装指南](docs/INSTALL.md)。
117
+
118
+ ### 2. 设置模型默认参数
119
+
120
+ 在开始执行 Web UI 或命令行交互前,请先检查 [configs/model_config.py](configs/model_config.py) 中的各项模型参数设计是否符合需求。
121
+
122
+ 如需通过 fastchat 以 api 形式调用 llm,请参考 [fastchat 调用实现](docs/fastchat.md)
123
+
124
+ ### 3. 执行脚本体验 Web UI 或命令行交互
125
+
126
+ > 注:鉴于环境部署过程中可能遇到问题,建议首先测试命令行脚本。建议命令行脚本测试可正常运行后再运行 Web UI。
127
+
128
+ 执行 [cli_demo.py](cli_demo.py) 脚本体验**命令行交互**:
129
+ ```shell
130
+ $ python cli_demo.py
131
+ ```
132
+
133
+ 或执行 [webui.py](webui.py) 脚本体验 **Web 交互**
134
+
135
+ ```shell
136
+ $ python webui.py
137
+ ```
138
+
139
+ 或执行 [api.py](api.py) 利用 fastapi 部署 API
140
+ ```shell
141
+ $ python api.py
142
+ ```
143
+ 或成功部署 API 后,执行以下脚本体验基于 VUE 的前端页面
144
+ ```shell
145
+ $ cd views
146
+
147
+ $ pnpm i
148
+
149
+ $ npm run dev
150
+ ```
151
+
152
+ VUE 前端界面如下图所示:
153
+ 1. `对话` 界面
154
+ ![](img/vue_0521_0.png)
155
+ 2. `知识库问答` 界面
156
+ ![](img/vue_0521_1.png)
157
+ 3. `Bing搜索` 界面
158
+ ![](img/vue_0521_2.png)
159
+
160
+ WebUI 界面如下图所示:
161
+ 1. `对话` Tab 界面
162
+ ![](img/webui_0521_0.png)
163
+ 2. `知识库测试 Beta` Tab 界面
164
+ ![](img/webui_0510_1.png)
165
+ 3. `模型配置` Tab 界面
166
+ ![](img/webui_0510_2.png)
167
+
168
+ Web UI 可以实现如下功能:
169
+
170
+ 1. 运行前自动读取`configs/model_config.py`中`LLM`及`Embedding`模型枚举及默认模型设置运行模型,如需重新加载模型,可在 `模型配置` Tab 重新选择后点击 `重新加载模型` 进行模型加载;
171
+ 2. 可手动调节保留对话历史长度、匹配知识库文段数量,可根据显存大小自行调节;
172
+ 3. `对话` Tab 具备模式选择功能,可选择 `LLM对话` 与 `知识库问答` 模式进行对话,支持流式对话;
173
+ 4. 添加 `配置知识库` 功能,支持选择已有知识库或新建知识库,并可向知识库中**新增**上传文件/文件夹,使用文件上传组件选择好文件后点击 `上传文件并加载知识库`,会将所选上传文档数据加载至知识库中,并基于更新后知识库进行问答;
174
+ 5. 新增 `知识库测试 Beta` Tab,可用于测试不同文本切分方法与检索相关度阈值设置,暂不支持将测试参数作为 `对话` Tab 设置参数。
175
+ 6. 后续版本中将会增加对知识库的修改或删除,及知识库中已导入文件的查看。
176
+
177
+ ### 常见问题
178
+
179
+ 参见 [常见问题](docs/FAQ.md)。
180
+
181
+ ## Demo
182
+
183
+ 以问题`chatglm-6b 的局限性具体体现在哪里,如何实现改进`为例:
184
+
185
+ 未使用 langchain 接入本地文档时:
186
+
187
+ >ChatGLM-6B 是一个基于 GPT-3 的聊天机器人,它的局限性主要体现在以下几个方面:
188
+ >1. 语言模型的精度:GPT-3 是一种大型的语言模型,它的精度比较高,但是仍然有一些局限性。例如,它可能无法准确地理解某些方言、口音或者复杂的句子结构。
189
+ >2. 知识图谱的完整性:GPT-3 可以生成一些知识图谱,但是其完整性和准确性可能不够。例如,它可能无法准确地理解某些领域的专业知识和术语。
190
+ >3. 对话生成的连贯性:GPT-3 可以生成一些连贯的对话,但是其生成的对话可能不够自然。例如,它可能会生成一些虚假的、不连贯的对话或者误解用户的意图。
191
+ >
192
+ >为了改进 ChatGLM-6B,可以考虑以下几个方面:
193
+ >1. 提高语言模型的精度:可以通过增加训练数据、使用更好的语言模型架构、优化模型训练过程等方式来提高语言模型的精度。
194
+ >2. 增强知识图谱的完整性:可以通过增加知识图谱的维度、使用更好的知识图谱构建方法、优化知识图谱的匹配算法等方式来增强知识图谱的完整性。
195
+ >3. 提高对话生成的连贯性:可以通过增加对话生成模型的上下文信息、使用更好的上下文信息提取方法、优化对话生成模型的算法等方式来提高对话生成的连贯性。
196
+ >4. 引入更多的人类反馈:人类反馈可以帮助 ChatGLM-6B 更好地理解用户的意图和需求,从而提高对话生成的质量和准确性。可以通过引入更多的人类客服、使用人类反馈收集器等方式来引入更多的人类反馈。
197
+
198
+ 使用 langchain 接入 ChatGLM-6B 项目的 README.md 文件后 ChatGLM 的回答:
199
+ >ChatGLM-6B 是一个基于深度学习的自然语言处理模型,它在回答问题方面表现出色。但是,它也有一些局限性,具体如下:
200
+ >1. 语言理解有限:ChatGLM-6B 只学习了中文自然语言,对于其他语言可能会存在理解上的局限性。
201
+ >2. 知识储备不足:ChatGLM-6B 的训练数据集只包含了中文语料,因此它可能无法回答一些非中文的问题或者某些特定领域的问题。
202
+ >3. 数据量有限:ChatGLM-6B 的训练数据集只有几百万条记录,因此它可能无法回答一些非常具体或者复杂的问题。
203
+ >
204
+ >为了改进 ChatGLM-6B,可以考虑以下几个方面:
205
+ >1. 学习更多的语言知识:可以学习其他语言的自然语言处理技术,扩大语言理解的范围。
206
+ >2. 扩大知识储备:可以收集更多的中文语料,或者使用其他语言的数据集来扩充知识储备。
207
+ >3. 增加数据量:可以使用更大的数据集来训练 ChatGLM-6B,提高模型的表现。
208
+ >4. 引入更多的评估指标:可以引入更多的评估指标来评估模型的表现,从而发现 ChatGLM-6B 存在的不足和局限性。
209
+ >5. 改进模型架构:可以改进 ChatGLM-6B 的模型架构,提高模型的性能和表现。例如,可以使用更大的神经网络或者改进的卷积神经网络结构。
210
+
211
+ ## 路线图
212
+
213
+ - [ ] Langchain 应用
214
+ - [x] 接入非结构化文档(已支持 md、pdf、docx、txt 文件格式)
215
+ - [x] jpg 与 png 格式图片的 OCR 文字识别
216
+ - [x] 搜索引擎接入
217
+ - [ ] 本地网页接入
218
+ - [ ] 结构化数据接入(如 csv、Excel、SQL 等)
219
+ - [ ] 知识图谱/图数据库接入
220
+ - [ ] Agent 实现
221
+ - [x] 增加更多 LLM 模型支持
222
+ - [x] [THUDM/chatglm2-6b](https://huggingface.co/THUDM/chatglm2-6b)
223
+ - [x] [THUDM/chatglm-6b](https://huggingface.co/THUDM/chatglm-6b)
224
+ - [x] [THUDM/chatglm-6b-int8](https://huggingface.co/THUDM/chatglm-6b-int8)
225
+ - [x] [THUDM/chatglm-6b-int4](https://huggingface.co/THUDM/chatglm-6b-int4)
226
+ - [x] [THUDM/chatglm-6b-int4-qe](https://huggingface.co/THUDM/chatglm-6b-int4-qe)
227
+ - [x] [ClueAI/ChatYuan-large-v2](https://huggingface.co/ClueAI/ChatYuan-large-v2)
228
+ - [x] [fnlp/moss-moon-003-sft](https://huggingface.co/fnlp/moss-moon-003-sft)
229
+ - [x] [bigscience/bloomz-7b1](https://huggingface.co/bigscience/bloomz-7b1)
230
+ - [x] [bigscience/bloom-3b](https://huggingface.co/bigscience/bloom-3b)
231
+ - [x] [baichuan-inc/baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B)
232
+ - [x] [lmsys/vicuna-13b-delta-v1.1](https://huggingface.co/lmsys/vicuna-13b-delta-v1.1)
233
+ - [x] 支持通过调用 [fastchat](https://github.com/lm-sys/FastChat) api 调用 llm
234
+ - [x] 增加更多 Embedding 模型支持
235
+ - [x] [nghuyong/ernie-3.0-nano-zh](https://huggingface.co/nghuyong/ernie-3.0-nano-zh)
236
+ - [x] [nghuyong/ernie-3.0-base-zh](https://huggingface.co/nghuyong/ernie-3.0-base-zh)
237
+ - [x] [shibing624/text2vec-base-chinese](https://huggingface.co/shibing624/text2vec-base-chinese)
238
+ - [x] [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese)
239
+ - [x] [moka-ai/m3e-small](https://huggingface.co/moka-ai/m3e-small)
240
+ - [x] [moka-ai/m3e-base](https://huggingface.co/moka-ai/m3e-base)
241
+ - [ ] Web UI
242
+ - [x] 基于 gradio 实现 Web UI DEMO
243
+ - [x] 基于 streamlit 实现 Web UI DEMO
244
+ - [x] 添加输出内容及错误提示
245
+ - [x] 引用标注
246
+ - [ ] 增加知识库管理
247
+ - [x] 选择知识库开始问答
248
+ - [x] 上传文件/文件夹至知识库
249
+ - [x] 知识库测试
250
+ - [x] 删除知识库中文件
251
+ - [x] 支持搜索引擎问答
252
+ - [ ] 增加 API 支持
253
+ - [x] 利用 fastapi 实现 API 部署方式
254
+ - [ ] 实现调用 API 的 Web UI Demo
255
+ - [x] VUE 前端
256
+
257
+ ## 项目交流群
258
+ <img src="img/qr_code_45.jpg" alt="二维码" width="300" height="300" />
259
+
260
+
261
+ 🎉 langchain-ChatGLM 项目微信交流群,如果你也对本项目感兴趣,欢迎加入群聊参与讨论交流。
README_lang-en.md ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ChatGLM Application with Local Knowledge Implementation
2
+
3
+ ## Introduction
4
+
5
+ [![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?style=for-the-badge&logo=telegram&logoColor=white "langchain-chatglm")](https://t.me/+RjliQ3jnJ1YyN2E9)
6
+
7
+ 🌍 [_中文文档_](README.md)
8
+
9
+ 🤖️ This is a ChatGLM application based on local knowledge, implemented using [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B) and [langchain](https://github.com/hwchase17/langchain).
10
+
11
+ 💡 Inspired by [document.ai](https://github.com/GanymedeNil/document.ai) and [Alex Zhangji](https://github.com/AlexZhangji)'s [ChatGLM-6B Pull Request](https://github.com/THUDM/ChatGLM-6B/pull/216), this project establishes a local knowledge question-answering application using open-source models.
12
+
13
+ ✅ The embeddings used in this project are [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main), and the LLM is [ChatGLM-6B](https://github.com/THUDM/ChatGLM-6B). Relying on these models, this project enables the use of **open-source** models for **offline private deployment**.
14
+
15
+ ⛓️ The implementation principle of this project is illustrated in the figure below. The process includes loading files -> reading text -> text segmentation -> text vectorization -> question vectorization -> matching the top k most similar text vectors to the question vector -> adding the matched text to `prompt` along with the question as context -> submitting to `LLM` to generate an answer.
16
+
17
+ ![Implementation schematic diagram](img/langchain+chatglm.png)
18
+
19
+ 🚩 This project does not involve fine-tuning or training; however, fine-tuning or training can be employed to optimize the effectiveness of this project.
20
+
21
+ 📓 [ModelWhale online notebook](https://www.heywhale.com/mw/project/643977aa446c45f4592a1e59)
22
+
23
+ ## Changelog
24
+
25
+ **[2023/04/15]**
26
+
27
+ 1. Refactored the project structure to keep the command line demo [cli_demo.py](cli_demo.py) and the Web UI demo [webui.py](webui.py) in the root directory.
28
+ 2. Improve the Web UI by modifying it to first load the model according to the default option of [configs/model_config.py](configs/model_config.py) after running the Web UI, and adding error messages, etc.
29
+ 3. Update FAQ.
30
+
31
+ **[2023/04/12]**
32
+
33
+ 1. Replaced the sample files in the Web UI to avoid issues with unreadable files due to encoding problems in Ubuntu;
34
+ 2. Replaced the prompt template in `knowledge_based_chatglm.py` to prevent confusion in the content returned by ChatGLM, which may arise from the prompt template containing Chinese and English bilingual text.
35
+
36
+ **[2023/04/11]**
37
+
38
+ 1. Added Web UI V0.1 version (thanks to [@liangtongt](https://github.com/liangtongt));
39
+ 2. Added Frequently Asked Questions in `README.md` (thanks to [@calcitem](https://github.com/calcitem) and [@bolongliu](https://github.com/bolongliu));
40
+ 3. Enhanced automatic detection for the availability of `cuda`, `mps`, and `cpu` for LLM and Embedding model running devices;
41
+ 4. Added a check for `filepath` in `knowledge_based_chatglm.py`. In addition to supporting single file import, it now supports a single folder path as input. After input, it will traverse each file in the folder and display a command-line message indicating the success of each file load.
42
+
43
+ **[2023/04/09]**
44
+
45
+ 1. Replaced the previously selected `ChatVectorDBChain` with `RetrievalQA` in `langchain`, effectively reducing the issue of stopping due to insufficient video memory after asking 2-3 times;
46
+ 2. Added `EMBEDDING_MODEL`, `VECTOR_SEARCH_TOP_K`, `LLM_MODEL`, `LLM_HISTORY_LEN`, `REPLY_WITH_SOURCE` parameter value settings in `knowledge_based_chatglm.py`;
47
+ 3. Added `chatglm-6b-int4` and `chatglm-6b-int4-qe`, which require less GPU memory, as LLM model options;
48
+ 4. Corrected code errors in `README.md` (thanks to [@calcitem](https://github.com/calcitem)).
49
+
50
+ **[2023/04/07]**
51
+
52
+ 1. Resolved the issue of doubled video memory usage when loading the ChatGLM model (thanks to [@suc16](https://github.com/suc16) and [@myml](https://github.com/myml));
53
+ 2. Added a mechanism to clear video memory;
54
+ 3. Added `nghuyong/ernie-3.0-nano-zh` and `nghuyong/ernie-3.0-base-zh` as Embedding model options, which consume less video memory resources than `GanymedeNil/text2vec-large-chinese` (thanks to [@lastrei](https://github.com/lastrei)).
55
+
56
+ ## How to Use
57
+
58
+ ### Hardware Requirements
59
+
60
+ - ChatGLM-6B Model Hardware Requirements
61
+
62
+ | **Quantization Level** | **Minimum GPU Memory** (inference) | **Minimum GPU Memory** (efficient parameter fine-tuning) |
63
+ | -------------- | ------------------------- | --------------------------------- |
64
+ | FP16 (no quantization) | 13 GB | 14 GB |
65
+ | INT8 | 8 GB | 9 GB |
66
+ | INT4 | 6 GB | 7 GB |
67
+
68
+ - Embedding Model Hardware Requirements
69
+
70
+ The default Embedding model [GanymedeNil/text2vec-large-chinese](https://huggingface.co/GanymedeNil/text2vec-large-chinese/tree/main) in this project occupies around 3GB of video memory and can also be configured to run on a CPU.
71
+ ### Software Requirements
72
+
73
+ This repository has been tested with Python 3.8 and CUDA 11.7 environments.
74
+
75
+ ### 1. Setting up the environment
76
+
77
+ * Environment check
78
+
79
+ ```shell
80
+ # First, make sure your machine has Python 3.8 or higher installed
81
+ $ python --version
82
+ Python 3.8.13
83
+
84
+ # If your version is lower, you can use conda to install the environment
85
+ $ conda create -p /your_path/env_name python=3.8
86
+
87
+ # Activate the environment
88
+ $ source activate /your_path/env_name
89
+
90
+ # Deactivate the environment
91
+ $ source deactivate /your_path/env_name
92
+
93
+ # Remove the environment
94
+ $ conda env remove -p /your_path/env_name
95
+ ```
96
+
97
+ * Project dependencies
98
+
99
+ ```shell
100
+
101
+ # Clone the repository
102
+ $ git clone https://github.com/imClumsyPanda/langchain-ChatGLM.git
103
+
104
+ # Install dependencies
105
+ $ pip install -r requirements.txt
106
+ ```
107
+
108
+ Note: When using langchain.document_loaders.UnstructuredFileLoader for unstructured file integration, you may need to install other dependency packages according to the documentation. Please refer to [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html).
109
+
110
+ ### 2. Run Scripts to Experience Web UI or Command Line Interaction
111
+
112
+ Execute [webui.py](webui.py) script to experience **Web interaction** <img src="https://img.shields.io/badge/Version-0.1-brightgreen">
113
+ ```commandline
114
+ python webui.py
115
+
116
+ ```
117
+ Or execute [api.py](api.py) script to deploy web api.
118
+ ```shell
119
+ $ python api.py
120
+ ```
121
+ Note: Before executing, make sure that at least 15 GB of free disk space is available for the `$HOME/.cache/huggingface/` folder.
122
+
123
+ Or, once api.py is running, execute the following commands to start the Vue front end:
124
+ ```shell
125
+ $ cd views
126
+
127
+ $ pnpm i
128
+
129
+ $ npm run dev
130
+ ```
131
+
132
+ VUE interface screenshots:
133
+
134
+ ![](img/vue_0521_0.png)
135
+
136
+ ![](img/vue_0521_1.png)
137
+
138
+ ![](img/vue_0521_2.png)
139
+
140
+ Web UI interface screenshots:
141
+
142
+ ![img.png](img/webui_0521_0.png)
143
+
144
+ ![](img/webui_0510_1.png)
145
+
146
+ ![](img/webui_0510_2.png)
147
+
148
+ The Web UI supports the following features:
149
+
150
+ 1. Automatically reads the `LLM` and `embedding` model enumerations in `configs/model_config.py`, allowing you to select and reload the model by clicking `重新加载模型`.
151
+ 2. The length of retained dialogue history can be manually adjusted according to the available video memory.
152
+ 3. Adds a file upload function. Select the uploaded file through the drop-down box, click `加载文件` to load the file, and change the loaded file at any time during the process.
153
+
154
+ Alternatively, execute the [cli_demo.py](cli_demo.py) script to experience **command line interaction**:
155
+
156
+ ```commandline
157
+ python cli_demo.py
158
+ ```
159
+
160
+ ### FAQ
161
+
162
+ Q1: What file formats does this project support?
163
+
164
+ A1: Currently, this project has been tested with txt, docx, and md file formats. For more file formats, please refer to the [langchain documentation](https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/unstructured_file.html). It is known that if the document contains special characters, there might be issues with loading the file.
165
+
166
+ Q2: How can I resolve the `detectron2` dependency issue when reading specific file formats?
167
+
168
+ A2: As the installation process for this package can be problematic and it is only required for some file formats, it is not included in `requirements.txt`. You can install it with the following command:
169
+
170
+ ```commandline
171
+ pip install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"
172
+ ```
173
+
174
+ Q3: How can I solve the `Resource punkt not found.` error?
175
+
176
+ A3: Download https://github.com/nltk/nltk_data/raw/gh-pages/packages/tokenizers/punkt.zip, unzip it, and place the extracted `punkt` folder in the `nltk_data/tokenizers` storage path.
177
+
178
+ The `nltk_data` storage path can be found using `nltk.data.path`.
179
+
180
+ Q4: How can I solve the `Resource averaged_perceptron_tagger not found.` error?
181
+
182
+ A4: Download https://github.com/nltk/nltk_data/blob/gh-pages/packages/taggers/averaged_perceptron_tagger.zip, extract it, and place it in the `nltk_data/taggers` storage path.
183
+
184
+ The `nltk_data` storage path can be found using `nltk.data.path`.
185
+
186
+ Q5: Can this project run in Google Colab?
187
+
188
+ A5: You can try running the chatglm-6b-int4 model in Google Colab. Please note that if you want to run the Web UI in Colab, you need to set the `share` parameter in `demo.queue(concurrency_count=3).launch(server_name='0.0.0.0', share=False, inbrowser=False)` to `True`.
189
+
190
+ Q6: How can I resolve packages installed with `pip` not taking effect in Anaconda?
+
+ A6: This issue is related to the system environment. For more details, please refer to [Issues with installing packages using pip in Anaconda](docs/Issue-with-Installing-Packages-Using-pip-in-Anaconda.md).
191
+
192
+ ## DEMO
193
+
194
+ Using the question "What are the limitations of ChatGLM-6B and how can they be improved?" as an example:
195
+
196
+ Without using langchain to access local documents:
197
+
198
+ > ChatGLM-6B is a chatbot based on GPT-3, and its limitations are mainly reflected in the following aspects:
199
+ >
200
+ > 1. Language model accuracy: Although GPT-3 is a large language model with relatively high accuracy, it still has some limitations. For instance, it may not be able to accurately understand certain dialects, accents, or complex sentence structures.
201
+ > 2. Completeness of the knowledge graph: GPT-3 can generate some knowledge graphs, but its completeness and accuracy may not be sufficient. For example, it may struggle to accurately understand specific domain expertise and terminology.
202
+ > 3. Coherence of dialogue generation: While GPT-3 can generate somewhat coherent dialogues, they may not always be natural. For instance, it may generate false, incoherent dialogue or misinterpret the user's intent.
203
+ >
204
+ > To improve ChatGLM-6B, consider the following aspects:
205
+ >
206
+ > 1. Enhance the accuracy of the language model: The accuracy of the language model can be improved by increasing the training data, using better language model architectures, and optimizing the model training process.
207
+ > 2. Strengthen the integrity of the knowledge graph: The integrity of the knowledge graph can be enhanced by increasing its dimensions, employing better knowledge graph construction methods, and optimizing the knowledge graph's matching algorithm.
208
+ > 3. Boost the coherence of dialogue generation: The coherence of dialogue generation can be improved by augmenting the context information of the dialogue generation model, utilizing better context information extraction methods, and optimizing the dialogue generation model's algorithm.
209
+ > 4. Incorporate more human feedback: Human feedback can help ChatGLM-6B better understand users' intentions and needs, thereby improving the quality and accuracy of dialogue generation. More human feedback can be introduced by involving more human agents and using human feedback collectors.
210
+
211
+ ChatGLM's answer after using LangChain to access the README.md file of the ChatGLM-6B project:
212
+ >ChatGLM-6B is a deep learning-based natural language processing model that excels at answering questions. However, it also has some limitations, as follows:
213
+ >1. Limited language understanding: ChatGLM-6B has been primarily trained on Chinese natural language, and its understanding of other languages may be limited.
214
+ >2. Insufficient knowledge base: The training dataset of ChatGLM-6B contains only a Chinese corpus, so it may not be able to answer non-Chinese questions or queries in specific domains.
215
+ >3. Limited data volume: ChatGLM-6B's training dataset has only a few million records, which may hinder its ability to answer very specific or complex questions.
216
+ >
217
+ >To improve ChatGLM-6B, consider the following aspects:
218
+ >1. Expand language knowledge: Learn natural language processing techniques in other languages to broaden the model's language understanding capabilities.
219
+ >2. Broaden the knowledge base: Collect more Chinese corpora or use datasets in other languages to expand the model's knowledge base.
220
+ >3. Increase data volume: Use larger datasets to train ChatGLM-6B, which can improve the model's performance.
221
+ >4. Introduce more evaluation metrics: Incorporate additional evaluation metrics to assess the model's performance, which can help identify the shortcomings and limitations of ChatGLM-6B.
222
+ >5. Enhance the model architecture: Improve ChatGLM-6B's model architecture to boost its performance and capabilities. For example, employ larger neural networks or refined convolutional neural network structures.
223
+
224
+ ## Roadmap
225
+
226
+ - [x] Implement LangChain + ChatGLM-6B for local knowledge application
227
+ - [x] Unstructured file access based on langchain
228
+ - [x] .md
229
+ - [x] .pdf
230
+ - [x] .docx
231
+ - [x] .txt
232
+ - [ ] Add support for more LLM models
233
+ - [x] THUDM/chatglm-6b
234
+ - [x] THUDM/chatglm-6b-int4
235
+ - [x] THUDM/chatglm-6b-int4-qe
236
+ - [ ] Add Web UI DEMO
237
+ - [x] Implement Web UI DEMO using Gradio
238
+ - [x] Add output and error messages
239
+ - [x] Citation callout
240
+ - [ ] Knowledge base management
241
+ - [x] QA based on selected knowledge base
242
+ - [x] Add files/folder to knowledge base
243
+ - [ ] Delete files from knowledge base
244
+ - [ ] Implement Web UI DEMO using Streamlit
245
+ - [ ] Add support for API deployment
246
+ - [x] Use fastapi to implement API
247
+ - [ ] Implement Web UI DEMO for API calls
Xubailing Log.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ 2023-7-29 �̻�����������а汾��
3
+
4
+ ǰ��������
5
+ ���õ�LLMģ�ͣ�
6
+ C:\Users\Administrator\ChatGLM2-6B
7
+ ���õ�embeddingsģ�ͣ�
8
+ C:/Users/Administrator/text2vec-large-chinese
9
+
10
+ �޸��ļ����£�
11
+ C:\Users\Administrator\langchain-ChatGLM\configs\model_config
12
+ \chains\local_doc_qa line137
13
+
14
+ self.llm_model_chain = llm_model
15
+ self.embeddings = HuggingFaceEmbeddings(model_name="C:/Users/Administrator/text2vec-large-chinese",
16
+ model_kwargs={'device': embedding_device})
api.py ADDED
@@ -0,0 +1,552 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #encoding:utf-8
2
+ import argparse
3
+ import json
4
+ import os
5
+ import shutil
6
+ from typing import List, Optional
7
+ import urllib
8
+ import asyncio
9
+ import nltk
10
+ import pydantic
11
+ import uvicorn
12
+ from fastapi import Body, FastAPI, File, Form, Query, UploadFile, WebSocket
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from pydantic import BaseModel
15
+ from typing_extensions import Annotated
16
+ from starlette.responses import RedirectResponse
17
+
18
+ from chains.local_doc_qa import LocalDocQA
19
+ from configs.model_config import (KB_ROOT_PATH, EMBEDDING_DEVICE,
20
+ EMBEDDING_MODEL, NLTK_DATA_PATH,
21
+ VECTOR_SEARCH_TOP_K, LLM_HISTORY_LEN, OPEN_CROSS_DOMAIN)
22
+ import models.shared as shared
23
+ from models.loader.args import parser
24
+ from models.loader import LoaderCheckPoint
25
+
26
# Prepend the configured NLTK data directory so it is searched before the
# entries already present in nltk.data.path.
nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
27
+
28
+
29
# Generic response envelope: a numeric status code plus a status message.
# Documented with comments rather than a class docstring so the generated
# schema is left untouched.
class BaseResponse(BaseModel):
    # Status code carried in the response body; defaults to 200.
    code: int = pydantic.Field(200, description="HTTP status code")
    # Status text carried in the response body; defaults to "success".
    msg: str = pydantic.Field("success", description="HTTP status message")

    class Config:
        # Example payload attached to the model's schema.
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
            }
        }
40
+
41
+
42
# BaseResponse extended with a required list of names in ``data``.
# In this file it is returned both for knowledge-base listings (list_kbs)
# and for document listings (list_docs).
class ListDocsResponse(BaseResponse):
    # Required field (no default): the names being listed.
    data: List[str] = pydantic.Field(..., description="List of document names")

    class Config:
        # Example payload attached to the model's schema.
        schema_extra = {
            "example": {
                "code": 200,
                "msg": "success",
                "data": ["doc1.docx", "doc2.pdf", "doc3.txt"],
            }
        }
53
+
54
+
55
# Response model returned by the chat endpoints in this file: the question,
# the generated answer, the conversation history, and the formatted source
# snippets. All four fields are required (no defaults).
class ChatMessage(BaseModel):
    # The question that was asked.
    question: str = pydantic.Field(..., description="Question text")
    # The generated answer text.
    response: str = pydantic.Field(..., description="Response text")
    # Conversation history as a list of string lists; the schema example
    # below shows [question, answer] pairs.
    history: List[List[str]] = pydantic.Field(..., description="History text")
    # One pre-formatted string per cited source; empty when no sources apply.
    source_documents: List[str] = pydantic.Field(
        ..., description="List of source documents and their scores"
    )

    class Config:
        # Example payload attached to the model's schema.
        schema_extra = {
            "example": {
                "question": "工伤保险如何办理?",
                "response": "根据已知信息,可以总结如下:\n\n1. 参保单位为员工缴纳工伤保险费,以保障员工在发生工伤时能够获得相应的待遇。\n2. 不同地区的工伤保险缴费规定可能有所不同,需要向当地社保部门咨询以了解具体的缴费标准和规定。\n3. 工伤从业人员及其近亲属需要申请工伤认定,确认享受的待遇资格,并按时缴纳工伤保险费。\n4. 工伤保险待遇包括工伤医疗、康复、辅助器具配置费用、伤残待遇、工亡待遇、一次性工亡补助金等。\n5. 工伤保险待遇领取资格认证包括长期待遇领取人员认证和一次性待遇领取人员认证。\n6. 工伤保险基金支付的待遇项目包括工伤医疗待遇、康复待遇、辅助器具配置费用、一次性工亡补助金、丧葬补助金等。",
                "history": [
                    [
                        "工伤保险是什么?",
                        "工伤保险是指用人单位按照国家规定,为本单位的职工和用人单位的其他人员,缴纳工伤保险费,由保险机构按照国家规定的标准,给予工伤保险待遇的社会保险制度。",
                    ]
                ],
                "source_documents": [
                    "出处 [1] 广州市单位从业的特定人员参加工伤保险办事指引.docx:\n\n\t( 一) 从业单位 (组织) 按“自愿参保”原则, 为未建 立劳动关系的特定从业人员单项参加工伤保险 、缴纳工伤保 险费。",
                    "出处 [2] ...",
                    "出处 [3] ...",
                ],
            }
        }
81
+
82
+
83
def get_kb_path(local_doc_id: str):
    """Return the directory of knowledge base ``local_doc_id`` under ``KB_ROOT_PATH``."""
    kb_root = KB_ROOT_PATH
    return os.path.join(kb_root, local_doc_id)
85
+
86
+
87
def get_doc_path(local_doc_id: str):
    """Return the ``content`` sub-directory of knowledge base ``local_doc_id``."""
    kb_dir = get_kb_path(local_doc_id)
    return os.path.join(kb_dir, "content")
89
+
90
+
91
def get_vs_path(local_doc_id: str):
    """Return the ``vector_store`` sub-directory of knowledge base ``local_doc_id``."""
    kb_dir = get_kb_path(local_doc_id)
    return os.path.join(kb_dir, "vector_store")
93
+
94
+
95
def get_file_path(local_doc_id: str, doc_name: str):
    """Return the path of ``doc_name`` inside the content directory of ``local_doc_id``."""
    content_dir = get_doc_path(local_doc_id)
    return os.path.join(content_dir, doc_name)
97
+
98
+
99
def validate_kb_name(knowledge_base_id: str) -> bool:
    """Return True when ``knowledge_base_id`` is safe to join onto the knowledge-base root.

    The name is rejected when, in either its raw or its percent-decoded form
    (callers in this module decode the name after validating it), it:

    * is empty or whitespace only, or contains a NUL character;
    * carries a Windows drive or UNC prefix, or starts with a path separator;
    * contains a ``.`` or ``..`` path component, using ``/`` or ``\\`` as the
      separator, since such a name can resolve to the root itself or escape it.

    Args:
        knowledge_base_id: Knowledge-base name supplied by the client.

    Returns:
        ``True`` if the name passes every check, ``False`` otherwise.
    """
    # Local imports keep this block self-contained; the module only does
    # ``import urllib``, which does not guarantee ``urllib.parse`` is loaded.
    import ntpath
    from urllib.parse import unquote

    if not isinstance(knowledge_base_id, str):
        return False

    for candidate in {knowledge_base_id, unquote(knowledge_base_id)}:
        if not candidate.strip() or "\x00" in candidate:
            return False
        # A drive/UNC prefix makes os.path.join discard the root on Windows.
        if ntpath.splitdrive(candidate)[0]:
            return False
        normalized = candidate.replace("\\", "/")
        if normalized.startswith("/"):
            return False
        if any(part in (".", "..") for part in normalized.split("/")):
            return False
    return True
104
+
105
+
106
async def upload_file(
        file: UploadFile = File(description="A single binary file"),
        knowledge_base_id: str = Form(..., description="Knowledge Base Name", example="kb1"),
):
    """Save one uploaded file into a knowledge base and add it to the vector store.

    Args:
        file: The uploaded file.
        knowledge_base_id: Name of the target knowledge base; its content
            directory is created when missing.

    Returns:
        ``BaseResponse`` with code 403 for a rejected knowledge-base name or
        file name, 200 when the file already exists with the same size or was
        loaded into the vector store, and 500 when loading produced no files.
    """
    if not validate_kb_name(knowledge_base_id):
        # BaseResponse declares no ``data`` field, so none is passed here.
        return BaseResponse(code=403, msg="Don't attack me")

    # The file name is client-controlled: keep only its final component so it
    # cannot point outside the knowledge base's content directory.
    filename = os.path.basename(file.filename or "")
    if filename in ("", ".", ".."):
        return BaseResponse(code=403, msg="Don't attack me")

    saved_path = get_doc_path(knowledge_base_id)
    os.makedirs(saved_path, exist_ok=True)

    file_content = await file.read()  # read the uploaded file's content

    file_path = os.path.join(saved_path, filename)
    # Same name and same byte size is treated as "already uploaded".
    if os.path.exists(file_path) and os.path.getsize(file_path) == len(file_content):
        file_status = f"文件 {filename} 已存在。"
        return BaseResponse(code=200, msg=file_status)

    with open(file_path, "wb") as f:
        f.write(file_content)

    vs_path = get_vs_path(knowledge_base_id)
    vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store([file_path], vs_path)
    if len(loaded_files) > 0:
        file_status = f"文件 {filename} 已上传至新的知识库,并已加载知识库,请开始提问。"
        return BaseResponse(code=200, msg=file_status)
    else:
        file_status = "文件上传失败,请重新上传"
        return BaseResponse(code=500, msg=file_status)
135
+
136
+
137
async def upload_files(
        files: Annotated[
            List[UploadFile], File(description="Multiple files as UploadFile")
        ],
        knowledge_base_id: str = Form(..., description="Knowledge Base Name", example="kb1"),
):
    """Save several uploaded files into a knowledge base and add the new ones to the vector store.

    Args:
        files: The uploaded files.
        knowledge_base_id: Name of the target knowledge base; its content
            directory is created when missing.

    Returns:
        ``BaseResponse`` with code 403 for a rejected knowledge-base name or
        file name, 200 when at least one file was loaded or when every file
        already existed with the same size, and 500 when loading produced no
        files.
    """
    if not validate_kb_name(knowledge_base_id):
        # BaseResponse declares no ``data`` field, so none is passed here.
        return BaseResponse(code=403, msg="Don't attack me")

    # File names are client-controlled: keep only the final component, and
    # reject the whole request before anything is written to disk.
    filenames = [os.path.basename(file.filename or "") for file in files]
    if any(name in ("", ".", "..") for name in filenames):
        return BaseResponse(code=403, msg="Don't attack me")

    saved_path = get_doc_path(knowledge_base_id)
    os.makedirs(saved_path, exist_ok=True)

    filelist = []
    for file, filename in zip(files, filenames):
        file_path = os.path.join(saved_path, filename)
        file_content = await file.read()
        # Same name and same byte size is treated as "already uploaded".
        if os.path.exists(file_path) and os.path.getsize(file_path) == len(file_content):
            continue
        with open(file_path, "wb") as f:
            f.write(file_content)
        filelist.append(file_path)

    if not filelist:
        # Previously this path fell through to an unbound ``loaded_files``
        # and raised NameError; report it like the single-file endpoint does.
        return BaseResponse(code=200, msg="all documents already exist, nothing to upload")

    vs_path = get_vs_path(knowledge_base_id)
    vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, vs_path)
    if len(loaded_files):
        file_status = f"documents {', '.join([os.path.split(i)[-1] for i in loaded_files])} upload success"
        return BaseResponse(code=200, msg=file_status)
    # Nothing was loaded, so name the files that were attempted.
    file_status = f"documents {', '.join([os.path.split(i)[-1] for i in filelist])} upload fail"
    return BaseResponse(code=500, msg=file_status)
167
+
168
+
169
async def list_kbs():
    """List the knowledge bases found under ``KB_ROOT_PATH``.

    A directory counts as a knowledge base only when it contains
    ``vector_store/index.faiss``. Returns a ``ListDocsResponse`` whose
    ``data`` holds the matching directory names (empty when the root does not
    exist).
    """
    kb_names = []
    if os.path.exists(KB_ROOT_PATH):
        for entry in os.listdir(KB_ROOT_PATH):
            if not os.path.isdir(os.path.join(KB_ROOT_PATH, entry)):
                continue
            index_file = os.path.join(KB_ROOT_PATH, entry, "vector_store", "index.faiss")
            if os.path.exists(index_file):
                kb_names.append(entry)

    return ListDocsResponse(data=kb_names)
182
+
183
+
184
async def list_docs(
        knowledge_base_id: str = Query(..., description="Knowledge Base Name", example="kb1")
):
    """List the files stored in a knowledge base's content directory.

    Args:
        knowledge_base_id: Knowledge-base name; it is percent-decoded before use.

    Returns:
        ``ListDocsResponse`` with code 403 for a rejected name, 404 when the
        knowledge base directory does not exist, otherwise the file names
        found in its content directory (empty when that directory is missing).
    """
    # Imported locally: the module only does ``import urllib``, which does
    # not guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import unquote

    # Decode first so the check sees the exact name used to build paths;
    # validating the encoded form let "%2e%2e%2f" through.
    knowledge_base_id = unquote(knowledge_base_id)
    if not validate_kb_name(knowledge_base_id):
        return ListDocsResponse(code=403, msg="Don't attack me", data=[])

    kb_path = get_kb_path(knowledge_base_id)
    local_doc_folder = get_doc_path(knowledge_base_id)
    if not os.path.exists(kb_path):
        return ListDocsResponse(code=404, msg=f"Knowledge base {knowledge_base_id} not found", data=[])
    if not os.path.exists(local_doc_folder):
        all_doc_names = []
    else:
        all_doc_names = [
            doc
            for doc in os.listdir(local_doc_folder)
            if os.path.isfile(os.path.join(local_doc_folder, doc))
        ]
    return ListDocsResponse(data=all_doc_names)
204
+
205
+
206
async def delete_kb(
        knowledge_base_id: str = Query(...,
                                       description="Knowledge Base Name",
                                       example="kb1"),
):
    """Delete a knowledge base directory and everything in it.

    Args:
        knowledge_base_id: Knowledge-base name; it is percent-decoded before use.

    Returns:
        ``BaseResponse`` with code 403 for a rejected name (including one
        that resolves to the knowledge-base root itself), 404 when the
        directory does not exist, and 200 after it has been removed.
    """
    # Imported locally: the module only does ``import urllib``, which does
    # not guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import unquote

    # Decode first so the check sees the exact name used to build paths.
    knowledge_base_id = unquote(knowledge_base_id)
    if not validate_kb_name(knowledge_base_id):
        return BaseResponse(code=403, msg="Don't attack me")

    # TODO: confirm whether batch deletion of knowledge bases should be supported
    kb_path = get_kb_path(knowledge_base_id)
    # A name such as "" or "." resolves to the root; removing it would wipe
    # every knowledge base at once.
    if os.path.abspath(kb_path) == os.path.abspath(KB_ROOT_PATH):
        return BaseResponse(code=403, msg="Don't attack me")
    if not os.path.exists(kb_path):
        return BaseResponse(code=404, msg=f"Knowledge base {knowledge_base_id} not found")
    shutil.rmtree(kb_path)
    return BaseResponse(code=200, msg=f"Knowledge Base {knowledge_base_id} delete success")
221
+
222
+
223
async def delete_doc(
        knowledge_base_id: str = Query(...,
                                       description="Knowledge Base Name",
                                       example="kb1"),
        doc_name: str = Query(
            ..., description="doc name", example="doc_name_1.pdf"
        ),
):
    """Delete one document from a knowledge base.

    The file is removed from disk. If no documents remain, the whole
    knowledge base directory is removed; otherwise the document is deleted
    from the vector store.

    Args:
        knowledge_base_id: Knowledge-base name; it is percent-decoded before use.
        doc_name: Plain file name of the document (no directory components).

    Returns:
        ``BaseResponse`` with code 403 for a rejected knowledge-base or
        document name, 404 when the knowledge base or document is missing,
        200 on success, and 500 when the vector-store deletion does not
        report success.
    """
    # Imported locally: the module only does ``import urllib``, which does
    # not guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import unquote

    # Decode first so the check sees the exact name used to build paths.
    knowledge_base_id = unquote(knowledge_base_id)
    if not validate_kb_name(knowledge_base_id):
        return BaseResponse(code=403, msg="Don't attack me")
    # doc_name is client-controlled: anything other than a bare file name
    # could make os.remove target a file outside the knowledge base.
    if doc_name in ("", ".", "..") or os.path.basename(doc_name) != doc_name:
        return BaseResponse(code=403, msg="Don't attack me")

    if not os.path.exists(get_kb_path(knowledge_base_id)):
        return BaseResponse(code=404, msg=f"Knowledge base {knowledge_base_id} not found")
    doc_path = get_file_path(knowledge_base_id, doc_name)
    if not os.path.exists(doc_path):
        return BaseResponse(code=404, msg=f"document {doc_name} not found")

    os.remove(doc_path)
    remain_docs = await list_docs(knowledge_base_id)
    if len(remain_docs.data) == 0:
        # Last document gone: drop the knowledge base directory as well.
        shutil.rmtree(get_kb_path(knowledge_base_id), ignore_errors=True)
        return BaseResponse(code=200, msg=f"document {doc_name} delete success")

    status = local_doc_qa.delete_file_from_vector_store(doc_path, get_vs_path(knowledge_base_id))
    if "success" in status:
        return BaseResponse(code=200, msg=f"document {doc_name} delete success")
    return BaseResponse(code=500, msg=f"document {doc_name} delete fail")
252
+
253
+
254
async def update_doc(
        knowledge_base_id: str = Query(...,
                                       description="知识库名",
                                       example="kb1"),
        old_doc: str = Query(
            ..., description="待删除文件名,已存储在知识库中", example="doc_name_1.pdf"
        ),
        new_doc: UploadFile = File(description="待上传文件"),
):
    """Replace a stored document with a newly uploaded one.

    The old document is removed from disk and from the vector store, then the
    new file is saved and loaded into the vector store.

    Args:
        knowledge_base_id: Knowledge-base name; it is percent-decoded before use.
        old_doc: Plain file name of the stored document to remove.
        new_doc: The replacement file to upload.

    Returns:
        ``BaseResponse`` with code 403 for a rejected knowledge-base or file
        name, 404 when the knowledge base or old document is missing, 500
        when the vector-store deletion reports failure or the new file could
        not be loaded, and 200 otherwise.
    """
    # Imported locally: the module only does ``import urllib``, which does
    # not guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import unquote

    # Decode first so the check sees the exact name used to build paths.
    knowledge_base_id = unquote(knowledge_base_id)
    if not validate_kb_name(knowledge_base_id):
        return BaseResponse(code=403, msg="Don't attack me")

    # Both names are client-controlled. Check them before anything is
    # deleted so a bad request cannot remove or write files outside the
    # knowledge base.
    new_name = os.path.basename(new_doc.filename or "")
    if (
            old_doc in ("", ".", "..")
            or os.path.basename(old_doc) != old_doc
            or new_name in ("", ".", "..")
    ):
        return BaseResponse(code=403, msg="Don't attack me")

    if not os.path.exists(get_kb_path(knowledge_base_id)):
        return BaseResponse(code=404, msg=f"Knowledge base {knowledge_base_id} not found")
    doc_path = get_file_path(knowledge_base_id, old_doc)
    if not os.path.exists(doc_path):
        return BaseResponse(code=404, msg=f"document {old_doc} not found")

    os.remove(doc_path)
    delete_status = local_doc_qa.delete_file_from_vector_store(doc_path, get_vs_path(knowledge_base_id))
    if "fail" in delete_status:
        return BaseResponse(code=500, msg=f"document {old_doc} delete failed")

    saved_path = get_doc_path(knowledge_base_id)
    os.makedirs(saved_path, exist_ok=True)

    file_content = await new_doc.read()  # read the uploaded file's content

    file_path = os.path.join(saved_path, new_name)
    # Same name and same byte size is treated as "already uploaded".
    if os.path.exists(file_path) and os.path.getsize(file_path) == len(file_content):
        file_status = f"document {new_name} already exists"
        return BaseResponse(code=200, msg=file_status)

    with open(file_path, "wb") as f:
        f.write(file_content)

    vs_path = get_vs_path(knowledge_base_id)
    vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store([file_path], vs_path)
    if len(loaded_files) > 0:
        file_status = f"document {old_doc} delete and document {new_name} upload success"
        return BaseResponse(code=200, msg=file_status)
    # The original message omitted the word "delete".
    file_status = f"document {old_doc} delete success but document {new_name} upload fail"
    return BaseResponse(code=500, msg=file_status)
300
+
301
+
302
+
303
async def local_doc_chat(
        knowledge_base_id: str = Body(..., description="Knowledge Base Name", example="kb1"),
        question: str = Body(..., description="Question", example="工伤保险是什么?"),
        history: List[List[str]] = Body(
            [],
            description="History of previous questions and answers",
            example=[
                [
                    "工伤保险是什么?",
                    "工伤保险是指用人单位按照国家规定,为本单位的职工和用人单位的其他人员,缴纳工伤保险费,由保险机构按照国家规定的标准,给予工伤保险待遇的社会保险制度。",
                ]
            ],
        ),
):
    """Answer ``question`` against the vector store of a knowledge base.

    Returns a ``ChatMessage``. When the knowledge base has no vector store
    directory, the response text says it was not found and the history is
    returned unchanged. Otherwise the answer stream is consumed to its last
    item, which supplies the answer, the updated history and the sources.
    """
    vs_path = get_vs_path(knowledge_base_id)
    if not os.path.exists(vs_path):
        return ChatMessage(
            question=question,
            response=f"Knowledge base {knowledge_base_id} not found",
            history=history,
            source_documents=[],
        )

    answer_stream = local_doc_qa.get_knowledge_based_answer(
        query=question, vs_path=vs_path, chat_history=history, streaming=True
    )
    # Drain the stream; only the final (resp, history) pair is used.
    for resp, history in answer_stream:
        pass

    source_documents = []
    for inum, doc in enumerate(resp["source_documents"]):
        source_documents.append(
            f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
            f"""相关度:{doc.metadata['score']}\n\n"""
        )

    return ChatMessage(
        question=question,
        response=resp["result"],
        history=history,
        source_documents=source_documents,
    )
343
+
344
+
345
async def bing_search_chat(
        question: str = Body(..., description="Question", example="工伤保险是什么?"),
        history: Optional[List[List[str]]] = Body(
            [],
            description="History of previous questions and answers",
            example=[
                [
                    "工伤保险是什么?",
                    "工伤保险是指用人单位按照国家规定,为本单位的职工和用人单位的其他人员,缴纳工伤保险费,由保险机构按照国家规定的标准,给予工伤保险待遇的社会保险制度。",
                ]
            ],
        ),
):
    """Answer ``question`` from search results.

    The answer stream is consumed to its last item, which supplies the
    answer, the updated history and the sources. Each source is formatted as
    a Markdown link followed by its content. Returns a ``ChatMessage``.
    """
    answer_stream = local_doc_qa.get_search_result_based_answer(
        query=question, chat_history=history, streaming=True
    )
    # Drain the stream; only the final (resp, history) pair is used.
    for resp, history in answer_stream:
        pass

    source_documents = []
    for inum, doc in enumerate(resp["source_documents"]):
        source_documents.append(
            f"""出处 [{inum + 1}] [{doc.metadata["source"]}]({doc.metadata["source"]}) \n\n{doc.page_content}\n\n"""
        )

    return ChatMessage(
        question=question,
        response=resp["result"],
        history=history,
        source_documents=source_documents,
    )
373
+
374
+
375
async def chat(
        question: str = Body(..., description="Question", example="工伤保险是什么?"),
        history: List[List[str]] = Body(
            [],
            description="History of previous questions and answers",
            example=[
                [
                    "工伤保险是什么?",
                    "工伤保险是指用人单位按照国家规定,为本单位的职工和用人单位的其他人员,缴纳工伤保险费,由保险机构按照国家规定的标准,给予工伤保险待遇的社会保险制度。",
                ]
            ],
        ),
):
    """Answer ``question`` with the LLM chain alone, without any knowledge base.

    The answer stream is consumed to its last item, whose answer text and
    history are returned in a ``ChatMessage`` with no source documents.
    """
    chain_inputs = {"prompt": question, "history": history, "streaming": True}
    chain_output = local_doc_qa.llm_model_chain(chain_inputs)

    # Each item overwrites the previous one, so the last item wins.
    for item in chain_output['answer_result_stream']:
        resp = item.llm_output["answer"]
        history = item.history

    return ChatMessage(
        question=question,
        response=resp,
        history=history,
        source_documents=[],
    )
402
+
403
+
404
async def stream_chat(websocket: WebSocket):
    """Serve knowledge-base question answering over a WebSocket, one turn per received message.

    Each incoming JSON message must carry ``question``, ``history`` and
    ``knowledge_base_id``. Per turn the handler sends a JSON "start" marker,
    then the answer as incremental text chunks, then a JSON-encoded "end"
    marker that includes the formatted sources.
    """
    await websocket.accept()
    turn = 1  # 1-based counter of completed question/answer rounds
    while True:
        input_json = await websocket.receive_json()
        question, history, knowledge_base_id = input_json["question"], input_json["history"], input_json[
            "knowledge_base_id"]
        vs_path = get_vs_path(knowledge_base_id)

        # An unknown knowledge base ends the whole session, not just this turn.
        if not os.path.exists(vs_path):
            await websocket.send_json({"error": f"Knowledge base {knowledge_base_id} not found"})
            await websocket.close()
            return

        await websocket.send_json({"question": question, "turn": turn, "flag": "start"})

        # Length of the answer text already sent, so only the new suffix of
        # each resp["result"] goes out.
        last_print_len = 0
        for resp, history in local_doc_qa.get_knowledge_based_answer(
                query=question, vs_path=vs_path, chat_history=history, streaming=True
        ):
            # Yield control to the event loop between chunks.
            await asyncio.sleep(0)
            await websocket.send_text(resp["result"][last_print_len:])
            last_print_len = len(resp["result"])

        # Built from the last item produced by the stream.
        source_documents = [
            f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
            f"""相关度:{doc.metadata['score']}\n\n"""
            for inum, doc in enumerate(resp["source_documents"])
        ]

        # ensure_ascii=False keeps non-ASCII text unescaped in the payload.
        await websocket.send_text(
            json.dumps(
                {
                    "question": question,
                    "turn": turn,
                    "flag": "end",
                    "sources_documents": source_documents,
                },
                ensure_ascii=False,
            )
        )
        turn += 1
446
+
447
async def stream_chat_bing(websocket: WebSocket):
    """Serve streaming question answering based on Bing search over a websocket.

    Each incoming JSON message must carry "question" and "history". For every
    message a "start" frame is sent, then the answer as incremental text
    chunks, then an "end" frame listing the source documents.
    """
    await websocket.accept()
    turn = 1
    while True:
        input_json = await websocket.receive_json()
        question, history = input_json["question"], input_json["history"]

        await websocket.send_json({"question": question, "turn": turn, "flag": "start"})

        last_print_len = 0
        for resp, history in local_doc_qa.get_search_result_based_answer(question, chat_history=history, streaming=True):
            # Yield to the event loop between chunks, exactly as stream_chat
            # does, because the answer generator itself is synchronous.
            await asyncio.sleep(0)
            # Send only the part of the answer that has not been sent yet.
            await websocket.send_text(resp["result"][last_print_len:])
            last_print_len = len(resp["result"])

        source_documents = [
            f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
            f"""相关度:{doc.metadata['score']}\n\n"""
            for inum, doc in enumerate(resp["source_documents"])
        ]

        await websocket.send_text(
            json.dumps(
                {
                    "question": question,
                    "turn": turn,
                    "flag": "end",
                    "sources_documents": source_documents,
                },
                ensure_ascii=False,
            )
        )
        turn += 1
482
+
483
async def document():
    # Send visitors of the root URL to the interactive API documentation.
    docs_url = "/docs"
    return RedirectResponse(url=docs_url)
485
+
486
+
487
def api_start(host, port, **kwargs):
    """Create the FastAPI app, register all routes, load the models and serve.

    Args:
        host: Address uvicorn listens on.
        port: Port uvicorn listens on.
        **kwargs: Optional ``ssl_keyfile`` and ``ssl_certfile``; TLS is enabled
            only when both are given.
    """
    global app
    global local_doc_qa

    llm_model_ins = shared.loaderLLM()

    app = FastAPI()
    # Allow cross-origin requests from any origin when OPEN_CROSS_DOMAIN is
    # enabled in the configuration.
    if OPEN_CROSS_DOMAIN:
        cors_options = dict(
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )
        app.add_middleware(CORSMiddleware, **cors_options)

    # The stream_chat websocket is reached directly at
    # ws://localhost:7861/local_doc_qa/stream_chat; the knowledge_base_id is
    # chosen in the request body.
    app.websocket("/local_doc_qa/stream_chat")(stream_chat)

    app.get("/", response_model=BaseResponse, summary="swagger 文档")(document)

    # Streaming question answering based on Bing search. Testing a websocket
    # endpoint needs a websocket-capable client (e.g. Postman or Insomnia):
    # create a websocket request and use the ws:// scheme, e.g.
    # ws://localhost:7861/local_doc_qa/stream_chat_bing
    app.websocket("/local_doc_qa/stream_chat_bing")(stream_chat_bing)

    # Remaining HTTP routes, registered in this order:
    # (registrar, path, response model, summary, handler)
    http_routes = [
        (app.post, "/chat", ChatMessage, "与模型对话", chat),
        (app.post, "/local_doc_qa/upload_file", BaseResponse, "上传文件到知识库", upload_file),
        (app.post, "/local_doc_qa/upload_files", BaseResponse, "批量上传文件到知识库", upload_files),
        (app.post, "/local_doc_qa/local_doc_chat", ChatMessage, "与知识库对话", local_doc_chat),
        (app.post, "/local_doc_qa/bing_search_chat", ChatMessage, "与必应搜索对话", bing_search_chat),
        (app.get, "/local_doc_qa/list_knowledge_base", ListDocsResponse, "获取知识库列表", list_kbs),
        (app.get, "/local_doc_qa/list_files", ListDocsResponse, "获取知识库内的文件列表", list_docs),
        (app.delete, "/local_doc_qa/delete_knowledge_base", BaseResponse, "删除知识库", delete_kb),
        (app.delete, "/local_doc_qa/delete_file", BaseResponse, "删除知识库内的文件", delete_doc),
        (app.post, "/local_doc_qa/update_file", BaseResponse, "上传文件到知识库,并删除另一个文件", update_doc),
    ]
    for register, path, model, summary, handler in http_routes:
        register(path, response_model=model, summary=summary)(handler)

    local_doc_qa = LocalDocQA()
    local_doc_qa.init_cfg(
        llm_model=llm_model_ins,
        embedding_model=EMBEDDING_MODEL,
        embedding_device=EMBEDDING_DEVICE,
        top_k=VECTOR_SEARCH_TOP_K,
    )

    # TLS options are passed through only when both files were supplied.
    ssl_keyfile = kwargs.get("ssl_keyfile")
    ssl_certfile = kwargs.get("ssl_certfile")
    run_options = {"host": host, "port": port}
    if ssl_keyfile and ssl_certfile:
        run_options.update(ssl_keyfile=ssl_keyfile, ssl_certfile=ssl_certfile)
    uvicorn.run(app, **run_options)
540
+
541
+
542
if __name__ == "__main__":
    # Server options are added on top of the shared model-loader parser.
    parser.add_argument("--host", type=str, default="0.0.0.0")
    parser.add_argument("--port", type=int, default=7861)
    parser.add_argument("--ssl_keyfile", type=str)
    parser.add_argument("--ssl_certfile", type=str)
    # Parse the command line and install the checkpoint loader before starting.
    args = parser.parse_args()
    shared.loaderCheckPoint = LoaderCheckPoint(vars(args))
    api_start(args.host, args.port, ssl_keyfile=args.ssl_keyfile, ssl_certfile=args.ssl_certfile)
cli.bat ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ @echo off
2
+ python cli.py %*
cli.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import click
2
+
3
+ from api import api_start as api_start
4
+ from cli_demo import main as cli_start
5
+ from configs.model_config import llm_model_dict, embedding_model_dict
6
+
7
+
8
@click.group()
@click.version_option(version='1.0.0')
@click.pass_context
def cli(ctx):
    # Root click command group; sub-groups (llm, embedding, start) attach to it.
    # Documented with a comment because click would show a docstring as help text.
    pass
13
+
14
+
15
@cli.group()
def llm():
    # Sub-group for LLM-related commands (see `llm ls`).
    pass
18
+
19
+
20
@llm.command(name="ls")
def llm_ls():
    # Print the name of every configured LLM, one per line.
    for model_name in llm_model_dict:
        print(model_name)
24
+
25
+
26
@cli.group()
def embedding():
    # Sub-group for embedding-model commands (see `embedding ls`).
    pass
29
+
30
+
31
@embedding.command(name="ls")
def embedding_ls():
    # Print the name of every configured embedding model, one per line.
    for model_name in embedding_model_dict:
        print(model_name)
35
+
36
+
37
@cli.group()
def start():
    # Sub-group for the commands that launch a service (api, cli, webui).
    pass
40
+
41
+
42
@start.command(name="api", context_settings=dict(help_option_names=['-h', '--help']))
@click.option('-i', '--ip', default='0.0.0.0', show_default=True, type=str, help='api_server listen address.')
@click.option('-p', '--port', default=7861, show_default=True, type=int, help='api_server listen port.')
@click.option('-k', '--ssl_keyfile', type=str, help='enable api https/wss service, specify the ssl keyfile path.')
@click.option('-c', '--ssl_certfile', type=str, help='enable api https/wss service, specify the ssl certificate file path.')
def start_api(ip, port, **kwargs):
    """Start the API server.

    The SSL options are file paths, so they are declared as strings; declaring
    them as integers made click reject every real path.
    """
    # The checkpoint loader must be installed before api_start() is called.
    # Wrapping every loader argument in click would be a large change, so the
    # argparse defaults in models.loader.args.DEFAULT_ARGS are used instead;
    # edit DEFAULT_ARGS to change them.
    from models import shared
    from models.loader import LoaderCheckPoint
    from models.loader.args import DEFAULT_ARGS
    shared.loaderCheckPoint = LoaderCheckPoint(DEFAULT_ARGS)
    # kwargs carries ssl_keyfile / ssl_certfile through to api_start.
    api_start(host=ip, port=port, **kwargs)
57
+
58
+ # # 通过cli.py调用cli_demo时需要在cli.py里初始化模型,否则会报错:
59
+ # langchain-ChatGLM: error: unrecognized arguments: start cli
60
+ # 为此需要先将
61
+ # args = None
62
+ # args = parser.parse_args()
63
+ # args_dict = vars(args)
64
+ # shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
65
+ # 语句从main函数里取出放到函数外部
66
+ # 然后在cli.py里初始化
67
+
68
@start.command(name="cli", context_settings=dict(help_option_names=['-h', '--help']))
def start_cli():
    print("通过cli.py调用cli_demo...")

    # Imported here, not at module level, so that only this command pays for
    # loading the model packages.
    from models import shared
    from models.loader import LoaderCheckPoint
    from models.loader.args import DEFAULT_ARGS
    # Install the checkpoint loader with the default arguments before handing
    # control to cli_demo.main(), which does not parse arguments itself.
    shared.loaderCheckPoint = LoaderCheckPoint(DEFAULT_ARGS)
    cli_start()
77
+
78
+ # 同cli命令,通过cli.py调用webui时,argparse的初始化需要放到cli.py里,
79
+ # 但由于webui.py里,模型初始化通过init_model函数实现,也无法简单地分离出主函数,
80
+ # 因此除非对webui进行大改,否则无法通过python cli.py start webui 调用webui。
81
+ # 故建议不要通过以上命令启动webui,将下述语句注释掉
82
+
83
@start.command(name="webui", context_settings=dict(help_option_names=['-h', '--help']))
def start_webui():
    # Importing the module runs webui.py's top-level code, which is what
    # launches the UI.
    # NOTE(review): the comment preceding this command in the file advises
    # against starting the web UI this way — confirm it works.
    import webui
86
+
87
+
88
# Run the command group only when executed as a script (as cli.sh / cli.bat
# do), so that importing this module does not start the CLI as a side effect.
if __name__ == "__main__":
    cli()
cli.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/bin/bash
2
+ python cli.py "$@"
cli_demo.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from configs.model_config import *
2
+ from chains.local_doc_qa import LocalDocQA
3
+ import os
4
+ import nltk
5
+ from models.loader.args import parser
6
+ import models.shared as shared
7
+ from models.loader import LoaderCheckPoint
8
+ nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
9
+
10
+ # Show reply with source text from input document
11
+ REPLY_WITH_SOURCE = True
12
+
13
+
14
def main():
    """Run the interactive command-line knowledge-base question-answering demo.

    First asks for one or more local file paths (comma separated) until a
    vector store has been built, then answers questions in an endless loop,
    optionally printing the source passages of every answer.
    """
    llm_model_ins = shared.loaderLLM()
    llm_model_ins.history_len = LLM_HISTORY_LEN

    local_doc_qa = LocalDocQA()
    local_doc_qa.init_cfg(llm_model=llm_model_ins,
                          embedding_model=EMBEDDING_MODEL,
                          embedding_device=EMBEDDING_DEVICE,
                          top_k=VECTOR_SEARCH_TOP_K)
    vs_path = None
    while not vs_path:
        print("注意输入的路径是完整的文件路径,例如knowledge_base/`knowledge_base_id`/content/file.md,多个路径用英文逗号分割")
        filepath = input("Input your local knowledge file path 请输入本地知识文件路径:")

        # Multiple files are supported. Surrounding blanks are stripped so
        # that "a.md, b.md" works, and empty entries are dropped.
        filepath = [path.strip() for path in filepath.split(",") if path.strip()]
        # Ask again on empty input, e.g. when the user just pressed Enter.
        if not filepath:
            continue

        temp, loaded_files = local_doc_qa.init_knowledge_vector_store(filepath)
        if temp is not None:
            vs_path = temp
            # Fewer loaded files than requested means some failed to load;
            # for a wrong path the user may want to enter the paths again.
            if len(loaded_files) != len(filepath):
                # Parse the answer as text instead of eval()-ing raw user input.
                answer = input("部分文件加载失败,若提示路径不存在,可重新加载,是否重新加载,输入True或False: ")
                reload_flag = answer.strip().lower() in ("true", "t", "yes", "y", "1")
                if reload_flag:
                    vs_path = None
                    continue

            print(f"the loaded vs_path is 加载的vs_path为: {vs_path}")
        else:
            print("load file failed, re-input your local knowledge file path 请重新输入本地知识文件路径")

    history = []
    while True:
        query = input("Input your question 请输入问题:")
        last_print_len = 0
        for resp, history in local_doc_qa.get_knowledge_based_answer(query=query,
                                                                     vs_path=vs_path,
                                                                     chat_history=history,
                                                                     streaming=STREAMING):
            if STREAMING:
                # Print only the part of the answer not printed yet.
                print(resp["result"][last_print_len:], end="", flush=True)
                last_print_len = len(resp["result"])
            else:
                print(resp["result"])
        if REPLY_WITH_SOURCE:
            source_text = [f"""出处 [{inum + 1}] {os.path.split(doc.metadata['source'])[-1]}:\n\n{doc.page_content}\n\n"""
                           for inum, doc in
                           enumerate(resp["source_documents"])]
            print("\n\n" + "\n\n".join(source_text))
72
+
73
+
74
if __name__ == "__main__":
    # When cli_demo is started through cli.py the model has to be initialised
    # in cli.py, otherwise argparse fails with
    #     langchain-ChatGLM: error: unrecognized arguments: start cli
    # That is why argument parsing and checkpoint-loader setup live here,
    # outside main(), and are repeated in cli.py.
    args = parser.parse_args()
    shared.loaderCheckPoint = LoaderCheckPoint(vars(args))
    main()
release.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import re
4
+
5
def get_latest_tag(tags=None):
    """Return the highest ``vMAJOR.MINOR.PATCH`` tag.

    Args:
        tags: Optional iterable of tag names. When omitted, the tags are read
            from ``git tag`` in the current working directory.

    Returns:
        The tag with the greatest (major, minor, patch) version. Among tags
        with the same version the last one listed wins.

    Raises:
        ValueError: If no tag starts with a ``vMAJOR.MINOR.PATCH`` version.
        subprocess.CalledProcessError: If ``git tag`` fails.
    """
    if tags is None:
        output = subprocess.check_output(['git', 'tag'])
        tags = output.decode('utf-8').splitlines()

    version_pattern = re.compile(r'v(\d+)\.(\d+)\.(\d+)')
    versioned = []
    for tag in tags:
        matched = version_pattern.match(tag)
        # Tags that are not version tags (e.g. "nightly") are ignored instead
        # of crashing the sort.
        if matched:
            versioned.append((tuple(map(int, matched.groups())), tag))

    if not versioned:
        raise ValueError("no git tag of the form vMAJOR.MINOR.PATCH was found")

    # Stable sort: for equal versions the last listed tag stays last.
    versioned.sort(key=lambda item: item[0])
    return versioned[-1][1]
10
+
11
def update_version_number(latest_tag, increment):
    """Return the next version tag after *latest_tag*.

    *increment* selects the part to bump: 'X' (major), 'Y' (minor) or
    'Z' (patch); every part to the right of the bumped one is reset to 0.
    Any other value leaves the version unchanged.
    """
    parts = [int(text) for text in re.match(r'v(\d+)\.(\d+)\.(\d+)', latest_tag).groups()]
    position = {'X': 0, 'Y': 1, 'Z': 2}.get(increment)
    if position is not None:
        parts[position] += 1
        # Zero out the less significant parts.
        for lower in range(position + 1, len(parts)):
            parts[lower] = 0
    major, minor, patch = parts
    return f"v{major}.{minor}.{patch}"
23
+
24
def main():
    """Interactively create the next version tag and push it to ``origin``.

    Shows the latest tag, asks which part (X, Y or Z) to increment, and after
    confirmation runs ``git tag`` and ``git push``.

    Raises:
        subprocess.CalledProcessError: If creating or pushing the tag fails, so
            that the success message is never printed after a failed git call.
    """
    print("当前最近的Git标签:")
    latest_tag = get_latest_tag()
    print(latest_tag)

    print("请选择要递增的版本号部分(X, Y, Z):")
    increment = input().upper()

    # Keep asking until a valid part is chosen.
    while increment not in ['X', 'Y', 'Z']:
        print("输入错误,请输入X, Y或Z:")
        increment = input().upper()

    new_version = update_version_number(latest_tag, increment)
    print(f"新的版本号为:{new_version}")

    print("确认更新版本号并推送到远程仓库?(y/n)")
    confirmation = input().lower()

    if confirmation == 'y':
        # check=True stops here when git fails, instead of reporting success.
        subprocess.run(['git', 'tag', new_version], check=True)
        subprocess.run(['git', 'push', 'origin', new_version], check=True)
        print("新版本号已创建并推送到远程仓库。")
    else:
        print("操作已取消。")
48
+
49
+ if __name__ == '__main__':
50
+ main()
requirements.txt ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pymupdf
2
+ paddlepaddle==2.4.2
3
+ paddleocr~=2.6.1.3
4
+ langchain==0.0.174
5
+ transformers==4.29.1
6
+ unstructured[local-inference]
7
+ layoutparser[layoutmodels,tesseract]
8
+ nltk~=3.8.1
9
+ sentence-transformers
10
+ beautifulsoup4
11
+ icetk
12
+ cpm_kernels
13
+ faiss-cpu
14
+ gradio==3.37.0
15
+ fastapi~=0.95.0
16
+ uvicorn~=0.21.1
17
+ pypinyin~=0.48.0
18
+ click~=8.1.3
19
+ tabulate
20
+ feedparser
21
+ azure-core
22
+ openai
23
+ #accelerate~=0.18.0
24
+ #peft~=0.3.0
25
+ #bitsandbytes; platform_system != "Windows"
26
+
27
+ # 要调用llama-cpp模型,如vicuna-13b量化模型需要安装llama-cpp-python库
28
+ # but!!! 实测pip install 不好使,需要手动从https://github.com/abetlen/llama-cpp-python/releases/下载
29
+ # 而且注意不同时期的ggml格式并不!兼!容!!!因此需要安装的llama-cpp-python版本也不一致,需要手动测试才能确定
30
+ # 实测ggml-vicuna-13b-1.1在llama-cpp-python 0.1.63上可正常兼容
31
+ # 不过!!!本项目模型加载的方式控制的比较严格,与llama-cpp-python的兼容性较差,很多参数设定不能使用,
32
+ # 建议如非必要还是不要使用llama-cpp
33
+ torch~=2.0.0
34
+ pydantic~=1.10.7
35
+ starlette~=0.26.1
36
+ numpy~=1.23.5
37
+ tqdm~=4.65.0
38
+ requests~=2.28.2
39
+ tenacity~=8.2.2
40
+ charset_normalizer==2.1.0
webui.py ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from paddleocr import PaddleOCR
2
+
3
+ import gradio as gr
4
+ import shutil
5
+ from configs.model_config import *
6
+
7
+ import nltk
8
+ import models.shared as shared
9
+ from models.loader.args import parser
10
+ from models.loader import LoaderCheckPoint
11
+ import os
12
+
13
+ from chains.local_doc_qa import LocalDocQA
14
+
15
+ nltk.data.path = [NLTK_DATA_PATH] + nltk.data.path
16
+
17
def get_vs_list():
    """Return the knowledge-base choices for the UI.

    The first entry is always the "create new knowledge base" placeholder; it
    is followed by the sorted entries of KB_ROOT_PATH when that directory
    exists and is not empty.
    """
    choices = ["新建知识库"]
    if os.path.exists(KB_ROOT_PATH):
        choices = choices + sorted(os.listdir(KB_ROOT_PATH))
    return choices
26
+
27
+
28
+ embedding_model_dict_list = list(embedding_model_dict.keys())
29
+
30
+ llm_model_dict_list = list(llm_model_dict.keys())
31
+
32
+ local_doc_qa = LocalDocQA()
33
+
34
+ flag_csv_logger = gr.CSVLogger()
35
+
36
+
37
def get_answer(query, vs_path, history, mode, score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
               vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_conent: bool = True,
               chunk_size=CHUNK_SIZE, streaming: bool = STREAMING):
    """Answer *query* in the selected *mode* and yield chatbot updates.

    Generator used as a Gradio event handler. Every yielded item is
    ``(history, "")``: the updated chat history and an empty string that
    clears the input box.

    Modes:
        "Bing搜索问答": answer from search results, sources appended.
        "知识库问答": answer from the vector store at *vs_path*; used only when
            that path exists and contains ``index.faiss``.
        "知识库测试": list the matching knowledge-base passages only.
        anything else (including an unusable *vs_path*): plain LLM chat.

    After the answer is complete the interaction is logged and flagged.
    """
    if mode == "Bing搜索问答":
        for resp, history in local_doc_qa.get_search_result_based_answer(
                query=query, chat_history=history, streaming=streaming):
            # Collapsible HTML block per source, linking to its URL.
            source = "\n\n"
            source += "".join(
                [
                    f"""<details> <summary>出处 [{i + 1}] <a href="{doc.metadata["source"]}" target="_blank">{doc.metadata["source"]}</a> </summary>\n"""
                    f"""{doc.page_content}\n"""
                    f"""</details>"""
                    for i, doc in
                    enumerate(resp["source_documents"])])
            history[-1][-1] += source
            yield history, ""
    elif mode == "知识库问答" and vs_path is not None and os.path.exists(vs_path) and "index.faiss" in os.listdir(
            vs_path):
        for resp, history in local_doc_qa.get_knowledge_based_answer(
                query=query, vs_path=vs_path, chat_history=history, streaming=streaming):
            # Collapsible HTML block per source, titled with its file name.
            source = "\n\n"
            source += "".join(
                [f"""<details> <summary>出处 [{i + 1}] {os.path.split(doc.metadata["source"])[-1]}</summary>\n"""
                 f"""{doc.page_content}\n"""
                 f"""</details>"""
                 for i, doc in
                 enumerate(resp["source_documents"])])
            history[-1][-1] += source
            yield history, ""
    elif mode == "知识库测试":
        # vs_path is None when no knowledge base is selected; check it first
        # because os.path.exists(None) raises TypeError.
        if vs_path and os.path.exists(vs_path):
            resp, prompt = local_doc_qa.get_knowledge_based_conent_test(query=query, vs_path=vs_path,
                                                                        score_threshold=score_threshold,
                                                                        vector_search_top_k=vector_search_top_k,
                                                                        chunk_conent=chunk_conent,
                                                                        chunk_size=chunk_size)
            if not resp["source_documents"]:
                yield history + [[query,
                                  "根据您的设定,没有匹配到任何内容,请确认您设置的知识相关度 Score 阈值是否过小或其他参数是否正确。"]], ""
            else:
                source = "\n".join(
                    [
                        f"""<details open> <summary>【知识相关度 Score】:{doc.metadata["score"]} - 【出处{i + 1}】:  {os.path.split(doc.metadata["source"])[-1]} </summary>\n"""
                        f"""{doc.page_content}\n"""
                        f"""</details>"""
                        for i, doc in
                        enumerate(resp["source_documents"])])
                history.append([query, "以下内容为知识库中满足设置条件的匹配结果:\n\n" + source])
                yield history, ""
        else:
            yield history + [[query,
                              "请选择知识库后进行测试,当前未选择知识库。"]], ""
    else:
        for answer_result in local_doc_qa.llm.generatorAnswer(prompt=query, history=history,
                                                              streaming=streaming):
            resp = answer_result.llm_output["answer"]
            history = answer_result.history
            history[-1][-1] = resp
            yield history, ""
    logger.info(f"flagging: username={FLAG_USER_NAME},query={query},vs_path={vs_path},mode={mode},history={history}")
    flag_csv_logger.flag([query, vs_path, history, mode], username=FLAG_USER_NAME)
    # The leftover debug output print(torch.cuda.is_available()) was removed.
99
+
100
+
101
def init_model():
    """Load the LLM from the command-line arguments and smoke-test it.

    Returns the status message shown in the chat window: a success message
    when the model answered a test prompt, otherwise a failure message.
    """
    print("start init_model!")
    cli_args = vars(parser.parse_args())

    shared.loaderCheckPoint = LoaderCheckPoint(cli_args)
    llm_model_ins = shared.loaderLLM()
    llm_model_ins.set_history_len(LLM_HISTORY_LEN)

    try:
        local_doc_qa.init_cfg(llm_model=llm_model_ins)
        # Send one test prompt through the model and print what comes back.
        for answer_result in local_doc_qa.llm.generatorAnswer("你好"):
            print(answer_result.llm_output)
        reply = """模型已成功加载,可以开始对话,或从右侧选择模式后开始对话"""
        logger.info(reply)
        return reply
    except Exception as e:
        logger.error(e)
        reply = """模型未成功加载,请到页面左上角"模型配置"选项卡中重新选择后点击"加载模型"按钮"""
        running_on_macos = str(e) == "Unknown platform: darwin"
        if not running_on_macos:
            logger.info(reply)
        else:
            # This specific error gets a macOS-specific hint in the log.
            logger.info("该报错可能因为您使用的是 macOS 操作系统,需先下载模型至本地后执行 Web UI,具体方法请参考项目 README 中本地部署方法及常见问题:"
                        " https://github.com/imClumsyPanda/langchain-ChatGLM")
        return reply
127
+
128
+
129
def reinit_model(llm_model, embedding_model, llm_history_len, no_remote_model, use_ptuning_v2, use_lora, top_k,
                 history):
    """Reload the LLM and embedding model and report the outcome in the chat.

    Returns *history* with one appended status message. ``use_lora`` is
    accepted but not used here.
    """
    try:
        reloaded_llm = shared.loaderLLM(llm_model, no_remote_model, use_ptuning_v2)
        reloaded_llm.history_len = llm_history_len
        local_doc_qa.init_cfg(llm_model=reloaded_llm,
                              embedding_model=embedding_model,
                              top_k=top_k)
    except Exception as e:
        logger.error(e)
        model_status = """模型未成功重新加载,请到页面左上角"模型配置"选项卡中重新选择后点击"加载模型"按钮"""
    else:
        model_status = """模型已成功重新加载,可以开始对话,或从右侧选择模式后开始对话"""
    logger.info(model_status)
    return history + [[None, model_status]]
144
+
145
+
146
def get_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
    """Add uploaded files (or a single text entry) to knowledge base *vs_id*.

    When *files* is a list, each file is moved into the knowledge base's
    ``content`` directory and indexed; otherwise the arguments are handed to
    ``one_knowledge_add``. Returns a 4-tuple for the Gradio outputs: the
    vector-store path (None when the models are not loaded), None to clear the
    upload widget, the chat history with a status line appended, and an update
    for the list of files.
    """
    vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
    models_ready = local_doc_qa.llm and local_doc_qa.embeddings
    if not models_ready:
        file_status = "模型未完成加载,请先在加载模型后再导入文件"
        vs_path = None
    else:
        if isinstance(files, list):
            filelist = []
            for uploaded in files:
                filename = os.path.split(uploaded.name)[-1]
                destination = os.path.join(KB_ROOT_PATH, vs_id, "content", filename)
                shutil.move(uploaded.name, destination)
                filelist.append(destination)
            vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(filelist, vs_path, sentence_size)
        else:
            vs_path, loaded_files = local_doc_qa.one_knowledge_add(vs_path, files, one_conent,
                                                                   one_content_segmentation,
                                                                   sentence_size)
        if len(loaded_files):
            file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问"
        else:
            file_status = "文件未成功加载,请重新上传文件"
    logger.info(file_status)
    updated_history = history + [[None, file_status]]
    file_choices = local_doc_qa.list_file_from_vector_store(vs_path) if vs_path else []
    return vs_path, None, updated_history, gr.update(choices=file_choices)
169
+
170
+
171
def change_vs_name_input(vs_id, history):
    """React to a change of the selected knowledge base.

    Returns a 7-tuple of Gradio updates/values, in order: name textbox, "add"
    button, file-upload column, vector-store path, chat history, list of
    deletable files, and "delete knowledge base" button.
    """
    if vs_id == "新建知识库":
        # Placeholder entry: show the controls for creating a knowledge base.
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=False), None, history,\
            gr.update(choices=[]), gr.update(visible=False)
    else:
        vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
        # Look for the index file directly: os.listdir(vs_path) raised when the
        # vector_store directory was missing.
        if os.path.exists(os.path.join(vs_path, "index.faiss")):
            file_status = f"已加载知识库{vs_id},请开始提问"
            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), \
                vs_path, history + [[None, file_status]], \
                gr.update(choices=local_doc_qa.list_file_from_vector_store(vs_path), value=[]), \
                gr.update(visible=True)
        else:
            file_status = f"已选择知识库{vs_id},当前知识库中未上传文件,请先上传文件后,再开始提问"
            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), \
                vs_path, history + [[None, file_status]], \
                gr.update(choices=[], value=[]), gr.update(visible=True, value=[])
188
+
189
+
190
# Notice shown when the knowledge-base test mode is entered. Every numbered
# item ends with a blank line; item 2 was missing it, which made item 3 run
# straight into it.
knowledge_base_test_mode_info = ("【注意】\n\n"
                                 "1. 您已进入知识库测试模式,您输入的任何对话内容都将用于进行知识库查询,"
                                 "并仅输出知识库匹配出的内容及相似度分值和及输入的文本源路径,查询的内容并不会进入模型查询。\n\n"
                                 "2. 知识相关度 Score 经测试,建议设置为 500 或更低,具体设置情况请结合实际使用调整。\n\n"
                                 """3. 使用"添加单条数据"添加文本至知识库时,内容如未分段,则内容越多越会稀释各查询内容与之关联的score阈值。\n\n"""
                                 "4. 单条内容长度建议设置在100-150左右。\n\n"
                                 "5. 本界面用于知识入库及知识匹配相关参数设定,但当前版本中,"
                                 "本界面中修改的参数并不会直接修改对话界面中参数,仍需前往`configs/model_config.py`修改后生效。"
                                 "相关参数将在后续版本中支持本界面直接修改。")
199
+
200
+
201
def change_mode(mode, history):
    """Show or hide the knowledge-base panels for the selected *mode*.

    Returns updates for the knowledge-base settings panel and the test
    settings panel, plus the chat history. Entering test mode replaces the
    history with the test-mode notice.
    """
    testing = mode == "知识库测试"
    uses_knowledge_base = testing or mode == "知识库问答"
    chat_log = [[None, knowledge_base_test_mode_info]] if testing else history
    return gr.update(visible=uses_knowledge_base), gr.update(visible=testing), chat_log
210
+
211
+
212
def change_chunk_conent(mode, label_conent, history):
    """Toggle an optional setting and announce the change in the chat.

    *label_conent* selects the announced feature name. Returns a visibility
    update (shown when *mode* is truthy) and the history with one notice added.
    """
    if "chunk_conent" in label_conent:
        conent = "搜索结果上下文关联"
    elif "one_content_segmentation" in label_conent:  # currently unused, kept for later
        conent = "内容分段入库"
    else:
        conent = ""

    enabled = bool(mode)
    notice = f"【已开启{conent}】" if enabled else f"【已关闭{conent}】"
    return gr.update(visible=enabled), history + [[None, notice]]
223
+
224
+
225
def add_vs_name(vs_name, chatbot):
    """Create a new, empty knowledge base called *vs_name*.

    Returns six Gradio updates/values: knowledge-base dropdown, name textbox,
    "add" button, file-upload column, chat history and "delete" button. When
    the name is already taken nothing is created and a conflict notice is
    added to the chat.
    """
    if vs_name in get_vs_list():
        vs_status = "与已有知识库名称冲突,请重新选择其他名称后提交"
        chatbot = chatbot + [[None, vs_status]]
        return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True), gr.update(
            visible=False), chatbot, gr.update(visible=False)

    # Create the folder for uploaded files, then the folder for the vector store.
    for subfolder in ("content", "vector_store"):
        target = os.path.join(KB_ROOT_PATH, vs_name, subfolder)
        if not os.path.exists(target):
            os.makedirs(target)
    vs_status = f"""已新增知识库"{vs_name}",将在上传文件并载入成功后进行存储。请在开始对话前,先完成文件上传。 """
    chatbot = chatbot + [[None, vs_status]]
    return gr.update(visible=True, choices=get_vs_list(), value=vs_name), gr.update(
        visible=False), gr.update(visible=False), gr.update(visible=True), chatbot, gr.update(visible=True)
242
+
243
+
244
+ # 自动化加载固定文件间中文件
245
def reinit_vector_store(vs_id, history):
    """Rebuild the vector store of knowledge base *vs_id* from its content folder.

    The existing ``vector_store`` directory is deleted and rebuilt from every
    file under ``content``. Returns *history* with a status line appended;
    any failure is logged and reported as an unsuccessful build.
    """
    try:
        vs_path = os.path.join(KB_ROOT_PATH, vs_id, "vector_store")
        shutil.rmtree(vs_path)
        # Pass the configured number itself. A new gr.Number component used to
        # be built here and passed as the sentence size.
        vs_path, loaded_files = local_doc_qa.init_knowledge_vector_store(os.path.join(KB_ROOT_PATH, vs_id, "content"),
                                                                         vs_path, SENTENCE_SIZE)
        model_status = """知识库构建成功"""
    except Exception as e:
        logger.error(e)
        model_status = """知识库构建未成功"""
    logger.info(model_status)
    return history + [[None, model_status]]
260
+
261
+
262
def refresh_vs_list():
    """Return two dropdown updates carrying the current knowledge-base list."""
    first_update = gr.update(choices=get_vs_list())
    second_update = gr.update(choices=get_vs_list())
    return first_update, second_update
264
+
265
def delete_file(vs_id, files_to_delete, chatbot):
    """Remove the selected files from knowledge base *vs_id*.

    The files are first removed from the vector store; unless the returned
    status contains "fail", they are also deleted from the content folder.
    Returns an update for the list of files and the chat history with a
    status line appended.
    """
    kb_root = os.path.join(KB_ROOT_PATH, vs_id)
    vs_path = os.path.join(kb_root, "vector_store")
    content_path = os.path.join(kb_root, "content")
    docs_path = [os.path.join(content_path, name) for name in files_to_delete]
    status = local_doc_qa.delete_file_from_vector_store(vs_path=vs_path,
                                                        filepath=docs_path)
    failed = "fail" in status
    if not failed:
        for doc_path in docs_path:
            if os.path.exists(doc_path):
                os.remove(doc_path)
    remaining_files = local_doc_qa.list_file_from_vector_store(vs_path)
    if failed:
        vs_status = "文件删除失败。"
    elif len(remaining_files) > 0:
        vs_status = "文件删除成功。"
    else:
        vs_status = f"文件删除成功,知识库{vs_id}中无已上传文件,请先上传文件后,再开始提问。"
    logger.info(",".join(files_to_delete) + vs_status)
    chatbot = chatbot + [[None, vs_status]]
    file_choices = local_doc_qa.list_file_from_vector_store(vs_path)
    return gr.update(choices=file_choices, value=[]), chatbot
285
+
286
+
287
def delete_vs(vs_id, chatbot):
    """Delete knowledge base *vs_id* from disk.

    Returns six Gradio updates/values: knowledge-base dropdown, name textbox,
    "add" button, file-upload column, chat history and "delete" button. On
    success the dropdown goes back to its first entry; on failure the error is
    logged and a failure notice is added to the chat.
    """
    try:
        shutil.rmtree(os.path.join(KB_ROOT_PATH, vs_id))
        status = f"成功删除知识库{vs_id}"
        logger.info(status)
        chatbot = chatbot + [[None, status]]
        dropdown_update = gr.update(choices=get_vs_list(), value=get_vs_list()[0])
        return (dropdown_update, gr.update(visible=True), gr.update(visible=True),
                gr.update(visible=False), chatbot, gr.update(visible=False))
    except Exception as e:
        logger.error(e)
        status = f"删除知识库{vs_id}失败"
        chatbot = chatbot + [[None, status]]
        return (gr.update(visible=True), gr.update(visible=False), gr.update(visible=False),
                gr.update(visible=True), chatbot, gr.update(visible=True))
301
+
302
+
303
+ block_css = """.importantButton {
304
+ background: linear-gradient(45deg, #7e0570,#5d1c99, #6e00ff) !important;
305
+ border: none !important;
306
+ }
307
+ .importantButton:hover {
308
+ background: linear-gradient(45deg, #ff00e0,#8500ff, #6e00ff) !important;
309
+ border: none !important;
310
+ }"""
311
+
312
+ webui_title = """
313
+ # 🎉张平的专属知识库
314
+ """
315
+ default_vs = get_vs_list()[0] if len(get_vs_list()) > 1 else "为空"
316
+ init_message = f"""欢迎使用 张平的专属知识库!
317
+
318
+ 请在右侧切换模式,目前支持直接与 LLM 模型对话或基于本地知识库问答。
319
+ 知识库问答模式,选择知识库名称后,即可开始问答,如有需要可以上传文件/文件夹至知识库。
320
+ 知识库暂不支持文件删除。
321
+ """
322
+
323
+ # 初始化消息
324
+ model_status = init_model()
325
+
326
+ default_theme_args = dict(
327
+ font=["Source Sans Pro", 'ui-sans-serif', 'system-ui', 'sans-serif'],
328
+ font_mono=['IBM Plex Mono', 'ui-monospace', 'Consolas', 'monospace'],
329
+ )
330
+
331
+ with gr.Blocks(css=block_css, theme=gr.themes.Default(**default_theme_args)) as demo:
332
+ vs_path, file_status, model_status = gr.State(
333
+ os.path.join(KB_ROOT_PATH, get_vs_list()[0], "vector_store") if len(get_vs_list()) > 1 else ""), gr.State(""), gr.State(
334
+ model_status)
335
+ gr.Markdown(webui_title)
336
+ with gr.Tab("对话"):
337
+ with gr.Row():
338
+ with gr.Column(scale=10):
339
+ chatbot = gr.Chatbot([[None, init_message], [None, model_status.value]],
340
+ elem_id="chat-box",
341
+ show_label=False).style(height=750)
342
+ query = gr.Textbox(show_label=False,
343
+ placeholder="请输入提问内容,按回车进行提交").style(container=False)
344
+ with gr.Column(scale=5):
345
+ mode = gr.Radio(["LLM 对话", "知识库问答", "Bing搜索问答"],
346
+ label="请选择使用模式",
347
+ value="知识库问答", )
348
+ knowledge_set = gr.Accordion("知识库设定", visible=False)
349
+ vs_setting = gr.Accordion("配置知识库")
350
+ mode.change(fn=change_mode,
351
+ inputs=[mode, chatbot],
352
+ outputs=[vs_setting, knowledge_set, chatbot])
353
+ with vs_setting:
354
+ vs_refresh = gr.Button("更新已有知识库选项")
355
+ select_vs = gr.Dropdown(get_vs_list(),
356
+ label="请选择要加载的知识库",
357
+ interactive=True,
358
+ value=get_vs_list()[0] if len(get_vs_list()) > 0 else None
359
+ )
360
+ vs_name = gr.Textbox(label="请输入新建知识库名称,当前知识库命名暂不支持中文",
361
+ lines=1,
362
+ interactive=True,
363
+ visible=True)
364
+ vs_add = gr.Button(value="添加至知识库选项", visible=True)
365
+ vs_delete = gr.Button("删除本知识库", visible=False)
366
+ file2vs = gr.Column(visible=False)
367
+ with file2vs:
368
+ # load_vs = gr.Button("加载知识库")
369
+ gr.Markdown("向知识库中添加文件")
370
+ sentence_size = gr.Number(value=SENTENCE_SIZE, precision=0,
371
+ label="文本入库分句长度限制",
372
+ interactive=True, visible=True)
373
+ with gr.Tab("上传文件"):
374
+ files = gr.File(label="添加文件",
375
+ file_types=['.txt', '.md', '.docx', '.pdf', '.png', '.jpg', ".csv"],
376
+ file_count="multiple",
377
+ show_label=False)
378
+ load_file_button = gr.Button("上传文件并加载知识库")
379
+ with gr.Tab("上传文件夹"):
380
+ folder_files = gr.File(label="添加文件",
381
+ file_count="directory",
382
+ show_label=False)
383
+ load_folder_button = gr.Button("上传文件夹并加载知识库")
384
+ with gr.Tab("删除文件"):
385
+ files_to_delete = gr.CheckboxGroup(choices=[],
386
+ label="请从知识库已有文件中选择要删除的文件",
387
+ interactive=True)
388
+ delete_file_button = gr.Button("从知识库中删除选中文件")
389
+ vs_refresh.click(fn=refresh_vs_list,
390
+ inputs=[],
391
+ outputs=select_vs)
392
+ vs_add.click(fn=add_vs_name,
393
+ inputs=[vs_name, chatbot],
394
+ outputs=[select_vs, vs_name, vs_add, file2vs, chatbot, vs_delete])
395
+ vs_delete.click(fn=delete_vs,
396
+ inputs=[select_vs, chatbot],
397
+ outputs=[select_vs, vs_name, vs_add, file2vs, chatbot, vs_delete])
398
+ select_vs.change(fn=change_vs_name_input,
399
+ inputs=[select_vs, chatbot],
400
+ outputs=[vs_name, vs_add, file2vs, vs_path, chatbot, files_to_delete, vs_delete])
401
+ load_file_button.click(get_vector_store,
402
+ show_progress=True,
403
+ inputs=[select_vs, files, sentence_size, chatbot, vs_add, vs_add],
404
+ outputs=[vs_path, files, chatbot, files_to_delete], )
405
+ load_folder_button.click(get_vector_store,
406
+ show_progress=True,
407
+ inputs=[select_vs, folder_files, sentence_size, chatbot, vs_add,
408
+ vs_add],
409
+ outputs=[vs_path, folder_files, chatbot, files_to_delete], )
410
+ flag_csv_logger.setup([query, vs_path, chatbot, mode], "flagged")
411
+ query.submit(get_answer,
412
+ [query, vs_path, chatbot, mode],
413
+ [chatbot, query])
414
+ delete_file_button.click(delete_file,
415
+ show_progress=True,
416
+ inputs=[select_vs, files_to_delete, chatbot],
417
+ outputs=[files_to_delete, chatbot])
418
+ with gr.Tab("知识库测试 Beta"):
419
+ with gr.Row():
420
+ with gr.Column(scale=10):
421
+ chatbot = gr.Chatbot([[None, knowledge_base_test_mode_info]],
422
+ elem_id="chat-box",
423
+ show_label=False).style(height=750)
424
+ query = gr.Textbox(show_label=False,
425
+ placeholder="请输入提问内容,按回车进行提交").style(container=False)
426
+ with gr.Column(scale=5):
427
+ mode = gr.Radio(["知识库测试"], # "知识库问答",
428
+ label="请选择使用模式",
429
+ value="知识库测试",
430
+ visible=False)
431
+ knowledge_set = gr.Accordion("知识库设定", visible=True)
432
+ vs_setting = gr.Accordion("配置知识库", visible=True)
433
+ mode.change(fn=change_mode,
434
+ inputs=[mode, chatbot],
435
+ outputs=[vs_setting, knowledge_set, chatbot])
436
+ with knowledge_set:
437
+ score_threshold = gr.Number(value=VECTOR_SEARCH_SCORE_THRESHOLD,
438
+ label="知识相关度 Score 阈值,分值越低匹配度越高",
439
+ precision=0,
440
+ interactive=True)
441
+ vector_search_top_k = gr.Number(value=VECTOR_SEARCH_TOP_K, precision=0,
442
+ label="获取知识库内容条数", interactive=True)
443
+ chunk_conent = gr.Checkbox(value=False,
444
+ label="是否启用上下文关联",
445
+ interactive=True)
446
+ chunk_sizes = gr.Number(value=CHUNK_SIZE, precision=0,
447
+ label="匹配单段内容的连接上下文后最大长度",
448
+ interactive=True, visible=False)
449
+ chunk_conent.change(fn=change_chunk_conent,
450
+ inputs=[chunk_conent, gr.Textbox(value="chunk_conent", visible=False), chatbot],
451
+ outputs=[chunk_sizes, chatbot])
452
+ with vs_setting:
453
+ vs_refresh = gr.Button("更新已有知识库选项")
454
+ select_vs_test = gr.Dropdown(get_vs_list(),
455
+ label="请选择要加载的知识库",
456
+ interactive=True,
457
+ value=get_vs_list()[0] if len(get_vs_list()) > 0 else None)
458
+ vs_name = gr.Textbox(label="请输入新建知识库名称,当前知识库命名暂不支持中文",
459
+ lines=1,
460
+ interactive=True,
461
+ visible=True)
462
+ vs_add = gr.Button(value="添加至知识库选项", visible=True)
463
+ file2vs = gr.Column(visible=False)
464
+ with file2vs:
465
+ # load_vs = gr.Button("加载知识库")
466
+ gr.Markdown("向知识库中添加单条内容或文件")
467
+ sentence_size = gr.Number(value=SENTENCE_SIZE, precision=0,
468
+ label="文本入库分句长度限制",
469
+ interactive=True, visible=True)
470
+ with gr.Tab("上传文件"):
471
+ files = gr.File(label="添加文件",
472
+ file_types=['.txt', '.md', '.docx', '.pdf'],
473
+ file_count="multiple",
474
+ show_label=False
475
+ )
476
+ load_file_button = gr.Button("上传文件并加载知识库")
477
+ with gr.Tab("上传文件夹"):
478
+ folder_files = gr.File(label="添加文件",
479
+ # file_types=['.txt', '.md', '.docx', '.pdf'],
480
+ file_count="directory",
481
+ show_label=False)
482
+ load_folder_button = gr.Button("上传文件夹并加载知识库")
483
+ with gr.Tab("添加单条内容"):
484
+ one_title = gr.Textbox(label="标题", placeholder="请输入要添加单条段落的标题", lines=1)
485
+ one_conent = gr.Textbox(label="内容", placeholder="请输入要添加单条段落的内容", lines=5)
486
+ one_content_segmentation = gr.Checkbox(value=True, label="禁止内容分句入库",
487
+ interactive=True)
488
+ load_conent_button = gr.Button("添加内容并加载知识库")
489
+ # 将上传的文件保存到content文件夹下,并更新下拉框
490
+ vs_refresh.click(fn=refresh_vs_list,
491
+ inputs=[],
492
+ outputs=select_vs_test)
493
+ vs_add.click(fn=add_vs_name,
494
+ inputs=[vs_name, chatbot],
495
+ outputs=[select_vs_test, vs_name, vs_add, file2vs, chatbot])
496
+ select_vs_test.change(fn=change_vs_name_input,
497
+ inputs=[select_vs_test, chatbot],
498
+ outputs=[vs_name, vs_add, file2vs, vs_path, chatbot])
499
+ load_file_button.click(get_vector_store,
500
+ show_progress=True,
501
+ inputs=[select_vs_test, files, sentence_size, chatbot, vs_add, vs_add],
502
+ outputs=[vs_path, files, chatbot], )
503
+ load_folder_button.click(get_vector_store,
504
+ show_progress=True,
505
+ inputs=[select_vs_test, folder_files, sentence_size, chatbot, vs_add,
506
+ vs_add],
507
+ outputs=[vs_path, folder_files, chatbot], )
508
+ load_conent_button.click(get_vector_store,
509
+ show_progress=True,
510
+ inputs=[select_vs_test, one_title, sentence_size, chatbot,
511
+ one_conent, one_content_segmentation],
512
+ outputs=[vs_path, files, chatbot], )
513
+ flag_csv_logger.setup([query, vs_path, chatbot, mode], "flagged")
514
+ query.submit(get_answer,
515
+ [query, vs_path, chatbot, mode, score_threshold, vector_search_top_k, chunk_conent,
516
+ chunk_sizes],
517
+ [chatbot, query])
518
+ with gr.Tab("模型配置"):
519
+ llm_model = gr.Radio(llm_model_dict_list,
520
+ label="LLM 模型",
521
+ value=LLM_MODEL,
522
+ interactive=True)
523
+ no_remote_model = gr.Checkbox(shared.LoaderCheckPoint.no_remote_model,
524
+ label="加载本地模型",
525
+ interactive=True)
526
+
527
+ llm_history_len = gr.Slider(0, 10,
528
+ value=LLM_HISTORY_LEN,
529
+ step=1,
530
+ label="LLM 对话轮数",
531
+ interactive=True)
532
+ use_ptuning_v2 = gr.Checkbox(USE_PTUNING_V2,
533
+ label="使用p-tuning-v2微调过的模型",
534
+ interactive=True)
535
+ use_lora = gr.Checkbox(USE_LORA,
536
+ label="使用lora微调的权重",
537
+ interactive=True)
538
+ embedding_model = gr.Radio(embedding_model_dict_list,
539
+ label="Embedding 模型",
540
+ value=EMBEDDING_MODEL,
541
+ interactive=True)
542
+ top_k = gr.Slider(1, 20, value=VECTOR_SEARCH_TOP_K, step=1,
543
+ label="向量匹配 top k", interactive=True)
544
+ load_model_button = gr.Button("重新加载模型")
545
+ load_model_button.click(reinit_model, show_progress=True,
546
+ inputs=[llm_model, embedding_model, llm_history_len, no_remote_model, use_ptuning_v2,
547
+ use_lora, top_k, chatbot], outputs=chatbot)
548
+ # load_knowlege_button = gr.Button("重新构建知识库")
549
+ # load_knowlege_button.click(reinit_vector_store, show_progress=True,
550
+ # inputs=[select_vs, chatbot], outputs=chatbot)
551
+ demo.load(
552
+ fn=refresh_vs_list,
553
+ inputs=None,
554
+ outputs=[select_vs, select_vs_test],
555
+ queue=True,
556
+ show_progress=False,
557
+ )
558
+
559
+ # (demo.queue(concurrency_count=3).launch(server_name='0.0.0.0', server_port=7880,show_api=False, share=True,inbrowser=True))
560
+ # why 不能指定端口,否则不能生成share link
561
+ (demo.queue().launch(server_port=7880,share=True, inbrowser=True))
562
+ # demo.queue().launch(share=True, inbrowser=True)
webui_st.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_chatbox import st_chatbox
3
+ import tempfile
4
+ ###### 从webui借用的代码 #####
5
+ ###### 做了少量修改 #####
6
+ import os
7
+ import shutil
8
+
9
+ from chains.local_doc_qa import LocalDocQA
10
+ from configs.model_config import *
11
+ import nltk
12
+ from models.base import (BaseAnswer,
13
+ AnswerResult,)
14
+ import models.shared as shared
15
+ from models.loader.args import parser
16
+ from models.loader import LoaderCheckPoint
17
+
18
# Put the configured NLTK data directory first in NLTK's search path.
nltk.data.path = [NLTK_DATA_PATH, *nltk.data.path]
19
+
20
+
21
def get_vs_list():
    """Return the selectable knowledge-base names.

    The result always starts with the "新建知识库" placeholder entry. The
    sorted entries found directly under ``KB_ROOT_PATH`` follow it; when that
    directory is missing or empty, only the placeholder is returned.
    """
    placeholder = ["新建知识库"]
    existing = sorted(os.listdir(KB_ROOT_PATH)) if os.path.exists(KB_ROOT_PATH) else []
    return placeholder + existing
30
+
31
+
32
# Model names offered by the UI selectors, in the order the config dicts declare them.
embedding_model_dict_list = [*embedding_model_dict]
llm_model_dict_list = [*llm_model_dict]
34
+
35
+
36
def get_answer(query, vs_path, history, mode, score_threshold=VECTOR_SEARCH_SCORE_THRESHOLD,
               vector_search_top_k=VECTOR_SEARCH_TOP_K, chunk_conent: bool = True,
               chunk_size=CHUNK_SIZE, streaming: bool = STREAMING,):
    """Answer ``query`` according to ``mode``, yielding ``(history, "")`` pairs.

    This is a generator that delegates to the module-level ``local_doc_qa``
    object:

    * "Bing搜索问答": search-based answer.
    * "知识库问答": knowledge-base answer, only when ``vs_path`` exists on
      disk; otherwise the plain LLM branch runs and appends a notice that the
      knowledge base is empty.
    * "知识库测试": retrieval only; the matched documents and their scores
      are reported and the LLM is not queried.
    * anything else: plain LLM chat.

    Retrieved source documents are appended to the reply as collapsible
    ``<details>`` HTML.

    Args:
        query: The user's question.
        vs_path: Path of the vector store to query; may be ``None``.
        history: Chat history as a list of ``[question, answer]`` pairs.
        mode: One of the mode names listed above.
        score_threshold: Passed to the retrieval test.
        vector_search_top_k: Number of documents to retrieve.
        chunk_conent: Whether retrieved chunks are joined with their context.
        chunk_size: Passed through as the chunk size of the retrieval.
        streaming: Passed through to the underlying answer calls.

    Yields:
        ``(history, "")`` where ``history`` is the updated chat history.
    """
    if mode == "Bing搜索问答":
        for resp, history in local_doc_qa.get_search_result_based_answer(
                query=query, chat_history=history, streaming=streaming):
            source = "\n\n"
            source += "".join(
                [f"""<details> <summary>出处 [{i + 1}] <a href="{doc.metadata["source"]}" target="_blank">{doc.metadata["source"]}</a> </summary>\n"""
                 f"""{doc.page_content}\n"""
                 f"""</details>"""
                 for i, doc in
                 enumerate(resp["source_documents"])])
            history[-1][-1] += source
            yield history, ""
    elif mode == "知识库问答" and vs_path is not None and os.path.exists(vs_path):
        local_doc_qa.top_k = vector_search_top_k
        local_doc_qa.chunk_conent = chunk_conent
        local_doc_qa.chunk_size = chunk_size
        for resp, history in local_doc_qa.get_knowledge_based_answer(
                query=query, vs_path=vs_path, chat_history=history, streaming=streaming):
            source = "\n\n"
            source += "".join(
                [f"""<details> <summary>出处 [{i + 1}] {os.path.split(doc.metadata["source"])[-1]}</summary>\n"""
                 f"""{doc.page_content}\n"""
                 f"""</details>"""
                 for i, doc in
                 enumerate(resp["source_documents"])])
            history[-1][-1] += source
            yield history, ""
    elif mode == "知识库测试":
        # os.path.exists(None) raises TypeError, so test for None first, the
        # same way the "知识库问答" branch does.
        if vs_path is not None and os.path.exists(vs_path):
            resp, _ = local_doc_qa.get_knowledge_based_conent_test(query=query, vs_path=vs_path,
                                                                   score_threshold=score_threshold,
                                                                   vector_search_top_k=vector_search_top_k,
                                                                   chunk_conent=chunk_conent,
                                                                   chunk_size=chunk_size)
            if not resp["source_documents"]:
                yield history + [[query,
                                  "根据您的设定,没有匹配到任何内容,请确认您设置的知识相关度 Score 阈值是否过小或其他参数是否正确。"]], ""
            else:
                source = "\n".join(
                    [
                        f"""<details open> <summary>【知识相关度 Score】:{doc.metadata["score"]} - 【出处{i + 1}】:  {os.path.split(doc.metadata["source"])[-1]} </summary>\n"""
                        f"""{doc.page_content}\n"""
                        f"""</details>"""
                        for i, doc in
                        enumerate(resp["source_documents"])])
                history.append([query, "以下内容为知识库中满足设置条件的匹配结果:\n\n" + source])
                yield history, ""
        else:
            yield history + [[query,
                              "请选择知识库后进行测试,当前未选择知识库。"]], ""
    else:
        answer_result_stream_result = local_doc_qa.llm_model_chain(
            {"prompt": query, "history": history, "streaming": streaming})

        for answer_result in answer_result_stream_result['answer_result_stream']:
            resp = answer_result.llm_output["answer"]
            history = answer_result.history
            history[-1][-1] = resp + (
                "\n\n当前知识库为空,如需基于知识库进行问答,请先加载知识库后,再进行提问。" if mode == "知识库问答" else "")
            yield history, ""
    # Runs only once the generator has been fully consumed.
    logger.info(f"flagging: username={FLAG_USER_NAME},query={query},vs_path={vs_path},mode={mode},history={history}")
100
+
101
+
102
def get_vector_store(vs_id, files, sentence_size, history, one_conent, one_content_segmentation):
    """Add uploaded files, or a single text entry, to knowledge base ``vs_id``.

    Args:
        vs_id: Knowledge-base name; its data lives under ``KB_ROOT_PATH/vs_id``.
        files: Either a list of objects exposing a ``name`` attribute that
            holds a file path (each file is moved into the knowledge base's
            ``content`` directory), or any non-list value, which is forwarded
            to ``one_knowledge_add`` together with ``one_conent``.
        sentence_size: Forwarded to the vector-store build calls.
        history: Chat history; a status message is appended to a copy of it.
        one_conent: Forwarded to ``one_knowledge_add`` (non-list case only).
        one_content_segmentation: Forwarded to ``one_knowledge_add``
            (non-list case only).

    Returns:
        ``(vs_path, None, history)``. ``vs_path`` is ``None`` when the model
        held in ``st.session_state.local_doc_qa`` is not loaded yet.
    """
    kb_dir = os.path.join(KB_ROOT_PATH, vs_id)
    vs_path = os.path.join(kb_dir, "vector_store")
    content_dir = os.path.join(kb_dir, "content")
    # exist_ok avoids the race between checking for the directory and creating it.
    os.makedirs(content_dir, exist_ok=True)
    qa = st.session_state.local_doc_qa
    if qa.llm_model_chain and qa.embeddings:
        if isinstance(files, list):
            filelist = []
            for file in files:
                target = os.path.join(content_dir, os.path.split(file.name)[-1])
                shutil.move(file.name, target)
                filelist.append(target)
            vs_path, loaded_files = qa.init_knowledge_vector_store(
                filelist, vs_path, sentence_size)
        else:
            vs_path, loaded_files = qa.one_knowledge_add(vs_path, files, one_conent, one_content_segmentation,
                                                         sentence_size)
        # Truthiness also treats a None result as "nothing loaded" instead of
        # failing inside len().
        if loaded_files:
            file_status = f"已添加 {'、'.join([os.path.split(i)[-1] for i in loaded_files if i])} 内容至知识库,并已加载知识库,请开始提问"
        else:
            file_status = "文件未成功加载,请重新上传文件"
    else:
        file_status = "模型未完成加载,请先在加载模型后再导入文件"
        vs_path = None
    logger.info(file_status)
    return vs_path, None, history + [[None, file_status]]
130
+
131
+
132
# Notice shown when the knowledge-base test mode is entered. Each numbered
# item is followed by a blank line so the items are not fused together.
knowledge_base_test_mode_info = ("【注意】\n\n"
                                 "1. 您已进入知识库测试模式,您输入的任何对话内容都将用于进行知识库查询,"
                                 "并仅输出知识库匹配出的内容及相似度分值和及输入的文本源路径,查询的内容并不会进入模型查询。\n\n"
                                 "2. 知识相关度 Score 经测试,建议设置为 500 或更低,具体设置情况请结合实际使用调整。\n\n"
                                 """3. 使用"添加单条数据"添加文本至知识库时,内容如未分段,则内容越多越会稀释各查询内容与之关联的score阈值。\n\n"""
                                 "4. 单条内容长度建议设置在100-150左右。")
138
+
139
+
140
# Markdown title block; the main script passes it to st.set_page_config.
webui_title = (
    "\n"
    "# 🎉langchain-ChatGLM WebUI🎉\n"
    "👍 [https://github.com/imClumsyPanda/langchain-ChatGLM](https://github.com/imClumsyPanda/langchain-ChatGLM)\n"
)
144
+ ###### #####
145
+
146
+
147
+ ###### todo #####
148
+ # 1. streamlit运行方式与一般web服务器不同,使用模块是无法实现单例模式的,所以shared和local_doc_qa都需要进行全局化处理。
149
+ # 目前已经实现了local_doc_qa和shared.loaderCheckPoint的全局化。
150
+ # 2. 当前local_doc_qa是一个全局变量,一方面:任何一个session对其做出修改,都会影响所有session的对话;另一方面,如何处理所有session的请求竞争也是问题。
151
+ # 这个暂时无法避免,在配置普通的机器上暂时也无需考虑。
152
+ # 3. 目前只包含了get_answer对应的参数,以后可以添加其他参数,如temperature。
153
+ ###### #####
154
+
155
+
156
+ ###### 配置项 #####
157
class ST_CONFIG:
    """Default selections used when the Streamlit page is rendered."""

    # Conversation mode looked up in the sidebar's mode list.
    default_mode = "知识库问答"
    # Knowledge base looked up in the knowledge-base list; the sidebar falls
    # back to the first entry when this name is not present.
    default_kb = ""
160
+ ###### #####
161
+
162
+
163
class TempFile:
    """Expose a file path through a ``name`` attribute.

    ``get_vector_store`` reads ``file.name`` from every entry of its file
    list; this wrapper lets files written from Streamlit uploads be passed to
    it in that form.
    """

    def __init__(self, path):
        # Stored under ``name`` because that is the attribute get_vector_store reads.
        self.name = path
170
+
171
+
172
@st.cache_resource(show_spinner=False, max_entries=1)
def load_model(
    llm_model: str = LLM_MODEL,
    embedding_model: str = EMBEDDING_MODEL,
    use_ptuning_v2: bool = USE_PTUNING_V2,
):
    '''
    Counterpart of init_model; relies on the Streamlit resource cache so the
    model is not reloaded on every script rerun.

    Args:
        llm_model: Key into ``llm_model_dict`` naming the LLM to load.
        embedding_model: Name of the embedding model handed to ``init_cfg``.
        use_ptuning_v2: Forwarded to ``shared.loaderLLM``.

    Returns:
        The ``LocalDocQA`` instance. It is returned even when initialisation
        failed; the failure is logged instead of raised.
    '''
    local_doc_qa = LocalDocQA()
    # Build the loader arguments from the command line, overriding the model name.
    args = parser.parse_args()
    args_dict = vars(args)
    args_dict.update(model=llm_model)
    if shared.loaderCheckPoint is None:  # avoid checkpoint reloading when reinit model
        shared.loaderCheckPoint = LoaderCheckPoint(args_dict)
    # shared.loaderCheckPoint.model_name is different by no_remote_model.
    # if it is not set properly error occurs when reinit llm model(issue#473).
    # as no_remote_model is removed from model_config, need workaround to set it automatically.
    local_model_path = llm_model_dict.get(llm_model, {}).get('local_model_path') or ''
    no_remote_model = os.path.isdir(local_model_path)
    llm_model_ins = shared.loaderLLM(llm_model, no_remote_model, use_ptuning_v2)
    llm_model_ins.history_len = LLM_HISTORY_LEN

    try:
        local_doc_qa.init_cfg(llm_model=llm_model_ins,
                              embedding_model=embedding_model)
        # Send one greeting through the chain to check that the model answers.
        answer_result_stream_result = local_doc_qa.llm_model_chain(
            {"prompt": "你好", "history": [], "streaming": False})

        for answer_result in answer_result_stream_result['answer_result_stream']:
            print(answer_result.llm_output)
        reply = """模型已成功加载,可以开始对话,或从右侧选择模式后开始对话"""
        logger.info(reply)
    except Exception as e:
        # logger.exception keeps the traceback, which logger.error(e) dropped.
        logger.exception(e)
        reply = """模型未成功加载,请到页面左上角"模型配置"选项卡中重新选择后点击"加载模型"按钮"""
        if str(e) == "Unknown platform: darwin":
            logger.info("该报错可能因为您使用的是 macOS 操作系统,需先下载模型至本地后执行 Web UI,具体方法请参考项目 README 中本地部署方法及常见问题:"
                        " https://github.com/imClumsyPanda/langchain-ChatGLM")
        else:
            logger.info(reply)
    return local_doc_qa
215
+
216
+
217
+ # @st.cache_data
218
def answer(query, vs_path='', history=None, mode='', score_threshold=0,
           vector_search_top_k=5, chunk_conent=True, chunk_size=100
           ):
    '''
    Thin wrapper around get_answer that supplies UI-friendly defaults.

    (Caching identical questions via the Streamlit cache was planned here but
    is not enabled.)

    Args:
        query: The user's question.
        vs_path: Path of the vector store to query.
        history: Chat history; a fresh empty list is used when omitted, so
            calls never share one mutable default list.
        mode: Conversation mode name understood by get_answer.
        score_threshold, vector_search_top_k, chunk_conent, chunk_size:
            Forwarded unchanged to get_answer.

    Returns:
        The generator returned by get_answer.
    '''
    if history is None:
        history = []
    return get_answer(query, vs_path, history, mode, score_threshold,
                      vector_search_top_k, chunk_conent, chunk_size)
226
+
227
+
228
def use_kb_mode(m):
    """Return True when mode name ``m`` is one of the knowledge-base modes."""
    kb_modes = ("知识库问答", "知识库测试")
    return m in kb_modes
230
+
231
+
232
# main ui
st.set_page_config(webui_title, layout='wide')

chat_box = st_chatbox(greetings=["模型已成功加载,可以开始对话,或从左侧选择模式后开始对话。"])
# Run help(st_chatbox) to see the available customisation parameters.


def _index_or_zero(options, value):
    """Return the position of ``value`` in ``options``, or 0 when it is absent."""
    try:
        return options.index(value)
    except ValueError:  # list.index signals a missing value with ValueError
        return 0


# sidebar
modes = ['LLM 对话', '知识库问答', 'Bing搜索问答', '知识库测试']
with st.sidebar:
    def on_mode_change():
        """Announce the newly selected mode in the chat box."""
        m = st.session_state.mode
        chat_box.robot_say(f'已切换到"{m}"模式')
        if m == '知识库测试':
            chat_box.robot_say(knowledge_base_test_mode_info)

    mode = st.selectbox('对话模式', modes, _index_or_zero(modes, ST_CONFIG.default_mode),
                        on_change=on_mode_change, key='mode')

    with st.expander('模型配置', not use_kb_mode(mode)):
        with st.form('model_config'):
            llm_model = st.selectbox('LLM模型', llm_model_dict_list,
                                     _index_or_zero(llm_model_dict_list, LLM_MODEL))

            use_ptuning_v2 = st.checkbox('使用p-tuning-v2微调过的模型', False)

            # Each selector gets its own fallback index; the LLM selector's
            # index must not leak into the embedding selector.
            embedding_model = st.selectbox(
                'Embedding模型', embedding_model_dict_list,
                _index_or_zero(embedding_model_dict_list, EMBEDDING_MODEL))

            btn_load_model = st.form_submit_button('重新加载模型')
            if btn_load_model:
                local_doc_qa = load_model(llm_model, embedding_model, use_ptuning_v2)

        # NOTE(review): placed inside the expander but outside the form —
        # confirm against the intended layout.
        history_len = st.slider(
            "LLM对话轮数", 1, 50, LLM_HISTORY_LEN)

    if use_kb_mode(mode):
        vs_list = get_vs_list()
        vs_list.remove('新建知识库')

        def on_new_kb():
            """Add the typed knowledge-base name to the selector and select it."""
            name = st.session_state.kb_name
            if name in vs_list:
                st.error(f'名为“{name}”的知识库已存在。')
            else:
                vs_list.append(name)
                st.session_state.vs_path = name

        def on_vs_change():
            """Announce the newly selected knowledge base in the chat box."""
            chat_box.robot_say(f'已加载知识库: {st.session_state.vs_path}')

        with st.expander('知识库配置', True):
            cols = st.columns([12, 10])
            kb_name = cols[0].text_input(
                '新知识库名称', placeholder='新知识库名称', label_visibility='collapsed')
            # NOTE(review): the text input has no key, so session_state.kb_name
            # appears to keep only the value from the first run — confirm
            # whether the widget should be created with key='kb_name'.
            if 'kb_name' not in st.session_state:
                st.session_state.kb_name = kb_name
            cols[1].button('新建知识库', on_click=on_new_kb)
            vs_path = st.selectbox(
                '选择知识库', vs_list, _index_or_zero(vs_list, ST_CONFIG.default_kb),
                on_change=on_vs_change, key='vs_path')

            st.text('')

            score_threshold = st.slider(
                '知识相关度阈值', 0, 1000, VECTOR_SEARCH_SCORE_THRESHOLD)
            top_k = st.slider('向量匹配数量', 1, 20, VECTOR_SEARCH_TOP_K)
            chunk_conent = st.checkbox('启用上下文关联', False)
            chunk_size = st.slider('上下文关联长度', 1, 1000, CHUNK_SIZE)
            st.text('')
            sentence_size = st.slider('文本入库分句长度限制', 1, 1000, SENTENCE_SIZE)
            files = st.file_uploader('上传知识文件',
                                     ['docx', 'txt', 'md', 'csv', 'xlsx', 'pdf'],
                                     accept_multiple_files=True)
            if st.button('添加文件到知识库'):
                # Write each upload to disk so get_vector_store can read it
                # through a path-bearing object.
                temp_dir = tempfile.mkdtemp()
                file_list = []
                for f in files:
                    file = os.path.join(temp_dir, f.name)
                    with open(file, 'wb') as fp:
                        fp.write(f.getvalue())
                    file_list.append(TempFile(file))
                # NOTE(review): called once after all uploads are written —
                # confirm it was not meant to run per file.
                _, _, history = get_vector_store(
                    vs_path, file_list, sentence_size, [], None, None)
                st.session_state.files = []
332
+
333
+
334
# Load the model only after the sidebar has produced its parameter values.
with st.spinner(f"正在加载模型({llm_model} + {embedding_model}),请耐心等候..."):
    local_doc_qa = load_model(llm_model, embedding_model, use_ptuning_v2)
    local_doc_qa.llm_model_chain.history_len = history_len
    if use_kb_mode(mode):
        # These sidebar values exist only in the knowledge-base modes.
        local_doc_qa.chunk_conent = chunk_conent
        local_doc_qa.chunk_size = chunk_size
    # local_doc_qa.llm_model_chain.temperature = temperature  # setting temperature this way does not seem to take effect
    # get_vector_store reads the instance back from the session state.
    st.session_state.local_doc_qa = local_doc_qa
347
+
348
# Question form: submit the text, show a progress notice, then stream the
# answer into the last chat box.
with st.form("my_form", clear_on_submit=True):
    cols = st.columns([8, 1])
    question = cols[0].text_area(
        'temp', key='input_question', label_visibility='collapsed')

    if cols[1].form_submit_button("发送"):
        chat_box.user_say(question)
        history = []
        plain_chat = mode == "LLM 对话"
        kb_query = not plain_chat and use_kb_mode(mode)

        if plain_chat:
            notice = "正在思考..."
        elif kb_query:
            notice = f"正在查询 [{vs_path}] ..."
        else:
            notice = "正在执行Bing搜索..."
        chat_box.robot_say(notice)
        chat_box.output_messages()

        # Only the knowledge-base modes pass retrieval settings along.
        extra = {}
        if kb_query:
            extra = {
                'vs_path': os.path.join(KB_ROOT_PATH, vs_path, 'vector_store'),
                'score_threshold': score_threshold,
                'vector_search_top_k': top_k,
                'chunk_conent': chunk_conent,
                'chunk_size': chunk_size,
            }
        for history, _ in answer(question, history=[], mode=mode, **extra):
            chat_box.update_last_box_text(history[-1][-1])

# st.write(chat_box.history)
chat_box.output_messages()